Coverage Report

Created: 2025-09-27 06:26

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/php-src/ext/pcre/pcre2lib/pcre2_match.c
Line
Count
Source
1
/*************************************************
2
*      Perl-Compatible Regular Expressions       *
3
*************************************************/
4
5
/* PCRE is a library of functions to support regular expressions whose syntax
6
and semantics are as close as possible to those of the Perl 5 language.
7
8
                       Written by Philip Hazel
9
     Original API code Copyright (c) 1997-2012 University of Cambridge
10
          New API code Copyright (c) 2015-2024 University of Cambridge
11
12
-----------------------------------------------------------------------------
13
Redistribution and use in source and binary forms, with or without
14
modification, are permitted provided that the following conditions are met:
15
16
    * Redistributions of source code must retain the above copyright notice,
17
      this list of conditions and the following disclaimer.
18
19
    * Redistributions in binary form must reproduce the above copyright
20
      notice, this list of conditions and the following disclaimer in the
21
      documentation and/or other materials provided with the distribution.
22
23
    * Neither the name of the University of Cambridge nor the names of its
24
      contributors may be used to endorse or promote products derived from
25
      this software without specific prior written permission.
26
27
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37
POSSIBILITY OF SUCH DAMAGE.
38
-----------------------------------------------------------------------------
39
*/
40
41
42
#ifdef HAVE_CONFIG_H
43
#include "config.h"
44
#endif
45
46
#include "pcre2_internal.h"
47
48
/* These defines enable debugging code */
49
50
/* #define DEBUG_FRAMES_DISPLAY */
51
/* #define DEBUG_SHOW_OPS */
52
/* #define DEBUG_SHOW_RMATCH */
53
54
#ifdef DEBUG_FRAMES_DISPLAY
55
#include <stdarg.h>
56
#endif
57
58
#ifdef DEBUG_SHOW_OPS
59
static const char *OP_names[] = { OP_NAME_LIST };
60
#endif
61
62
/* These defines identify the name of the block containing "static"
63
information, and fields within it. */
64
65
34.6M
#define NLBLOCK mb              /* Block containing newline information */
66
127k
#define PSSTART start_subject   /* Field containing processed string start */
67
8.52M
#define PSEND   end_subject     /* Field containing processed string end */
68
69
239k
#define RECURSE_UNSET 0xffffffffu  /* Bigger than max group number */
70
71
/* Masks for identifying the public options that are permitted at match time. */
72
73
#define PUBLIC_MATCH_OPTIONS \
74
3.63k
  (PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
75
3.63k
   PCRE2_NOTEMPTY_ATSTART|PCRE2_NO_UTF_CHECK|PCRE2_PARTIAL_HARD| \
76
3.63k
   PCRE2_PARTIAL_SOFT|PCRE2_NO_JIT|PCRE2_COPY_MATCHED_SUBJECT| \
77
3.63k
   PCRE2_DISABLE_RECURSELOOP_CHECK)
78
79
#define PUBLIC_JIT_MATCH_OPTIONS \
80
   (PCRE2_NO_UTF_CHECK|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY|\
81
    PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_SOFT|PCRE2_PARTIAL_HARD|\
82
    PCRE2_COPY_MATCHED_SUBJECT)
83
84
/* Non-error returns from and within the match() function. Error returns are
85
externally defined PCRE2_ERROR_xxx codes, which are all negative. */
86
87
4.25k
#define MATCH_MATCH        1
88
344M
#define MATCH_NOMATCH      0
89
90
/* Special internal returns used in the match() function. Make them
91
sufficiently negative to avoid the external error codes. */
92
93
18
#define MATCH_ACCEPT       (-999)
94
1.37k
#define MATCH_KETRPOS      (-998)
95
/* The next 5 must be kept together and in sequence so that a test that checks
96
for any one of them can use a range. */
97
0
#define MATCH_COMMIT       (-997)
98
238k
#define MATCH_PRUNE        (-996)
99
0
#define MATCH_SKIP         (-995)
100
0
#define MATCH_SKIP_ARG     (-994)
101
1.01M
#define MATCH_THEN         (-993)
102
0
#define MATCH_BACKTRACK_MAX MATCH_THEN
103
0
#define MATCH_BACKTRACK_MIN MATCH_COMMIT
104
105
/* Group frame type values. Zero means the frame is not a group frame. The
106
lower 16 bits are used for data (e.g. the capture number). Group frames are
107
used for most groups so that information about the start is easily available at
108
the end without having to scan back through intermediate frames (backtrack
109
points). */
110
111
7.74k
#define GF_CAPTURE     0x00010000u
112
18
#define GF_NOCAPTURE   0x00020000u
113
6.38k
#define GF_CONDASSERT  0x00030000u
114
8.78k
#define GF_RECURSE     0x00040000u
115
116
/* Masks for the identity and data parts of the group frame type. */
117
118
15.1k
#define GF_IDMASK(a)   ((a) & 0xffff0000u)
119
0
#define GF_DATAMASK(a) ((a) & 0x0000ffffu)
120
121
/* Repetition types */
122
123
enum { REPTYPE_MIN, REPTYPE_MAX, REPTYPE_POS };
124
125
/* Min and max values for the common repeats; a maximum of UINT32_MAX =>
126
infinity. */
127
128
static const uint32_t rep_min[] = {
129
  0, 0,       /* * and *? */
130
  1, 1,       /* + and +? */
131
  0, 0,       /* ? and ?? */
132
  0, 0,       /* dummy placefillers for OP_CR[MIN]RANGE */
133
  0, 1, 0 };  /* OP_CRPOS{STAR, PLUS, QUERY} */
134
135
static const uint32_t rep_max[] = {
136
  UINT32_MAX, UINT32_MAX,      /* * and *? */
137
  UINT32_MAX, UINT32_MAX,      /* + and +? */
138
  1, 1,                        /* ? and ?? */
139
  0, 0,                        /* dummy placefillers for OP_CR[MIN]RANGE */
140
  UINT32_MAX, UINT32_MAX, 1 }; /* OP_CRPOS{STAR, PLUS, QUERY} */
141
142
/* Repetition types - must include OP_CRPOSRANGE (not needed above) */
143
144
static const uint32_t rep_typ[] = {
145
  REPTYPE_MAX, REPTYPE_MIN,    /* * and *? */
146
  REPTYPE_MAX, REPTYPE_MIN,    /* + and +? */
147
  REPTYPE_MAX, REPTYPE_MIN,    /* ? and ?? */
148
  REPTYPE_MAX, REPTYPE_MIN,    /* OP_CRRANGE and OP_CRMINRANGE */
149
  REPTYPE_POS, REPTYPE_POS,    /* OP_CRPOSSTAR, OP_CRPOSPLUS */
150
  REPTYPE_POS, REPTYPE_POS };  /* OP_CRPOSQUERY, OP_CRPOSRANGE */
151
152
/* Numbers for RMATCH calls at backtracking points. When these lists are
153
changed, the code at RETURN_SWITCH below must be updated in sync.  */
154
155
enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
156
       RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
157
       RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
158
       RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39 };
159
160
#ifdef SUPPORT_WIDE_CHARS
161
enum { RM100=100, RM101, RM102, RM103 };
162
#endif
163
164
#ifdef SUPPORT_UNICODE
165
enum { RM200=200, RM201, RM202, RM203, RM204, RM205, RM206, RM207,
166
       RM208,     RM209, RM210, RM211, RM212, RM213, RM214, RM215,
167
       RM216,     RM217, RM218, RM219, RM220, RM221, RM222, RM223,
168
       RM224 };
169
#endif
170
171
/* Define short names for general fields in the current backtrack frame, which
172
is always pointed to by the F variable. Occasional references to fields in
173
other frames are written out explicitly. There are also some fields in the
174
current frame whose names start with "temp" that are used for short-term,
175
localised backtracking memory. These are #defined with Lxxx names at the point
176
of use and undefined afterwards. */
177
178
688M
#define Fback_frame        F->back_frame
179
245k
#define Fcapture_last      F->capture_last
180
248k
#define Fcurrent_recurse   F->current_recurse
181
1.19G
#define Fecode             F->ecode
182
1.94G
#define Feptr              F->eptr
183
344M
#define Fgroup_frame_type  F->group_frame_type
184
260k
#define Flast_group_offset F->last_group_offset
185
247M
#define Flength            F->length
186
239k
#define Fmark              F->mark
187
1.03G
#define Frdepth            F->rdepth
188
247k
#define Fstart_match       F->start_match
189
253k
#define Foffset_top        F->offset_top
190
0
#define Foccu              F->occu
191
947M
#define Fop                F->op
192
13.5k
#define Fovector           F->ovector
193
688M
#define Freturn_id         F->return_id
194
195
196
#ifdef DEBUG_FRAMES_DISPLAY
197
/*************************************************
198
*      Display current frames and contents       *
199
*************************************************/
200
201
/* This debugging function displays the current set of frames and their
202
contents. It is not called automatically from anywhere, the intention being
203
that calls can be inserted where necessary when debugging frame-related
204
problems.
205
206
Arguments:
207
  f           the file to write to
208
  F           the current top frame
209
  P           a previous frame of interest
210
  frame_size  the frame size
211
  mb          points to the match block
212
  match_data  points to the match data block
213
  s           identification text
214
215
Returns:    nothing
216
*/
217
218
static void
219
display_frames(FILE *f, heapframe *F, heapframe *P, PCRE2_SIZE frame_size,
220
  match_block *mb, pcre2_match_data *match_data, const char *s, ...)
221
{
222
uint32_t i;
223
heapframe *Q;
224
va_list ap;
225
va_start(ap, s);
226
227
fprintf(f, "FRAMES ");
228
vfprintf(f, s, ap);
229
va_end(ap);
230
231
if (P != NULL) fprintf(f, " P=%lu",
232
  ((char *)P - (char *)(match_data->heapframes))/frame_size);
233
fprintf(f, "\n");
234
235
for (i = 0, Q = match_data->heapframes;
236
     Q <= F;
237
     i++, Q = (heapframe *)((char *)Q + frame_size))
238
  {
239
  fprintf(f, "Frame %d type=%x subj=%lu code=%d back=%lu id=%d",
240
    i, Q->group_frame_type, Q->eptr - mb->start_subject, *(Q->ecode),
241
    Q->back_frame, Q->return_id);
242
243
  if (Q->last_group_offset == PCRE2_UNSET)
244
    fprintf(f, " lgoffset=unset\n");
245
  else
246
    fprintf(f, " lgoffset=%lu\n",  Q->last_group_offset/frame_size);
247
  }
248
}
249
250
#endif
251
252
253
254
/*************************************************
255
*                Process a callout               *
256
*************************************************/
257
258
/* This function is called for all callouts, whether "standalone" or at the
259
start of a conditional group. Fecode will be pointing to either OP_CALLOUT or
260
OP_CALLOUT_STR. A callout block is allocated in pcre2_match() and initialized
261
with fixed values.
262
263
Arguments:
264
  F          points to the current backtracking frame
265
  mb         points to the match block
266
  lengthptr  where to return the length of the callout item
267
268
Returns:     the return from the callout
269
             or 0 if no callout function exists
270
*/
271
272
static int
273
do_callout(heapframe *F, match_block *mb, PCRE2_SIZE *lengthptr)
274
0
{
275
0
int rc;
276
0
PCRE2_SIZE save0, save1;
277
0
PCRE2_SIZE *callout_ovector;
278
0
pcre2_callout_block *cb;
279
280
0
*lengthptr = (*Fecode == OP_CALLOUT)?
281
0
  PRIV(OP_lengths)[OP_CALLOUT] : GET(Fecode, 1 + 2*LINK_SIZE);
282
283
0
if (mb->callout == NULL) return 0;   /* No callout function provided */
284
285
/* The original matching code (pre 10.30) worked directly with the ovector
286
passed by the user, and this was passed to callouts. Now that the working
287
ovector is in the backtracking frame, it no longer needs to reserve space for
288
the overall match offsets (which would waste space in the frame). For backward
289
compatibility, however, we pass capture_top and offset_vector to the callout as
290
if for the extended ovector, and we ensure that the first two slots are unset
291
by preserving and restoring their current contents. Picky compilers complain if
292
references such as Fovector[-2] are used directly, so we set up a separate
293
pointer. */
294
295
0
callout_ovector = (PCRE2_SIZE *)(Fovector) - 2;
296
297
/* The cb->version, cb->subject, cb->subject_length, and cb->start_match fields
298
are set externally. The first 3 never change; the last is updated for each
299
bumpalong. */
300
301
0
cb = mb->cb;
302
0
cb->capture_top      = (uint32_t)Foffset_top/2 + 1;
303
0
cb->capture_last     = Fcapture_last;
304
0
cb->offset_vector    = callout_ovector;
305
0
cb->mark             = mb->nomatch_mark;
306
0
cb->current_position = (PCRE2_SIZE)(Feptr - mb->start_subject);
307
0
cb->pattern_position = GET(Fecode, 1);
308
0
cb->next_item_length = GET(Fecode, 1 + LINK_SIZE);
309
310
0
if (*Fecode == OP_CALLOUT)  /* Numerical callout */
311
0
  {
312
0
  cb->callout_number = Fecode[1 + 2*LINK_SIZE];
313
0
  cb->callout_string_offset = 0;
314
0
  cb->callout_string = NULL;
315
0
  cb->callout_string_length = 0;
316
0
  }
317
0
else  /* String callout */
318
0
  {
319
0
  cb->callout_number = 0;
320
0
  cb->callout_string_offset = GET(Fecode, 1 + 3*LINK_SIZE);
321
0
  cb->callout_string = Fecode + (1 + 4*LINK_SIZE) + 1;
322
0
  cb->callout_string_length =
323
0
    *lengthptr - (1 + 4*LINK_SIZE) - 2;
324
0
  }
325
326
0
save0 = callout_ovector[0];
327
0
save1 = callout_ovector[1];
328
0
callout_ovector[0] = callout_ovector[1] = PCRE2_UNSET;
329
0
rc = mb->callout(cb, mb->callout_data);
330
0
callout_ovector[0] = save0;
331
0
callout_ovector[1] = save1;
332
0
cb->callout_flags = 0;
333
0
return rc;
334
0
}
335
336
337
338
/*************************************************
339
*          Match a back-reference                *
340
*************************************************/
341
342
/* This function is called only when it is known that the offset lies within
343
the offsets that have so far been used in the match. Note that in caseless
344
UTF-8 mode, the number of subject bytes matched may be different to the number
345
of reference bytes. (In theory this could also happen in UTF-16 mode, but it
346
seems unlikely.)
347
348
Arguments:
349
  offset      index into the offset vector
350
  caseless    TRUE if caseless
351
  caseopts    bitmask of REFI_FLAG_XYZ values
352
  F           the current backtracking frame pointer
353
  mb          points to match block
354
  lengthptr   pointer for returning the length matched
355
356
Returns:      = 0 successful match; number of code units matched is set
357
              < 0 no match
358
              > 0 partial match
359
*/
360
361
static int
362
match_ref(PCRE2_SIZE offset, BOOL caseless, int caseopts, heapframe *F,
363
  match_block *mb, PCRE2_SIZE *lengthptr)
364
15
{
365
15
PCRE2_SPTR p;
366
15
PCRE2_SIZE length;
367
15
PCRE2_SPTR eptr;
368
15
PCRE2_SPTR eptr_start;
369
370
/* Deal with an unset group. The default is no match, but there is an option to
371
match an empty string. */
372
373
15
if (offset >= Foffset_top || Fovector[offset] == PCRE2_UNSET)
374
15
  {
375
15
  if ((mb->poptions & PCRE2_MATCH_UNSET_BACKREF) != 0)
376
0
    {
377
0
    *lengthptr = 0;
378
0
    return 0;      /* Match */
379
0
    }
380
15
  else return -1;  /* No match */
381
15
  }
382
383
/* Separate the caseless and UTF cases for speed. */
384
385
0
eptr = eptr_start = Feptr;
386
0
p = mb->start_subject + Fovector[offset];
387
0
length = Fovector[offset+1] - Fovector[offset];
388
389
0
if (caseless)
390
0
  {
391
0
#if defined SUPPORT_UNICODE
392
0
  BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
393
0
  BOOL caseless_restrict = (caseopts & REFI_FLAG_CASELESS_RESTRICT) != 0;
394
0
  BOOL turkish_casing = !caseless_restrict && (caseopts & REFI_FLAG_TURKISH_CASING) != 0;
395
396
0
  if (utf || (mb->poptions & PCRE2_UCP) != 0)
397
0
    {
398
0
    PCRE2_SPTR endptr = p + length;
399
400
    /* Match characters up to the end of the reference. NOTE: the number of
401
    code units matched may differ, because in UTF-8 there are some characters
402
    whose upper and lower case codes have different numbers of bytes. For
403
    example, U+023A (2 bytes in UTF-8) is the upper case version of U+2C65 (3
404
    bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a
405
    sequence of two of the latter. It is important, therefore, to check the
406
    length along the reference, not along the subject (earlier code did this
407
    wrong). UCP without UTF uses Unicode properties but not UTF encoding. */
408
409
0
    while (p < endptr)
410
0
      {
411
0
      uint32_t c, d;
412
0
      const ucd_record *ur;
413
0
      if (eptr >= mb->end_subject) return 1;   /* Partial match */
414
415
0
      if (utf)
416
0
        {
417
0
        GETCHARINC(c, eptr);
418
0
        GETCHARINC(d, p);
419
0
        }
420
0
      else
421
0
        {
422
0
        c = *eptr++;
423
0
        d = *p++;
424
0
        }
425
426
0
      if (turkish_casing && UCD_ANY_I(d))
427
0
        {
428
0
        c = UCD_FOLD_I_TURKISH(c);
429
0
        d = UCD_FOLD_I_TURKISH(d);
430
0
        if (c != d) return -1;  /* No match */
431
0
        }
432
0
      else if (c != d && c != (uint32_t)((int)d + (ur = GET_UCD(d))->other_case))
433
0
        {
434
0
        const uint32_t *pp = PRIV(ucd_caseless_sets) + ur->caseset;
435
436
        /* When PCRE2_EXTRA_CASELESS_RESTRICT is set, ignore any caseless sets
437
        that start with an ASCII character. */
438
0
        if (caseless_restrict && *pp < 128) return -1;  /* No match */
439
440
0
        for (;;)
441
0
          {
442
0
          if (c < *pp) return -1;  /* No match */
443
0
          if (c == *pp++) break;
444
0
          }
445
0
        }
446
0
      }
447
0
    }
448
0
  else
449
0
#endif
450
451
  /* Not in UTF or UCP mode */
452
0
    {
453
0
    for (; length > 0; length--)
454
0
      {
455
0
      uint32_t cc, cp;
456
0
      if (eptr >= mb->end_subject) return 1;   /* Partial match */
457
0
      cc = UCHAR21TEST(eptr);
458
0
      cp = UCHAR21TEST(p);
459
0
      if (TABLE_GET(cp, mb->lcc, cp) != TABLE_GET(cc, mb->lcc, cc))
460
0
        return -1;  /* No match */
461
0
      p++;
462
0
      eptr++;
463
0
      }
464
0
    }
465
0
  }
466
467
/* In the caseful case, we can just compare the code units, whether or not we
468
are in UTF and/or UCP mode. When partial matching, we have to do this unit by
469
unit. */
470
471
0
else
472
0
  {
473
0
  if (mb->partial != 0)
474
0
    {
475
0
    for (; length > 0; length--)
476
0
      {
477
0
      if (eptr >= mb->end_subject) return 1;   /* Partial match */
478
0
      if (UCHAR21INCTEST(p) != UCHAR21INCTEST(eptr)) return -1;  /* No match */
479
0
      }
480
0
    }
481
482
  /* Not partial matching */
483
484
0
  else
485
0
    {
486
0
    if ((PCRE2_SIZE)(mb->end_subject - eptr) < length) return 1; /* Partial */
487
0
    if (memcmp(p, eptr, CU2BYTES(length)) != 0) return -1;  /* No match */
488
0
    eptr += length;
489
0
    }
490
0
  }
491
492
0
*lengthptr = eptr - eptr_start;
493
0
return 0;  /* Match */
494
0
}
495
496
497
498
/******************************************************************************
499
*******************************************************************************
500
                   "Recursion" in the match() function
501
502
The original match() function was highly recursive, but this proved to be the
503
source of a number of problems over the years, mostly because of the relatively
504
small system stacks that are commonly found. As new features were added to
505
patterns, various kludges were invented to reduce the amount of stack used,
506
making the code hard to understand in places.
507
508
A version did exist that used individual frames on the heap instead of calling
509
match() recursively, but this ran substantially slower. The current version is
510
a refactoring that uses a vector of frames to remember backtracking points.
511
This runs no slower, and possibly even a bit faster than the original recursive
512
implementation.
513
514
At first, an initial vector of size START_FRAMES_SIZE (enough for maybe 50
515
frames) was allocated on the system stack. If this was not big enough, the heap
516
was used for a larger vector. However, it turns out that there are environments
517
where taking as little as 20KiB from the system stack is an embarrassment.
518
After another refactoring, the heap is used exclusively, but a pointer to the
519
frames vector and its size are cached in the match_data block, so that there is
520
no new memory allocation if the same match_data block is used for multiple
521
matches (unless the frames vector has to be extended).
522
*******************************************************************************
523
******************************************************************************/
524
525
526
527
528
/*************************************************
529
*       Macros for the match() function          *
530
*************************************************/
531
532
/* These macros pack up tests that are used for partial matching several times
533
in the code. The second one is used when we already know we are past the end of
534
the subject. We set the "hit end" flag if the pointer is at the end of the
535
subject and either (a) the pointer is past the earliest inspected character
536
(i.e. something has been matched, even if not part of the actual matched
537
string), or (b) the pattern contains a lookbehind. These are the conditions for
538
which adding more characters may allow the current match to continue.
539
540
For hard partial matching, we immediately return a partial match. Otherwise,
541
carrying on means that a complete match on the current subject will be sought.
542
A partial match is returned only if no complete match can be found. */
543
544
#define CHECK_PARTIAL() \
545
16.5M
  do { \
546
16.5M
     if (Feptr >= mb->end_subject) \
547
16.5M
       { \
548
1.26M
       SCHECK_PARTIAL(); \
549
1.26M
       } \
550
16.5M
     } \
551
16.5M
  while (0)
552
553
#define SCHECK_PARTIAL() \
554
9.28M
  do { \
555
9.28M
     if (mb->partial != 0 && \
556
9.28M
         (Feptr > mb->start_used_ptr || mb->allowemptypartial)) \
557
9.28M
       { \
558
0
       mb->hitend = TRUE; \
559
0
       if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; \
560
0
       } \
561
9.28M
     } \
562
9.28M
  while (0)
563
564
565
/* These macros are used to implement backtracking. They simulate a recursive
566
call to the match() function by means of a local vector of frames which
567
remember the backtracking points. */
568
569
#define RMATCH(ra,rb) \
570
344M
  do { \
571
344M
     start_ecode = ra; \
572
344M
     Freturn_id = rb; \
573
344M
     goto MATCH_RECURSE; \
574
344M
     L_##rb:; \
575
344M
     } \
576
344M
  while (0)
577
578
#define RRETURN(ra) \
579
344M
  do { \
580
344M
     rrc = ra; \
581
344M
     goto RETURN_SWITCH; \
582
344M
     } \
583
344M
  while (0)
584
585
586
587
/*************************************************
588
*         Match from current position            *
589
*************************************************/
590
591
/* This function is called to run one match attempt at a single starting point
592
in the subject.
593
594
Performance note: It might be tempting to extract commonly used fields from the
595
mb structure (e.g. end_subject) into individual variables to improve
596
performance. Tests using gcc on a SPARC disproved this; in the first case, it
597
made performance worse.
598
599
Arguments:
600
   start_eptr   starting character in subject
601
   start_ecode  starting position in compiled code
602
   top_bracket  number of capturing parentheses in the pattern
603
   frame_size   size of each backtracking frame
604
   match_data   pointer to the match_data block
605
   mb           pointer to "static" variables block
606
607
Returns:        MATCH_MATCH if matched            )  these values are >= 0
608
                MATCH_NOMATCH if failed to match  )
609
                negative MATCH_xxx value for PRUNE, SKIP, etc
610
                negative PCRE2_ERROR_xxx value if aborted by an error condition
611
                (e.g. stopped by repeated call or depth limit)
612
*/
613
614
static int
615
match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket,
616
  PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb)
617
239k
{
618
/* Frame-handling variables */
619
620
239k
heapframe *F;           /* Current frame pointer */
621
239k
heapframe *N = NULL;    /* Temporary frame pointers */
622
239k
heapframe *P = NULL;
623
624
239k
heapframe *frames_top;  /* End of frames vector */
625
239k
heapframe *assert_accept_frame = NULL;  /* For passing back a frame with captures */
626
239k
PCRE2_SIZE frame_copy_size;   /* Amount to copy when creating a new frame */
627
628
/* Local variables that do not need to be preserved over calls to RMATCH(). */
629
630
239k
PCRE2_SPTR branch_end = NULL;
631
239k
PCRE2_SPTR branch_start;
632
239k
PCRE2_SPTR bracode;     /* Temp pointer to start of group */
633
239k
PCRE2_SIZE offset;      /* Used for group offsets */
634
239k
PCRE2_SIZE length;      /* Used for various length calculations */
635
636
239k
int rrc;                /* Return from functions & backtracking "recursions" */
637
239k
#ifdef SUPPORT_UNICODE
638
239k
int proptype;           /* Type of character property */
639
239k
#endif
640
641
239k
uint32_t i;             /* Used for local loops */
642
239k
uint32_t fc;            /* Character values */
643
239k
uint32_t number;        /* Used for group and other numbers */
644
239k
uint32_t reptype = 0;   /* Type of repetition (0 to avoid compiler warning) */
645
239k
uint32_t group_frame_type;  /* Specifies type for new group frames */
646
647
239k
BOOL condition;         /* Used in conditional groups */
648
239k
BOOL cur_is_word;       /* Used in "word" tests */
649
239k
BOOL prev_is_word;      /* Used in "word" tests */
650
651
/* UTF and UCP flags */
652
653
239k
#ifdef SUPPORT_UNICODE
654
239k
BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
655
239k
BOOL ucp = (mb->poptions & PCRE2_UCP) != 0;
656
#else
657
BOOL utf = FALSE;  /* Required for convenience even when no Unicode support */
658
#endif
659
660
/* This is the length of the last part of a backtracking frame that must be
661
copied when a new frame is created. */
662
663
239k
frame_copy_size = frame_size - offsetof(heapframe, eptr);
664
665
/* Set up the first frame and the end of the frames vector. */
666
667
239k
F = match_data->heapframes;
668
239k
frames_top = (heapframe *)((char *)F + match_data->heapframes_size);
669
670
239k
Frdepth = 0;                        /* "Recursion" depth */
671
239k
Fcapture_last = 0;                  /* Number of most recent capture */
672
239k
Fcurrent_recurse = RECURSE_UNSET;   /* Not pattern recursing. */
673
239k
Fstart_match = Feptr = start_eptr;  /* Current data pointer and start match */
674
239k
Fmark = NULL;                       /* Most recent mark */
675
239k
Foffset_top = 0;                    /* End of captures within the frame */
676
239k
Flast_group_offset = PCRE2_UNSET;   /* Saved frame of most recent group */
677
239k
group_frame_type = 0;               /* Not a start of group frame */
678
239k
goto NEW_FRAME;                     /* Start processing with this frame */
679
680
/* Come back here when we want to create a new frame for remembering a
681
backtracking point. */
682
683
344M
MATCH_RECURSE:
684
685
/* Set up a new backtracking frame. If the vector is full, get a new one,
686
doubling the size, but constrained by the heap limit (which is in KiB). */
687
688
344M
N = (heapframe *)((char *)F + frame_size);
689
344M
if ((heapframe *)((char *)N + frame_size) >= frames_top)
690
0
  {
691
0
  heapframe *new;
692
0
  PCRE2_SIZE newsize;
693
0
  PCRE2_SIZE usedsize = (char *)N - (char *)(match_data->heapframes);
694
695
0
  if (match_data->heapframes_size >= PCRE2_SIZE_MAX / 2)
696
0
    {
697
0
    if (match_data->heapframes_size == PCRE2_SIZE_MAX - 1)
698
0
      return PCRE2_ERROR_NOMEMORY;
699
0
    newsize = PCRE2_SIZE_MAX - 1;
700
0
    }
701
0
  else
702
0
    newsize = match_data->heapframes_size * 2;
703
704
0
  if (newsize / 1024 >= mb->heap_limit)
705
0
    {
706
0
    PCRE2_SIZE old_size = match_data->heapframes_size / 1024;
707
0
    if (mb->heap_limit <= old_size)
708
0
      return PCRE2_ERROR_HEAPLIMIT;
709
0
    else
710
0
      {
711
0
      PCRE2_SIZE max_delta = 1024 * (mb->heap_limit - old_size);
712
0
      int over_bytes = match_data->heapframes_size % 1024;
713
0
      if (over_bytes) max_delta -= (1024 - over_bytes);
714
0
      newsize = match_data->heapframes_size + max_delta;
715
0
      }
716
0
    }
717
718
  /* With a heap limit set, the permitted additional size may not be enough for
719
  another frame, so do a final check. */
720
721
0
  if (newsize - usedsize < frame_size) return PCRE2_ERROR_HEAPLIMIT;
722
0
  new = match_data->memctl.malloc(newsize, match_data->memctl.memory_data);
723
0
  if (new == NULL) return PCRE2_ERROR_NOMEMORY;
724
0
  memcpy(new, match_data->heapframes, usedsize);
725
726
0
  N = (heapframe *)((char *)new + usedsize);
727
0
  F = (heapframe *)((char *)N - frame_size);
728
729
0
  match_data->memctl.free(match_data->heapframes, match_data->memctl.memory_data);
730
0
  match_data->heapframes = new;
731
0
  match_data->heapframes_size = newsize;
732
0
  frames_top = (heapframe *)((char *)new + newsize);
733
0
  }
734
735
#ifdef DEBUG_SHOW_RMATCH
736
fprintf(stderr, "++ RMATCH %d frame=%d", Freturn_id, Frdepth + 1);
737
if (group_frame_type != 0)
738
  {
739
  fprintf(stderr, " type=%x ", group_frame_type);
740
  switch (GF_IDMASK(group_frame_type))
741
    {
742
    case GF_CAPTURE:
743
    fprintf(stderr, "capture=%d", GF_DATAMASK(group_frame_type));
744
    break;
745
746
    case GF_NOCAPTURE:
747
    fprintf(stderr, "nocapture op=%d", GF_DATAMASK(group_frame_type));
748
    break;
749
750
    case GF_CONDASSERT:
751
    fprintf(stderr, "condassert op=%d", GF_DATAMASK(group_frame_type));
752
    break;
753
754
    case GF_RECURSE:
755
    fprintf(stderr, "recurse=%d", GF_DATAMASK(group_frame_type));
756
    break;
757
758
    default:
759
    fprintf(stderr, "*** unknown ***");
760
    break;
761
    }
762
  }
763
fprintf(stderr, "\n");
764
#endif
765
766
/* Copy those fields that must be copied into the new frame, increase the
767
"recursion" depth (i.e. the new frame's index) and then make the new frame
768
current. */
769
770
344M
memcpy((char *)N + offsetof(heapframe, eptr),
771
344M
       (char *)F + offsetof(heapframe, eptr),
772
344M
       frame_copy_size);
773
774
344M
N->rdepth = Frdepth + 1;
775
344M
F = N;
776
777
/* Carry on processing with a new frame. */
778
779
344M
NEW_FRAME:
780
344M
Fgroup_frame_type = group_frame_type;
781
344M
Fecode = start_ecode;      /* Starting code pointer */
782
344M
Fback_frame = frame_size;  /* Default is go back one frame */
783
784
/* If this is a special type of group frame, remember its offset for quick
785
access at the end of the group. If this is a recursion, set a new current
786
recursion value. */
787
788
344M
if (group_frame_type != 0)
789
8.78k
  {
790
8.78k
  Flast_group_offset = (char *)F - (char *)match_data->heapframes;
791
8.78k
  if (GF_IDMASK(group_frame_type) == GF_RECURSE)
792
0
    Fcurrent_recurse = GF_DATAMASK(group_frame_type);
793
8.78k
  group_frame_type = 0;
794
8.78k
  }
795
796
797
/* ========================================================================= */
798
/* This is the main processing loop. First check that we haven't recorded too
799
many backtracks (search tree is too large), or that we haven't exceeded the
800
recursive depth limit (used too many backtracking frames). If not, process the
801
opcodes. */
802
803
344M
if (mb->match_call_count++ >= mb->match_limit) return PCRE2_ERROR_MATCHLIMIT;
804
344M
if (Frdepth >= mb->match_limit_depth) return PCRE2_ERROR_DEPTHLIMIT;
805
806
#ifdef DEBUG_SHOW_OPS
807
fprintf(stderr, "\n++ New frame: type=0x%x subject offset %ld\n",
808
  GF_IDMASK(Fgroup_frame_type), Feptr - mb->start_subject);
809
#endif
810
811
344M
for (;;)
812
446M
  {
813
#ifdef DEBUG_SHOW_OPS
814
fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
815
  OP_names[*Fecode]);
816
#endif
817
818
446M
  Fop = (uint8_t)(*Fecode);  /* Cast needed for 16-bit and 32-bit modes */
819
446M
  switch(Fop)
820
446M
    {
821
    /* ===================================================================== */
822
    /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes, to close
823
    any currently open capturing brackets. Unlike reaching the end of a group,
824
    where we know the starting frame is at the top of the chained frames, in
825
    this case we have to search back for the relevant frame in case other types
826
    of group that use chained frames have intervened. Multiple OP_CLOSEs always
827
    come innermost first, which matches the chain order. We can ignore this in
828
    a recursion, because captures are not passed out of recursions. */
829
830
0
    case OP_CLOSE:
831
0
    if (Fcurrent_recurse == RECURSE_UNSET)
832
0
      {
833
0
      number = GET2(Fecode, 1);
834
0
      offset = Flast_group_offset;
835
0
      for(;;)
836
0
        {
837
        /* Corrupted heapframes? Trigger an assert and return an error. */
838
0
        PCRE2_ASSERT(offset != PCRE2_UNSET);
839
0
        if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL;
840
841
0
        N = (heapframe *)((char *)match_data->heapframes + offset);
842
0
        P = (heapframe *)((char *)N - frame_size);
843
0
        if (N->group_frame_type == (GF_CAPTURE | number)) break;
844
0
        offset = P->last_group_offset;
845
0
        }
846
0
      offset = (number << 1) - 2;
847
0
      Fcapture_last = number;
848
0
      Fovector[offset] = P->eptr - mb->start_subject;
849
0
      Fovector[offset+1] = Feptr - mb->start_subject;
850
0
      if (offset >= Foffset_top) Foffset_top = offset + 2;
851
0
      }
852
0
    Fecode += PRIV(OP_lengths)[*Fecode];
853
0
    break;
854
855
856
    /* ===================================================================== */
857
    /* Real or forced end of the pattern, assertion, or recursion. In an
858
    assertion ACCEPT, update the last used pointer and remember the current
859
    frame so that the captures and mark can be fished out of it. */
860
861
0
    case OP_ASSERT_ACCEPT:
862
0
    if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
863
0
    assert_accept_frame = F;
864
0
    RRETURN(MATCH_ACCEPT);
865
866
    /* For ACCEPT within a recursion, we have to find the most recent
867
    recursion. If not in a recursion, fall through to code that is common with
868
    OP_END. */
869
870
0
    case OP_ACCEPT:
871
0
    if (Fcurrent_recurse != RECURSE_UNSET)
872
0
      {
873
#ifdef DEBUG_SHOW_OPS
874
      fprintf(stderr, "++ Accept within recursion\n");
875
#endif
876
0
      offset = Flast_group_offset;
877
0
      for(;;)
878
0
        {
879
        /* Corrupted heapframes? Trigger an assert and return an error. */
880
0
        PCRE2_ASSERT(offset != PCRE2_UNSET);
881
0
        if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL;
882
883
0
        N = (heapframe *)((char *)match_data->heapframes + offset);
884
0
        P = (heapframe *)((char *)N - frame_size);
885
0
        if (GF_IDMASK(N->group_frame_type) == GF_RECURSE) break;
886
0
        offset = P->last_group_offset;
887
0
        }
888
889
      /* N is now the frame of the recursion; the previous frame is at the
890
      OP_RECURSE position. Go back there, copying the current subject position
891
      and mark, and the start_match position (\K might have changed it), and
892
      then move on past the OP_RECURSE. */
893
894
0
      P->eptr = Feptr;
895
0
      P->mark = Fmark;
896
0
      P->start_match = Fstart_match;
897
0
      F = P;
898
0
      Fecode += 1 + LINK_SIZE;
899
0
      continue;
900
0
      }
901
    /* Fall through */
902
903
    /* OP_END itself can never be reached within a recursion because that is
904
    picked up when the OP_KET that always precedes OP_END is reached. */
905
906
2.60k
    case OP_END:
907
908
    /* Fail for an empty string match if either PCRE2_NOTEMPTY is set, or if
909
    PCRE2_NOTEMPTY_ATSTART is set and we have matched at the start of the
910
    subject. In both cases, backtracking will then try other alternatives, if
911
    any. */
912
913
2.60k
    if (Feptr == Fstart_match &&
914
2.14k
         ((mb->moptions & PCRE2_NOTEMPTY) != 0 ||
915
2.14k
           ((mb->moptions & PCRE2_NOTEMPTY_ATSTART) != 0 &&
916
1.86k
             Fstart_match == mb->start_subject + mb->start_offset)))
917
1.86k
      {
918
#ifdef DEBUG_SHOW_OPS
919
      fprintf(stderr, "++ Backtrack because empty string\n");
920
#endif
921
1.86k
      RRETURN(MATCH_NOMATCH);
922
1.86k
      }
923
924
    /* Fail if PCRE2_ENDANCHORED is set and the end of the match is not
925
    the end of the subject. After (*ACCEPT) we fail the entire match (at this
926
    position) but backtrack if we've reached the end of the pattern. This
927
    applies whether or not we are in a recursion. */
928
929
735
    if (Feptr < mb->end_subject &&
930
546
        ((mb->moptions | mb->poptions) & PCRE2_ENDANCHORED) != 0)
931
0
      {
932
0
      if (Fop == OP_END)
933
0
        {
934
#ifdef DEBUG_SHOW_OPS
935
        fprintf(stderr, "++ Backtrack because not at end (endanchored set)\n");
936
#endif
937
0
        RRETURN(MATCH_NOMATCH);
938
0
        }
939
940
#ifdef DEBUG_SHOW_OPS
941
      fprintf(stderr, "++ Failed ACCEPT not at end (endanchnored set)\n");
942
#endif
943
0
      return MATCH_NOMATCH;   /* (*ACCEPT) */
944
0
      }
945
946
    /* We have a successful match of the whole pattern. Record the result and
947
    then do a direct return from the function. If there is space in the offset
948
    vector, set any pairs that follow the highest-numbered captured string but
949
    are less than the number of capturing groups in the pattern to PCRE2_UNSET.
950
    It is documented that this happens. "Gaps" are set to PCRE2_UNSET
951
    dynamically. It is only those at the end that need setting here. */
952
953
735
    mb->end_match_ptr = Feptr;           /* Record where we ended */
954
735
    mb->end_offset_top = Foffset_top;    /* and how many extracts were taken */
955
735
    mb->mark = Fmark;                    /* and the last success mark */
956
735
    if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
957
958
735
    match_data->ovector[0] = Fstart_match - mb->start_subject;
959
735
    match_data->ovector[1] = Feptr - mb->start_subject;
960
961
    /* Set i to the smaller of the sizes of the external and frame ovectors. */
962
963
735
    i = 2 * ((top_bracket + 1 > match_data->oveccount)?
964
735
      match_data->oveccount : top_bracket + 1);
965
735
    memcpy(match_data->ovector + 2, Fovector, (i - 2) * sizeof(PCRE2_SIZE));
966
863
    while (--i >= Foffset_top + 2) match_data->ovector[i] = PCRE2_UNSET;
967
735
    return MATCH_MATCH;  /* Note: NOT RRETURN */
968
969
970
    /*===================================================================== */
971
    /* Match any single character type except newline; have to take care with
972
    CRLF newlines and partial matching. */
973
974
596k
    case OP_ANY:
975
596k
    if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
976
593k
    if (mb->partial != 0 &&
977
0
        Feptr == mb->end_subject - 1 &&
978
0
        NLBLOCK->nltype == NLTYPE_FIXED &&
979
0
        NLBLOCK->nllen == 2 &&
980
0
        UCHAR21TEST(Feptr) == NLBLOCK->nl[0])
981
0
      {
982
0
      mb->hitend = TRUE;
983
0
      if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
984
0
      }
985
    /* Fall through */
986
987
    /* Match any single character whatsoever. */
988
989
1.95M
    case OP_ALLANY:
990
1.95M
    if (Feptr >= mb->end_subject)  /* DO NOT merge the Feptr++ here; it must */
991
3.13k
      {                            /* not be updated before SCHECK_PARTIAL. */
992
3.13k
      SCHECK_PARTIAL();
993
3.13k
      RRETURN(MATCH_NOMATCH);
994
3.13k
      }
995
1.95M
    Feptr++;
996
1.95M
#ifdef SUPPORT_UNICODE
997
1.95M
    if (utf) ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
998
1.95M
#endif
999
1.95M
    Fecode++;
1000
1.95M
    break;
1001
1002
1003
    /* ===================================================================== */
1004
    /* Match a single code unit, even in UTF mode. This opcode really does
1005
    match any code unit, even newline. (It really should be called ANYCODEUNIT,
1006
    of course - the byte name is from pre-16 bit days.) */
1007
1008
650
    case OP_ANYBYTE:
1009
650
    if (Feptr >= mb->end_subject)   /* DO NOT merge the Feptr++ here; it must */
1010
9
      {                             /* not be updated before SCHECK_PARTIAL. */
1011
9
      SCHECK_PARTIAL();
1012
9
      RRETURN(MATCH_NOMATCH);
1013
9
      }
1014
641
    Feptr++;
1015
641
    Fecode++;
1016
641
    break;
1017
1018
1019
    /* ===================================================================== */
1020
    /* Match a single character, casefully */
1021
1022
128M
    case OP_CHAR:
1023
128M
#ifdef SUPPORT_UNICODE
1024
128M
    if (utf)
1025
65.9M
      {
1026
65.9M
      Flength = 1;
1027
65.9M
      Fecode++;
1028
65.9M
      GETCHARLEN(fc, Fecode, Flength);
1029
65.9M
      if (Flength > (PCRE2_SIZE)(mb->end_subject - Feptr))
1030
1.16M
        {
1031
1.16M
        CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
1032
1.16M
        RRETURN(MATCH_NOMATCH);
1033
1.16M
        }
1034
65.9M
      for (; Flength > 0; Flength--)
1035
64.7M
        {
1036
64.7M
        if (*Fecode++ != UCHAR21INC(Feptr)) RRETURN(MATCH_NOMATCH);
1037
64.7M
        }
1038
64.7M
      }
1039
62.4M
    else
1040
62.4M
#endif
1041
1042
    /* Not UTF mode */
1043
62.4M
      {
1044
62.4M
      if (mb->end_subject - Feptr < 1)
1045
187k
        {
1046
187k
        SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
1047
187k
        RRETURN(MATCH_NOMATCH);
1048
187k
        }
1049
62.2M
      if (Fecode[1] != *Feptr++) RRETURN(MATCH_NOMATCH);
1050
62.2k
      Fecode += 2;
1051
62.2k
      }
1052
1.24M
    break;
1053
1054
1055
    /* ===================================================================== */
1056
    /* Match a single character, caselessly. If we are at the end of the
1057
    subject, give up immediately. We get here only when the pattern character
1058
    has at most one other case. Characters with more than two cases are coded
1059
    as OP_PROP with the pseudo-property PT_CLIST. */
1060
1061
11.6M
    case OP_CHARI:
1062
11.6M
    if (Feptr >= mb->end_subject)
1063
58.7k
      {
1064
58.7k
      SCHECK_PARTIAL();
1065
58.7k
      RRETURN(MATCH_NOMATCH);
1066
58.7k
      }
1067
1068
11.6M
#ifdef SUPPORT_UNICODE
1069
11.6M
    if (utf)
1070
2.94M
      {
1071
2.94M
      Flength = 1;
1072
2.94M
      Fecode++;
1073
2.94M
      GETCHARLEN(fc, Fecode, Flength);
1074
1075
      /* If the pattern character's value is < 128, we know that its other case
1076
      (if any) is also < 128 (and therefore only one code unit long in all
1077
      code-unit widths), so we can use the fast lookup table. We checked above
1078
      that there is at least one character left in the subject. */
1079
1080
2.94M
      if (fc < 128)
1081
2.93M
        {
1082
2.93M
        uint32_t cc = UCHAR21(Feptr);
1083
2.93M
        if (mb->lcc[fc] != TABLE_GET(cc, mb->lcc, cc)) RRETURN(MATCH_NOMATCH);
1084
150k
        Fecode++;
1085
150k
        Feptr++;
1086
150k
        }
1087
1088
      /* Otherwise we must pick up the subject character and use Unicode
1089
      property support to test its other case. Note that we cannot use the
1090
      value of "Flength" to check for sufficient bytes left, because the other
1091
      case of the character may have more or fewer code units. */
1092
1093
16.9k
      else
1094
16.9k
        {
1095
16.9k
        uint32_t dc;
1096
16.9k
        GETCHARINC(dc, Feptr);
1097
16.9k
        Fecode += Flength;
1098
16.9k
        if (dc != fc && dc != UCD_OTHERCASE(fc)) RRETURN(MATCH_NOMATCH);
1099
16.9k
        }
1100
2.94M
      }
1101
1102
    /* If UCP is set without UTF we must do the same as above, but with one
1103
    character per code unit. */
1104
1105
8.66M
    else if (ucp)
1106
0
      {
1107
0
      uint32_t cc = UCHAR21(Feptr);
1108
0
      fc = Fecode[1];
1109
0
      if (fc < 128)
1110
0
        {
1111
0
        if (mb->lcc[fc] != TABLE_GET(cc, mb->lcc, cc)) RRETURN(MATCH_NOMATCH);
1112
0
        }
1113
0
      else
1114
0
        {
1115
0
        if (cc != fc && cc != UCD_OTHERCASE(fc)) RRETURN(MATCH_NOMATCH);
1116
0
        }
1117
0
      Feptr++;
1118
0
      Fecode += 2;
1119
0
      }
1120
1121
8.66M
    else
1122
8.66M
#endif   /* SUPPORT_UNICODE */
1123
1124
    /* Not UTF or UCP mode; use the table for characters < 256. */
1125
8.66M
      {
1126
8.66M
      if (TABLE_GET(Fecode[1], mb->lcc, Fecode[1])
1127
8.66M
          != TABLE_GET(*Feptr, mb->lcc, *Feptr)) RRETURN(MATCH_NOMATCH);
1128
238k
      Feptr++;
1129
238k
      Fecode += 2;
1130
238k
      }
1131
388k
    break;
1132
1133
1134
    /* ===================================================================== */
1135
    /* Match not a single character. */
1136
1137
388k
    case OP_NOT:
1138
207k
    case OP_NOTI:
1139
207k
    if (Feptr >= mb->end_subject)
1140
914
      {
1141
914
      SCHECK_PARTIAL();
1142
914
      RRETURN(MATCH_NOMATCH);
1143
914
      }
1144
1145
206k
#ifdef SUPPORT_UNICODE
1146
206k
    if (utf)
1147
2.43k
      {
1148
2.43k
      uint32_t ch;
1149
2.43k
      Fecode++;
1150
2.43k
      GETCHARINC(ch, Fecode);
1151
2.43k
      GETCHARINC(fc, Feptr);
1152
2.43k
      if (ch == fc)
1153
67
        {
1154
67
        RRETURN(MATCH_NOMATCH);  /* Caseful match */
1155
67
        }
1156
2.36k
      else if (Fop == OP_NOTI)   /* If caseless */
1157
1.91k
        {
1158
1.91k
        if (ch > 127)
1159
0
          ch = UCD_OTHERCASE(ch);
1160
1.91k
        else
1161
1.91k
          ch = (mb->fcc)[ch];
1162
1.91k
        if (ch == fc) RRETURN(MATCH_NOMATCH);
1163
1.91k
        }
1164
2.43k
      }
1165
1166
    /* UCP without UTF is as above, but with one character per code unit. */
1167
1168
204k
    else if (ucp)
1169
0
      {
1170
0
      uint32_t ch;
1171
0
      fc = UCHAR21INC(Feptr);
1172
0
      ch = Fecode[1];
1173
0
      Fecode += 2;
1174
1175
0
      if (ch == fc)
1176
0
        {
1177
0
        RRETURN(MATCH_NOMATCH);  /* Caseful match */
1178
0
        }
1179
0
      else if (Fop == OP_NOTI)   /* If caseless */
1180
0
        {
1181
0
        if (ch > 127)
1182
0
          ch = UCD_OTHERCASE(ch);
1183
0
        else
1184
0
          ch = (mb->fcc)[ch];
1185
0
        if (ch == fc) RRETURN(MATCH_NOMATCH);
1186
0
        }
1187
0
      }
1188
1189
204k
    else
1190
204k
#endif  /* SUPPORT_UNICODE */
1191
1192
    /* Neither UTF nor UCP is set */
1193
1194
204k
      {
1195
204k
      uint32_t ch = Fecode[1];
1196
204k
      fc = UCHAR21INC(Feptr);
1197
204k
      if (ch == fc || (Fop == OP_NOTI && TABLE_GET(ch, mb->fcc, ch) == fc))
1198
1.67k
        RRETURN(MATCH_NOMATCH);
1199
202k
      Fecode += 2;
1200
202k
      }
1201
204k
    break;
1202
1203
1204
    /* ===================================================================== */
1205
    /* Match a single character repeatedly. */
1206
1207
8.36M
#define Loclength    F->temp_size
1208
27.1M
#define Lstart_eptr  F->temp_sptr[0]
1209
16.0M
#define Lcharptr     F->temp_sptr[1]
1210
91.7M
#define Lmin         F->temp_32[0]
1211
68.8M
#define Lmax         F->temp_32[1]
1212
42.6M
#define Lc           F->temp_32[2]
1213
4.61M
#define Loc          F->temp_32[3]
1214
1215
204k
    case OP_EXACT:
1216
0
    case OP_EXACTI:
1217
0
    Lmin = Lmax = GET2(Fecode, 1);
1218
0
    Fecode += 1 + IMM2_SIZE;
1219
0
    goto REPEATCHAR;
1220
1221
0
    case OP_POSUPTO:
1222
0
    case OP_POSUPTOI:
1223
0
    reptype = REPTYPE_POS;
1224
0
    Lmin = 0;
1225
0
    Lmax = GET2(Fecode, 1);
1226
0
    Fecode += 1 + IMM2_SIZE;
1227
0
    goto REPEATCHAR;
1228
1229
0
    case OP_UPTO:
1230
0
    case OP_UPTOI:
1231
0
    reptype = REPTYPE_MAX;
1232
0
    Lmin = 0;
1233
0
    Lmax = GET2(Fecode, 1);
1234
0
    Fecode += 1 + IMM2_SIZE;
1235
0
    goto REPEATCHAR;
1236
1237
0
    case OP_MINUPTO:
1238
0
    case OP_MINUPTOI:
1239
0
    reptype = REPTYPE_MIN;
1240
0
    Lmin = 0;
1241
0
    Lmax = GET2(Fecode, 1);
1242
0
    Fecode += 1 + IMM2_SIZE;
1243
0
    goto REPEATCHAR;
1244
1245
11.6k
    case OP_POSSTAR:
1246
31.9k
    case OP_POSSTARI:
1247
31.9k
    reptype = REPTYPE_POS;
1248
31.9k
    Lmin = 0;
1249
31.9k
    Lmax = UINT32_MAX;
1250
31.9k
    Fecode++;
1251
31.9k
    goto REPEATCHAR;
1252
1253
48.0k
    case OP_POSPLUS:
1254
83.5k
    case OP_POSPLUSI:
1255
83.5k
    reptype = REPTYPE_POS;
1256
83.5k
    Lmin = 1;
1257
83.5k
    Lmax = UINT32_MAX;
1258
83.5k
    Fecode++;
1259
83.5k
    goto REPEATCHAR;
1260
1261
14.8M
    case OP_POSQUERY:
1262
16.2M
    case OP_POSQUERYI:
1263
16.2M
    reptype = REPTYPE_POS;
1264
16.2M
    Lmin = 0;
1265
16.2M
    Lmax = 1;
1266
16.2M
    Fecode++;
1267
16.2M
    goto REPEATCHAR;
1268
1269
327
    case OP_STAR:
1270
2.64k
    case OP_STARI:
1271
4.04k
    case OP_MINSTAR:
1272
8.00k
    case OP_MINSTARI:
1273
10.8k
    case OP_PLUS:
1274
12.2k
    case OP_PLUSI:
1275
12.8k
    case OP_MINPLUS:
1276
13.7k
    case OP_MINPLUSI:
1277
4.66M
    case OP_QUERY:
1278
5.45M
    case OP_QUERYI:
1279
6.59M
    case OP_MINQUERY:
1280
6.65M
    case OP_MINQUERYI:
1281
6.65M
    fc = *Fecode++ - ((Fop < OP_STARI)? OP_STAR : OP_STARI);
1282
6.65M
    Lmin = rep_min[fc];
1283
6.65M
    Lmax = rep_max[fc];
1284
6.65M
    reptype = rep_typ[fc];
1285
1286
    /* Common code for all repeated single-character matches. We first check
1287
    for the minimum number of characters. If the minimum equals the maximum, we
1288
    are done. Otherwise, if minimizing, check the rest of the pattern for a
1289
    match; if there isn't one, advance up to the maximum, one character at a
1290
    time.
1291
1292
    If maximizing, advance up to the maximum number of matching characters,
1293
    until Feptr is past the end of the maximum run. If possessive, we are
1294
    then done (no backing up). Otherwise, match at this position; anything
1295
    other than no match is immediately returned. For nomatch, back up one
1296
    character, unless we are matching \R and the last thing matched was
1297
    \r\n, in which case, back up two code units until we reach the first
1298
    optional character position.
1299
1300
    The various UTF/non-UTF and caseful/caseless cases are handled separately,
1301
    for speed. */
1302
1303
22.9M
    REPEATCHAR:
1304
22.9M
#ifdef SUPPORT_UNICODE
1305
22.9M
    if (utf)
1306
13.2M
      {
1307
13.2M
      Flength = 1;
1308
13.2M
      Lcharptr = Fecode;
1309
13.2M
      GETCHARLEN(fc, Fecode, Flength);
1310
13.2M
      Fecode += Flength;
1311
1312
      /* Handle multi-code-unit character matching, caseful and caseless. */
1313
1314
13.2M
      if (Flength > 1)
1315
2.78M
        {
1316
2.78M
        uint32_t othercase;
1317
1318
2.78M
        if (Fop >= OP_STARI &&     /* Caseless */
1319
4.37k
            (othercase = UCD_OTHERCASE(fc)) != fc)
1320
0
          Loclength = PRIV(ord2utf)(othercase, Foccu);
1321
2.78M
        else Loclength = 0;
1322
1323
2.78M
        for (i = 1; i <= Lmin; i++)
1324
4.37k
          {
1325
4.37k
          if (Feptr <= mb->end_subject - Flength &&
1326
4.37k
            memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0) Feptr += Flength;
1327
4.37k
          else if (Loclength > 0 &&
1328
0
                   Feptr <= mb->end_subject - Loclength &&
1329
0
                   memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0)
1330
0
            Feptr += Loclength;
1331
4.37k
          else
1332
4.37k
            {
1333
4.37k
            CHECK_PARTIAL();
1334
4.37k
            RRETURN(MATCH_NOMATCH);
1335
4.37k
            }
1336
4.37k
          }
1337
1338
2.78M
        if (Lmin == Lmax) continue;
1339
1340
2.78M
        if (reptype == REPTYPE_MIN)
1341
0
          {
1342
0
          for (;;)
1343
0
            {
1344
0
            RMATCH(Fecode, RM202);
1345
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1346
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1347
0
            if (Feptr <= mb->end_subject - Flength &&
1348
0
              memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0) Feptr += Flength;
1349
0
            else if (Loclength > 0 &&
1350
0
                     Feptr <= mb->end_subject - Loclength &&
1351
0
                     memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0)
1352
0
              Feptr += Loclength;
1353
0
            else
1354
0
              {
1355
0
              CHECK_PARTIAL();
1356
0
              RRETURN(MATCH_NOMATCH);
1357
0
              }
1358
0
            }
1359
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
1360
0
          }
1361
1362
2.78M
        else  /* Maximize */
1363
2.78M
          {
1364
2.78M
          Lstart_eptr = Feptr;
1365
2.78M
          for (i = Lmin; i < Lmax; i++)
1366
2.78M
            {
1367
2.78M
            if (Feptr <= mb->end_subject - Flength &&
1368
2.78M
                memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0)
1369
0
              Feptr += Flength;
1370
2.78M
            else if (Loclength > 0 &&
1371
0
                     Feptr <= mb->end_subject - Loclength &&
1372
0
                     memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0)
1373
0
              Feptr += Loclength;
1374
2.78M
            else
1375
2.78M
              {
1376
2.78M
              CHECK_PARTIAL();
1377
2.78M
              break;
1378
2.78M
              }
1379
2.78M
            }
1380
1381
          /* After \C in UTF mode, Lstart_eptr might be in the middle of a
1382
          Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
1383
          go too far. */
1384
1385
2.78M
          if (reptype != REPTYPE_POS) for(;;)
1386
0
            {
1387
0
            if (Feptr <= Lstart_eptr) break;
1388
0
            RMATCH(Fecode, RM203);
1389
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1390
0
            Feptr--;
1391
0
            BACKCHAR(Feptr);
1392
0
            }
1393
2.78M
          }
1394
2.78M
        break;   /* End of repeated wide character handling */
1395
2.78M
        }
1396
1397
      /* Length of UTF character is 1. Put it into the preserved variable and
1398
      fall through to the non-UTF code. */
1399
1400
10.4M
      Lc = fc;
1401
10.4M
      }
1402
9.71M
    else
1403
9.71M
#endif  /* SUPPORT_UNICODE */
1404
1405
    /* When not in UTF mode, load a single-code-unit character. Then proceed as
1406
    above, using Unicode casing if either UTF or UCP is set. */
1407
1408
9.71M
    Lc = *Fecode++;
1409
1410
    /* Caseless comparison */
1411
1412
20.1M
    if (Fop >= OP_STARI)
1413
2.31M
      {
1414
2.31M
#if PCRE2_CODE_UNIT_WIDTH == 8
1415
2.31M
#ifdef SUPPORT_UNICODE
1416
2.31M
      if (ucp && !utf && Lc > 127) Loc = UCD_OTHERCASE(Lc);
1417
2.31M
      else
1418
2.31M
#endif  /* SUPPORT_UNICODE */
1419
      /* Lc will be < 128 in UTF-8 mode. */
1420
2.31M
      Loc = mb->fcc[Lc];
1421
#else /* 16-bit & 32-bit */
1422
#ifdef SUPPORT_UNICODE
1423
      if ((utf || ucp) && Lc > 127) Loc = UCD_OTHERCASE(Lc);
1424
      else
1425
#endif  /* SUPPORT_UNICODE */
1426
      Loc = TABLE_GET(Lc, mb->fcc, Lc);
1427
#endif  /* PCRE2_CODE_UNIT_WIDTH == 8 */
1428
1429
2.32M
      for (i = 1; i <= Lmin; i++)
1430
33.4k
        {
1431
33.4k
        uint32_t cc;                 /* Faster than PCRE2_UCHAR */
1432
33.4k
        if (Feptr >= mb->end_subject)
1433
1.04k
          {
1434
1.04k
          SCHECK_PARTIAL();
1435
1.04k
          RRETURN(MATCH_NOMATCH);
1436
1.04k
          }
1437
32.3k
        cc = UCHAR21TEST(Feptr);
1438
32.3k
        if (Lc != cc && Loc != cc) RRETURN(MATCH_NOMATCH);
1439
2.26k
        Feptr++;
1440
2.26k
        }
1441
2.28M
      if (Lmin == Lmax) continue;
1442
1443
2.28M
      if (reptype == REPTYPE_MIN)
1444
65.4k
        {
1445
65.4k
        for (;;)
1446
68.5k
          {
1447
68.5k
          uint32_t cc;               /* Faster than PCRE2_UCHAR */
1448
68.5k
          RMATCH(Fecode, RM25);
1449
68.5k
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1450
68.5k
          if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1451
65.6k
          if (Feptr >= mb->end_subject)
1452
1.51k
            {
1453
1.51k
            SCHECK_PARTIAL();
1454
1.51k
            RRETURN(MATCH_NOMATCH);
1455
1.51k
            }
1456
64.1k
          cc = UCHAR21TEST(Feptr);
1457
64.1k
          if (Lc != cc && Loc != cc) RRETURN(MATCH_NOMATCH);
1458
3.09k
          Feptr++;
1459
3.09k
          }
1460
0
        PCRE2_UNREACHABLE(); /* Control never reaches here */
1461
0
        }
1462
1463
2.22M
      else  /* Maximize */
1464
2.22M
        {
1465
2.22M
        Lstart_eptr = Feptr;
1466
2.25M
        for (i = Lmin; i < Lmax; i++)
1467
2.24M
          {
1468
2.24M
          uint32_t cc;               /* Faster than PCRE2_UCHAR */
1469
2.24M
          if (Feptr >= mb->end_subject)
1470
9.06k
            {
1471
9.06k
            SCHECK_PARTIAL();
1472
9.06k
            break;
1473
9.06k
            }
1474
2.23M
          cc = UCHAR21TEST(Feptr);
1475
2.23M
          if (Lc != cc && Loc != cc) break;
1476
33.0k
          Feptr++;
1477
33.0k
          }
1478
2.22M
        if (reptype != REPTYPE_POS) for (;;)
1479
793k
          {
1480
793k
          if (Feptr == Lstart_eptr) break;
1481
947
          RMATCH(Fecode, RM26);
1482
947
          Feptr--;
1483
947
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1484
947
          }
1485
2.22M
        }
1486
2.28M
      }
1487
1488
    /* Caseful comparisons (includes all multi-byte characters) */
1489
1490
17.8M
    else
1491
17.8M
      {
1492
17.8M
      for (i = 1; i <= Lmin; i++)
1493
51.4k
        {
1494
51.4k
        if (Feptr >= mb->end_subject)
1495
334
          {
1496
334
          SCHECK_PARTIAL();
1497
334
          RRETURN(MATCH_NOMATCH);
1498
334
          }
1499
51.1k
        if (Lc != UCHAR21INCTEST(Feptr)) RRETURN(MATCH_NOMATCH);
1500
51.1k
        }
1501
1502
17.8M
      if (Lmin == Lmax) continue;
1503
1504
17.8M
      if (reptype == REPTYPE_MIN)
1505
1.13M
        {
1506
1.13M
        for (;;)
1507
1.14M
          {
1508
1.14M
          RMATCH(Fecode, RM27);
1509
1.14M
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1510
1.14M
          if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1511
1.13M
          if (Feptr >= mb->end_subject)
1512
5.54k
            {
1513
5.54k
            SCHECK_PARTIAL();
1514
5.54k
            RRETURN(MATCH_NOMATCH);
1515
5.54k
            }
1516
1.13M
          if (Lc != UCHAR21INCTEST(Feptr)) RRETURN(MATCH_NOMATCH);
1517
1.13M
          }
1518
0
        PCRE2_UNREACHABLE(); /* Control never reaches here */
1519
0
        }
1520
16.6M
      else  /* Maximize */
1521
16.6M
        {
1522
16.6M
        Lstart_eptr = Feptr;
1523
16.7M
        for (i = Lmin; i < Lmax; i++)
1524
16.6M
          {
1525
16.6M
          if (Feptr >= mb->end_subject)
1526
82.1k
            {
1527
82.1k
            SCHECK_PARTIAL();
1528
82.1k
            break;
1529
82.1k
            }
1530
1531
16.6M
          if (Lc != UCHAR21TEST(Feptr)) break;
1532
41.2k
          Feptr++;
1533
41.2k
          }
1534
1535
16.6M
        if (reptype != REPTYPE_POS) for (;;)
1536
4.66M
          {
1537
4.66M
          if (Feptr <= Lstart_eptr) break;
1538
7.20k
          RMATCH(Fecode, RM28);
1539
7.19k
          Feptr--;
1540
7.19k
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1541
7.19k
          }
1542
16.6M
        }
1543
17.8M
      }
1544
18.9M
    break;
1545
1546
18.9M
#undef Loclength
1547
18.9M
#undef Lstart_eptr
1548
18.9M
#undef Lcharptr
1549
18.9M
#undef Lmin
1550
18.9M
#undef Lmax
1551
18.9M
#undef Lc
1552
18.9M
#undef Loc
1553
1554
1555
    /* ===================================================================== */
1556
    /* Match a negated single one-byte character repeatedly. This is almost a
1557
    repeat of the code for a repeated single character, but I haven't found a
1558
    nice way of commoning these up that doesn't require a test of the
1559
    positive/negative option for each character match. Maybe that wouldn't add
1560
    very much to the time taken, but character matching *is* what this is all
1561
    about... */
1562
1563
18.9M
#define Lstart_eptr  F->temp_sptr[0]
1564
18.9M
#define Lmin         F->temp_32[0]
1565
18.9M
#define Lmax         F->temp_32[1]
1566
18.9M
#define Lc           F->temp_32[2]
1567
18.9M
#define Loc          F->temp_32[3]
1568
1569
18.9M
    case OP_NOTEXACT:
1570
0
    case OP_NOTEXACTI:
1571
0
    Lmin = Lmax = GET2(Fecode, 1);
1572
0
    Fecode += 1 + IMM2_SIZE;
1573
0
    goto REPEATNOTCHAR;
1574
1575
0
    case OP_NOTUPTO:
1576
0
    case OP_NOTUPTOI:
1577
0
    Lmin = 0;
1578
0
    Lmax = GET2(Fecode, 1);
1579
0
    reptype = REPTYPE_MAX;
1580
0
    Fecode += 1 + IMM2_SIZE;
1581
0
    goto REPEATNOTCHAR;
1582
1583
0
    case OP_NOTMINUPTO:
1584
0
    case OP_NOTMINUPTOI:
1585
0
    Lmin = 0;
1586
0
    Lmax = GET2(Fecode, 1);
1587
0
    reptype = REPTYPE_MIN;
1588
0
    Fecode += 1 + IMM2_SIZE;
1589
0
    goto REPEATNOTCHAR;
1590
1591
0
    case OP_NOTPOSSTAR:
1592
0
    case OP_NOTPOSSTARI:
1593
0
    reptype = REPTYPE_POS;
1594
0
    Lmin = 0;
1595
0
    Lmax = UINT32_MAX;
1596
0
    Fecode++;
1597
0
    goto REPEATNOTCHAR;
1598
1599
345
    case OP_NOTPOSPLUS:
1600
2.39k
    case OP_NOTPOSPLUSI:
1601
2.39k
    reptype = REPTYPE_POS;
1602
2.39k
    Lmin = 1;
1603
2.39k
    Lmax = UINT32_MAX;
1604
2.39k
    Fecode++;
1605
2.39k
    goto REPEATNOTCHAR;
1606
1607
0
    case OP_NOTPOSQUERY:
1608
0
    case OP_NOTPOSQUERYI:
1609
0
    reptype = REPTYPE_POS;
1610
0
    Lmin = 0;
1611
0
    Lmax = 1;
1612
0
    Fecode++;
1613
0
    goto REPEATNOTCHAR;
1614
1615
0
    case OP_NOTPOSUPTO:
1616
0
    case OP_NOTPOSUPTOI:
1617
0
    reptype = REPTYPE_POS;
1618
0
    Lmin = 0;
1619
0
    Lmax = GET2(Fecode, 1);
1620
0
    Fecode += 1 + IMM2_SIZE;
1621
0
    goto REPEATNOTCHAR;
1622
1623
135
    case OP_NOTSTAR:
1624
135
    case OP_NOTSTARI:
1625
135
    case OP_NOTMINSTAR:
1626
135
    case OP_NOTMINSTARI:
1627
2.58k
    case OP_NOTPLUS:
1628
97.2k
    case OP_NOTPLUSI:
1629
97.9k
    case OP_NOTMINPLUS:
1630
105k
    case OP_NOTMINPLUSI:
1631
105k
    case OP_NOTQUERY:
1632
113k
    case OP_NOTQUERYI:
1633
113k
    case OP_NOTMINQUERY:
1634
121k
    case OP_NOTMINQUERYI:
1635
121k
    fc = *Fecode++ - ((Fop >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
1636
121k
    Lmin = rep_min[fc];
1637
121k
    Lmax = rep_max[fc];
1638
121k
    reptype = rep_typ[fc];
1639
1640
    /* Common code for all repeated single-character non-matches. */
1641
1642
123k
    REPEATNOTCHAR:
1643
123k
    GETCHARINCTEST(Lc, Fecode);
1644
1645
    /* The code is duplicated for the caseless and caseful cases, for speed,
1646
    since matching characters is likely to be quite common. First, ensure the
1647
    minimum number of matches are present. If Lmin = Lmax, we are done.
1648
    Otherwise, if minimizing, keep trying the rest of the expression and
1649
    advancing one matching character if failing, up to the maximum.
1650
    Alternatively, if maximizing, find the maximum number of characters and
1651
    work backwards. */
1652
1653
123k
    if (Fop >= OP_NOTSTARI)     /* Caseless */
1654
119k
      {
1655
119k
#ifdef SUPPORT_UNICODE
1656
119k
      if ((utf || ucp) && Lc > 127)
1657
0
        Loc = UCD_OTHERCASE(Lc);
1658
119k
      else
1659
119k
#endif /* SUPPORT_UNICODE */
1660
1661
119k
      Loc = TABLE_GET(Lc, mb->fcc, Lc);  /* Other case from table */
1662
1663
119k
#ifdef SUPPORT_UNICODE
1664
119k
      if (utf)
1665
11.2k
        {
1666
11.2k
        uint32_t d;
1667
12.1k
        for (i = 1; i <= Lmin; i++)
1668
1.00k
          {
1669
1.00k
          if (Feptr >= mb->end_subject)
1670
42
            {
1671
42
            SCHECK_PARTIAL();
1672
42
            RRETURN(MATCH_NOMATCH);
1673
42
            }
1674
965
          GETCHARINC(d, Feptr);
1675
965
          if (Lc == d || Loc == d) RRETURN(MATCH_NOMATCH);
1676
965
          }
1677
11.2k
        }
1678
108k
      else
1679
108k
#endif  /* SUPPORT_UNICODE */
1680
1681
      /* Not UTF mode */
1682
108k
        {
1683
210k
        for (i = 1; i <= Lmin; i++)
1684
103k
          {
1685
103k
          if (Feptr >= mb->end_subject)
1686
390
            {
1687
390
            SCHECK_PARTIAL();
1688
390
            RRETURN(MATCH_NOMATCH);
1689
390
            }
1690
102k
          if (Lc == *Feptr || Loc == *Feptr) RRETURN(MATCH_NOMATCH);
1691
101k
          Feptr++;
1692
101k
          }
1693
108k
        }
1694
1695
118k
      if (Lmin == Lmax) continue;  /* Finished for exact count */
1696
1697
118k
      if (reptype == REPTYPE_MIN)
1698
15.3k
        {
1699
15.3k
#ifdef SUPPORT_UNICODE
1700
15.3k
        if (utf)
1701
8.01k
          {
1702
8.01k
          uint32_t d;
1703
8.01k
          for (;;)
1704
28.1k
            {
1705
28.1k
            RMATCH(Fecode, RM204);
1706
28.1k
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1707
28.1k
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1708
22.4k
            if (Feptr >= mb->end_subject)
1709
1.00k
              {
1710
1.00k
              SCHECK_PARTIAL();
1711
1.00k
              RRETURN(MATCH_NOMATCH);
1712
1.00k
              }
1713
21.4k
            GETCHARINC(d, Feptr);
1714
21.4k
            if (Lc == d || Loc == d) RRETURN(MATCH_NOMATCH);
1715
21.4k
            }
1716
8.01k
          }
1717
7.31k
        else
1718
7.31k
#endif  /*SUPPORT_UNICODE */
1719
1720
        /* Not UTF mode */
1721
7.31k
          {
1722
7.31k
          for (;;)
1723
224k
            {
1724
224k
            RMATCH(Fecode, RM29);
1725
224k
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1726
224k
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1727
223k
            if (Feptr >= mb->end_subject)
1728
1.36k
              {
1729
1.36k
              SCHECK_PARTIAL();
1730
1.36k
              RRETURN(MATCH_NOMATCH);
1731
1.36k
              }
1732
222k
            if (Lc == *Feptr || Loc == *Feptr) RRETURN(MATCH_NOMATCH);
1733
217k
            Feptr++;
1734
217k
            }
1735
7.31k
          }
1736
0
        PCRE2_UNREACHABLE(); /* Control never reaches here */
1737
0
        }
1738
1739
      /* Maximize case */
1740
1741
102k
      else
1742
102k
        {
1743
102k
        Lstart_eptr = Feptr;
1744
1745
102k
#ifdef SUPPORT_UNICODE
1746
102k
        if (utf)
1747
3.16k
          {
1748
3.16k
          uint32_t d;
1749
7.46k
          for (i = Lmin; i < Lmax; i++)
1750
4.86k
            {
1751
4.86k
            int len = 1;
1752
4.86k
            if (Feptr >= mb->end_subject)
1753
315
              {
1754
315
              SCHECK_PARTIAL();
1755
315
              break;
1756
315
              }
1757
4.55k
            GETCHARLEN(d, Feptr, len);
1758
4.55k
            if (Lc == d || Loc == d) break;
1759
4.29k
            Feptr += len;
1760
4.29k
            }
1761
1762
          /* After \C in UTF mode, Lstart_eptr might be in the middle of a
1763
          Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
1764
          go too far. */
1765
1766
3.16k
          if (reptype != REPTYPE_POS) for(;;)
1767
7.46k
            {
1768
7.46k
            if (Feptr <= Lstart_eptr) break;
1769
4.29k
            RMATCH(Fecode, RM205);
1770
4.29k
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1771
4.29k
            Feptr--;
1772
4.29k
            BACKCHAR(Feptr);
1773
4.29k
            }
1774
3.16k
          }
1775
99.6k
        else
1776
99.6k
#endif  /* SUPPORT_UNICODE */
1777
1778
        /* Not UTF mode */
1779
99.6k
          {
1780
5.17M
          for (i = Lmin; i < Lmax; i++)
1781
5.17M
            {
1782
5.17M
            if (Feptr >= mb->end_subject)
1783
14.6k
              {
1784
14.6k
              SCHECK_PARTIAL();
1785
14.6k
              break;
1786
14.6k
              }
1787
5.15M
            if (Lc == *Feptr || Loc == *Feptr) break;
1788
5.07M
            Feptr++;
1789
5.07M
            }
1790
99.6k
          if (reptype != REPTYPE_POS) for (;;)
1791
5.15M
            {
1792
5.15M
            if (Feptr == Lstart_eptr) break;
1793
5.05M
            RMATCH(Fecode, RM30);
1794
5.05M
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1795
5.05M
            Feptr--;
1796
5.05M
            }
1797
99.6k
          }
1798
102k
        }
1799
118k
      }
1800
1801
    /* Caseful comparisons */
1802
1803
4.03k
    else
1804
4.03k
      {
1805
4.03k
#ifdef SUPPORT_UNICODE
1806
4.03k
      if (utf)
1807
58
        {
1808
58
        uint32_t d;
1809
58
        for (i = 1; i <= Lmin; i++)
1810
0
          {
1811
0
          if (Feptr >= mb->end_subject)
1812
0
            {
1813
0
            SCHECK_PARTIAL();
1814
0
            RRETURN(MATCH_NOMATCH);
1815
0
            }
1816
0
          GETCHARINC(d, Feptr);
1817
0
          if (Lc == d) RRETURN(MATCH_NOMATCH);
1818
0
          }
1819
58
        }
1820
3.97k
      else
1821
3.97k
#endif
1822
      /* Not UTF mode */
1823
3.97k
        {
1824
7.41k
        for (i = 1; i <= Lmin; i++)
1825
3.52k
          {
1826
3.52k
          if (Feptr >= mb->end_subject)
1827
0
            {
1828
0
            SCHECK_PARTIAL();
1829
0
            RRETURN(MATCH_NOMATCH);
1830
0
            }
1831
3.52k
          if (Lc == *Feptr++) RRETURN(MATCH_NOMATCH);
1832
3.52k
          }
1833
3.97k
        }
1834
1835
3.94k
      if (Lmin == Lmax) continue;
1836
1837
3.94k
      if (reptype == REPTYPE_MIN)
1838
765
        {
1839
765
#ifdef SUPPORT_UNICODE
1840
765
        if (utf)
1841
58
          {
1842
58
          uint32_t d;
1843
58
          for (;;)
1844
112
            {
1845
112
            RMATCH(Fecode, RM206);
1846
112
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1847
112
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1848
58
            if (Feptr >= mb->end_subject)
1849
0
              {
1850
0
              SCHECK_PARTIAL();
1851
0
              RRETURN(MATCH_NOMATCH);
1852
0
              }
1853
58
            GETCHARINC(d, Feptr);
1854
58
            if (Lc == d) RRETURN(MATCH_NOMATCH);
1855
58
            }
1856
58
          }
1857
707
        else
1858
707
#endif
1859
        /* Not UTF mode */
1860
707
          {
1861
707
          for (;;)
1862
54.4k
            {
1863
54.4k
            RMATCH(Fecode, RM31);
1864
54.4k
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1865
54.4k
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1866
54.4k
            if (Feptr >= mb->end_subject)
1867
346
              {
1868
346
              SCHECK_PARTIAL();
1869
346
              RRETURN(MATCH_NOMATCH);
1870
346
              }
1871
54.0k
            if (Lc == *Feptr++) RRETURN(MATCH_NOMATCH);
1872
54.0k
            }
1873
707
          }
1874
0
        PCRE2_UNREACHABLE(); /* Control never reaches here */
1875
0
        }
1876
1877
      /* Maximize case */
1878
1879
3.17k
      else
1880
3.17k
        {
1881
3.17k
        Lstart_eptr = Feptr;
1882
1883
3.17k
#ifdef SUPPORT_UNICODE
1884
3.17k
        if (utf)
1885
0
          {
1886
0
          uint32_t d;
1887
0
          for (i = Lmin; i < Lmax; i++)
1888
0
            {
1889
0
            int len = 1;
1890
0
            if (Feptr >= mb->end_subject)
1891
0
              {
1892
0
              SCHECK_PARTIAL();
1893
0
              break;
1894
0
              }
1895
0
            GETCHARLEN(d, Feptr, len);
1896
0
            if (Lc == d) break;
1897
0
            Feptr += len;
1898
0
            }
1899
1900
          /* After \C in UTF mode, Lstart_eptr might be in the middle of a
1901
          Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
1902
          go too far. */
1903
1904
0
          if (reptype != REPTYPE_POS) for(;;)
1905
0
            {
1906
0
            if (Feptr <= Lstart_eptr) break;
1907
0
            RMATCH(Fecode, RM207);
1908
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1909
0
            Feptr--;
1910
0
            BACKCHAR(Feptr);
1911
0
            }
1912
0
          }
1913
3.17k
        else
1914
3.17k
#endif
1915
        /* Not UTF mode */
1916
3.17k
          {
1917
72.0k
          for (i = Lmin; i < Lmax; i++)
1918
71.7k
            {
1919
71.7k
            if (Feptr >= mb->end_subject)
1920
452
              {
1921
452
              SCHECK_PARTIAL();
1922
452
              break;
1923
452
              }
1924
71.3k
            if (Lc == *Feptr) break;
1925
68.8k
            Feptr++;
1926
68.8k
            }
1927
3.17k
          if (reptype != REPTYPE_POS) for (;;)
1928
69.3k
            {
1929
69.3k
            if (Feptr == Lstart_eptr) break;
1930
66.4k
            RMATCH(Fecode, RM32);
1931
66.4k
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1932
66.4k
            Feptr--;
1933
66.4k
            }
1934
3.17k
          }
1935
3.17k
        }
1936
3.94k
      }
1937
105k
    break;
1938
1939
105k
#undef Lstart_eptr
1940
105k
#undef Lmin
1941
105k
#undef Lmax
1942
105k
#undef Lc
1943
105k
#undef Loc
1944
1945
1946
    /* ===================================================================== */
1947
    /* Match a bit-mapped character class, possibly repeatedly. These opcodes
1948
    are used when all the characters in the class have values in the range
1949
    0-255, and either the matching is caseful, or the characters are in the
1950
    range 0-127 when UTF processing is enabled. The only difference between
1951
    OP_CLASS and OP_NCLASS occurs when a data character outside the range is
1952
    encountered. */
1953
1954
1.12M
#define Lmin               F->temp_32[0]
1955
1.15M
#define Lmax               F->temp_32[1]
1956
526k
#define Lstart_eptr        F->temp_sptr[0]
1957
959k
#define Lbyte_map_address  F->temp_sptr[1]
1958
700k
#define Lbyte_map          ((const unsigned char *)Lbyte_map_address)
1959
1960
105k
    case OP_NCLASS:
1961
259k
    case OP_CLASS:
1962
259k
      {
1963
259k
      Lbyte_map_address = Fecode + 1;           /* Save for matching */
1964
259k
      Fecode += 1 + (32 / sizeof(PCRE2_UCHAR)); /* Advance past the item */
1965
1966
      /* Look past the end of the item to see if there is repeat information
1967
      following. Then obey similar code to character type repeats. */
1968
1969
259k
      switch (*Fecode)
1970
259k
        {
1971
22.1k
        case OP_CRSTAR:
1972
36.1k
        case OP_CRMINSTAR:
1973
49.7k
        case OP_CRPLUS:
1974
50.3k
        case OP_CRMINPLUS:
1975
64.0k
        case OP_CRQUERY:
1976
93.9k
        case OP_CRMINQUERY:
1977
192k
        case OP_CRPOSSTAR:
1978
201k
        case OP_CRPOSPLUS:
1979
207k
        case OP_CRPOSQUERY:
1980
207k
        fc = *Fecode++ - OP_CRSTAR;
1981
207k
        Lmin = rep_min[fc];
1982
207k
        Lmax = rep_max[fc];
1983
207k
        reptype = rep_typ[fc];
1984
207k
        break;
1985
1986
0
        case OP_CRRANGE:
1987
0
        case OP_CRMINRANGE:
1988
0
        case OP_CRPOSRANGE:
1989
0
        Lmin = GET2(Fecode, 1);
1990
0
        Lmax = GET2(Fecode, 1 + IMM2_SIZE);
1991
0
        if (Lmax == 0) Lmax = UINT32_MAX;       /* Max 0 => infinity */
1992
0
        reptype = rep_typ[*Fecode - OP_CRSTAR];
1993
0
        Fecode += 1 + 2 * IMM2_SIZE;
1994
0
        break;
1995
1996
51.5k
        default:               /* No repeat follows */
1997
51.5k
        Lmin = Lmax = 1;
1998
51.5k
        break;
1999
259k
        }
2000
2001
      /* First, ensure the minimum number of matches are present. */
2002
2003
259k
#ifdef SUPPORT_UNICODE
2004
259k
      if (utf)
2005
56.0k
        {
2006
87.1k
        for (i = 1; i <= Lmin; i++)
2007
47.0k
          {
2008
47.0k
          if (Feptr >= mb->end_subject)
2009
402
            {
2010
402
            SCHECK_PARTIAL();
2011
402
            RRETURN(MATCH_NOMATCH);
2012
402
            }
2013
46.6k
          GETCHARINC(fc, Feptr);
2014
46.6k
          if (fc > 255)
2015
857
            {
2016
857
            if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
2017
857
            }
2018
45.8k
          else
2019
45.8k
            if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
2020
46.6k
          }
2021
56.0k
        }
2022
203k
      else
2023
203k
#endif
2024
      /* Not UTF mode */
2025
203k
        {
2026
219k
        for (i = 1; i <= Lmin; i++)
2027
27.8k
          {
2028
27.8k
          if (Feptr >= mb->end_subject)
2029
343
            {
2030
343
            SCHECK_PARTIAL();
2031
343
            RRETURN(MATCH_NOMATCH);
2032
343
            }
2033
27.5k
          fc = *Feptr++;
2034
#if PCRE2_CODE_UNIT_WIDTH != 8
2035
          if (fc > 255)
2036
            {
2037
            if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
2038
            }
2039
          else
2040
#endif
2041
27.5k
          if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
2042
27.5k
          }
2043
203k
        }
2044
2045
      /* If Lmax == Lmin we are done. Continue with main loop. */
2046
2047
232k
      if (Lmin == Lmax) continue;
2048
2049
      /* If minimizing, keep testing the rest of the expression and advancing
2050
      the pointer while it matches the class. */
2051
2052
196k
      if (reptype == REPTYPE_MIN)
2053
44.4k
        {
2054
44.4k
#ifdef SUPPORT_UNICODE
2055
44.4k
        if (utf)
2056
5.86k
          {
2057
5.86k
          for (;;)
2058
18.6k
            {
2059
18.6k
            RMATCH(Fecode, RM200);
2060
18.6k
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2061
18.6k
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
2062
15.5k
            if (Feptr >= mb->end_subject)
2063
226
              {
2064
226
              SCHECK_PARTIAL();
2065
226
              RRETURN(MATCH_NOMATCH);
2066
226
              }
2067
15.3k
            GETCHARINC(fc, Feptr);
2068
15.3k
            if (fc > 255)
2069
238
              {
2070
238
              if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
2071
238
              }
2072
15.0k
            else
2073
15.0k
              if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
2074
15.3k
            }
2075
5.86k
          }
2076
38.5k
        else
2077
38.5k
#endif
2078
        /* Not UTF mode */
2079
38.5k
          {
2080
38.5k
          for (;;)
2081
152k
            {
2082
152k
            RMATCH(Fecode, RM23);
2083
152k
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2084
152k
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
2085
131k
            if (Feptr >= mb->end_subject)
2086
3.24k
              {
2087
3.24k
              SCHECK_PARTIAL();
2088
3.24k
              RRETURN(MATCH_NOMATCH);
2089
3.24k
              }
2090
128k
            fc = *Feptr++;
2091
#if PCRE2_CODE_UNIT_WIDTH != 8
2092
            if (fc > 255)
2093
              {
2094
              if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
2095
              }
2096
            else
2097
#endif
2098
128k
            if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
2099
128k
            }
2100
38.5k
          }
2101
0
        PCRE2_UNREACHABLE(); /* Control never reaches here */
2102
0
        }
2103
2104
      /* If maximizing, find the longest possible run, then work backwards. */
2105
2106
152k
      else
2107
152k
        {
2108
152k
        Lstart_eptr = Feptr;
2109
2110
152k
#ifdef SUPPORT_UNICODE
2111
152k
        if (utf)
2112
6.55k
          {
2113
92.2k
          for (i = Lmin; i < Lmax; i++)
2114
89.7k
            {
2115
89.7k
            int len = 1;
2116
89.7k
            if (Feptr >= mb->end_subject)
2117
1.31k
              {
2118
1.31k
              SCHECK_PARTIAL();
2119
1.31k
              break;
2120
1.31k
              }
2121
88.4k
            GETCHARLEN(fc, Feptr, len);
2122
88.4k
            if (fc > 255)
2123
1.52k
              {
2124
1.52k
              if (Fop == OP_CLASS) break;
2125
1.52k
              }
2126
86.9k
            else
2127
86.9k
              if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) break;
2128
85.7k
            Feptr += len;
2129
85.7k
            }
2130
2131
6.55k
          if (reptype == REPTYPE_POS) continue;    /* No backtracking */
2132
2133
          /* After \C in UTF mode, Lstart_eptr might be in the middle of a
2134
          Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
2135
          go too far. */
2136
2137
5.74k
          for (;;)
2138
90.7k
            {
2139
90.7k
            RMATCH(Fecode, RM201);
2140
90.7k
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2141
90.7k
            if (Feptr-- <= Lstart_eptr) break;  /* Tried at original position */
2142
84.9k
            BACKCHAR(Feptr);
2143
84.9k
            }
2144
5.74k
          }
2145
145k
        else
2146
145k
#endif
2147
          /* Not UTF mode */
2148
145k
          {
2149
404k
          for (i = Lmin; i < Lmax; i++)
2150
400k
            {
2151
400k
            if (Feptr >= mb->end_subject)
2152
3.80k
              {
2153
3.80k
              SCHECK_PARTIAL();
2154
3.80k
              break;
2155
3.80k
              }
2156
396k
            fc = *Feptr;
2157
#if PCRE2_CODE_UNIT_WIDTH != 8
2158
            if (fc > 255)
2159
              {
2160
              if (Fop == OP_CLASS) break;
2161
              }
2162
            else
2163
#endif
2164
396k
            if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) break;
2165
259k
            Feptr++;
2166
259k
            }
2167
2168
145k
          if (reptype == REPTYPE_POS) continue;    /* No backtracking */
2169
2170
283k
          while (Feptr >= Lstart_eptr)
2171
243k
            {
2172
243k
            RMATCH(Fecode, RM24);
2173
243k
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2174
243k
            Feptr--;
2175
243k
            }
2176
40.2k
          }
2177
2178
46.0k
        RRETURN(MATCH_NOMATCH);
2179
46.0k
        }
2180
196k
      }
2181
2182
0
    PCRE2_UNREACHABLE(); /* Control never reaches here */
2183
2184
0
#undef Lbyte_map_address
2185
0
#undef Lbyte_map
2186
0
#undef Lstart_eptr
2187
0
#undef Lmin
2188
0
#undef Lmax
2189
2190
2191
    /* ===================================================================== */
2192
    /* Match an extended character class. In the 8-bit library, this opcode is
2193
    encountered only when UTF-8 mode is supported. In the 16-bit and
2194
    32-bit libraries, codepoints greater than 255 may be encountered even when
2195
    UTF is not supported. */
2196
2197
303k
#define Lstart_eptr  F->temp_sptr[0]
2198
572k
#define Lxclass_data F->temp_sptr[1]
2199
463k
#define Lmin         F->temp_32[0]
2200
649k
#define Lmax         F->temp_32[1]
2201
2202
0
#ifdef SUPPORT_WIDE_CHARS
2203
103k
    case OP_XCLASS:
2204
103k
      {
2205
103k
      Lxclass_data = Fecode + 1 + LINK_SIZE;  /* Save for matching */
2206
103k
      Fecode += GET(Fecode, 1);               /* Advance past the item */
2207
2208
103k
      switch (*Fecode)
2209
103k
        {
2210
5.48k
        case OP_CRSTAR:
2211
24.2k
        case OP_CRMINSTAR:
2212
33.9k
        case OP_CRPLUS:
2213
34.8k
        case OP_CRMINPLUS:
2214
36.5k
        case OP_CRQUERY:
2215
55.6k
        case OP_CRMINQUERY:
2216
80.3k
        case OP_CRPOSSTAR:
2217
82.1k
        case OP_CRPOSPLUS:
2218
83.7k
        case OP_CRPOSQUERY:
2219
83.7k
        fc = *Fecode++ - OP_CRSTAR;
2220
83.7k
        Lmin = rep_min[fc];
2221
83.7k
        Lmax = rep_max[fc];
2222
83.7k
        reptype = rep_typ[fc];
2223
83.7k
        break;
2224
2225
0
        case OP_CRRANGE:
2226
0
        case OP_CRMINRANGE:
2227
0
        case OP_CRPOSRANGE:
2228
0
        Lmin = GET2(Fecode, 1);
2229
0
        Lmax = GET2(Fecode, 1 + IMM2_SIZE);
2230
0
        if (Lmax == 0) Lmax = UINT32_MAX;  /* Max 0 => infinity */
2231
0
        reptype = rep_typ[*Fecode - OP_CRSTAR];
2232
0
        Fecode += 1 + 2 * IMM2_SIZE;
2233
0
        break;
2234
2235
19.5k
        default:               /* No repeat follows */
2236
19.5k
        Lmin = Lmax = 1;
2237
19.5k
        break;
2238
103k
        }
2239
2240
      /* First, ensure the minimum number of matches are present. */
2241
2242
120k
      for (i = 1; i <= Lmin; i++)
2243
31.8k
        {
2244
31.8k
        if (Feptr >= mb->end_subject)
2245
283
          {
2246
283
          SCHECK_PARTIAL();
2247
283
          RRETURN(MATCH_NOMATCH);
2248
283
          }
2249
31.5k
        GETCHARINCTEST(fc, Feptr);
2250
31.5k
        if (!PRIV(xclass)(fc, Lxclass_data,
2251
31.5k
            (const uint8_t*)mb->start_code, utf))
2252
13.9k
          RRETURN(MATCH_NOMATCH);
2253
31.5k
        }
2254
2255
      /* If Lmax == Lmin we can just continue with the main loop. */
2256
2257
89.0k
      if (Lmin == Lmax) continue;
2258
2259
      /* If minimizing, keep testing the rest of the expression and advancing
2260
      the pointer while it matches the class. */
2261
2262
78.6k
      if (reptype == REPTYPE_MIN)
2263
38.6k
        {
2264
38.6k
        for (;;)
2265
110k
          {
2266
110k
          RMATCH(Fecode, RM100);
2267
110k
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2268
110k
          if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
2269
95.1k
          if (Feptr >= mb->end_subject)
2270
699
            {
2271
699
            SCHECK_PARTIAL();
2272
699
            RRETURN(MATCH_NOMATCH);
2273
699
            }
2274
94.5k
          GETCHARINCTEST(fc, Feptr);
2275
94.5k
          if (!PRIV(xclass)(fc, Lxclass_data,
2276
94.5k
              (const uint8_t*)mb->start_code, utf))
2277
22.7k
            RRETURN(MATCH_NOMATCH);
2278
94.5k
          }
2279
0
        PCRE2_UNREACHABLE(); /* Control never reaches here */
2280
0
        }
2281
2282
      /* If maximizing, find the longest possible run, then work backwards. */
2283
2284
40.0k
      else
2285
40.0k
        {
2286
40.0k
        Lstart_eptr = Feptr;
2287
346k
        for (i = Lmin; i < Lmax; i++)
2288
345k
          {
2289
345k
          int len = 1;
2290
345k
          if (Feptr >= mb->end_subject)
2291
2.30k
            {
2292
2.30k
            SCHECK_PARTIAL();
2293
2.30k
            break;
2294
2.30k
            }
2295
343k
#ifdef SUPPORT_UNICODE
2296
343k
          GETCHARLENTEST(fc, Feptr, len);
2297
#else
2298
          fc = *Feptr;
2299
#endif
2300
343k
          if (!PRIV(xclass)(fc, Lxclass_data,
2301
343k
              (const uint8_t*)mb->start_code, utf)) break;
2302
306k
          Feptr += len;
2303
306k
          }
2304
2305
40.0k
        if (reptype == REPTYPE_POS) continue;    /* No backtracking */
2306
2307
        /* After \C in UTF mode, Lstart_eptr might be in the middle of a
2308
        Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
2309
        go too far. */
2310
2311
13.3k
        for(;;)
2312
263k
          {
2313
263k
          RMATCH(Fecode, RM101);
2314
263k
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2315
263k
          if (Feptr-- <= Lstart_eptr) break;  /* Tried at original position */
2316
249k
#ifdef SUPPORT_UNICODE
2317
249k
          if (utf) BACKCHAR(Feptr);
2318
249k
#endif
2319
249k
          }
2320
13.3k
        RRETURN(MATCH_NOMATCH);
2321
13.3k
        }
2322
2323
0
      PCRE2_UNREACHABLE(); /* Control never reaches here */
2324
0
      }
2325
0
#endif  /* SUPPORT_WIDE_CHARS: end of XCLASS */
2326
2327
0
#undef Lstart_eptr
2328
0
#undef Lxclass_data
2329
0
#undef Lmin
2330
0
#undef Lmax
2331
2332
2333
    /* ===================================================================== */
2334
    /* Match a complex, set-based character class. These opcodes are used when
2335
    there is complex nesting or logical operations within the character
2336
    class. */
2337
2338
0
#define Lstart_eptr  F->temp_sptr[0]
2339
0
#define Leclass_data F->temp_sptr[1]
2340
0
#define Leclass_len  F->temp_size
2341
0
#define Lmin         F->temp_32[0]
2342
0
#define Lmax         F->temp_32[1]
2343
2344
0
#ifdef SUPPORT_WIDE_CHARS
2345
0
    case OP_ECLASS:
2346
0
      {
2347
0
      Leclass_data = Fecode + 1 + LINK_SIZE;  /* Save for matching */
2348
0
      Fecode += GET(Fecode, 1);               /* Advance past the item */
2349
0
      Leclass_len = (PCRE2_SIZE)(Fecode - Leclass_data);
2350
2351
0
      switch (*Fecode)
2352
0
        {
2353
0
        case OP_CRSTAR:
2354
0
        case OP_CRMINSTAR:
2355
0
        case OP_CRPLUS:
2356
0
        case OP_CRMINPLUS:
2357
0
        case OP_CRQUERY:
2358
0
        case OP_CRMINQUERY:
2359
0
        case OP_CRPOSSTAR:
2360
0
        case OP_CRPOSPLUS:
2361
0
        case OP_CRPOSQUERY:
2362
0
        fc = *Fecode++ - OP_CRSTAR;
2363
0
        Lmin = rep_min[fc];
2364
0
        Lmax = rep_max[fc];
2365
0
        reptype = rep_typ[fc];
2366
0
        break;
2367
2368
0
        case OP_CRRANGE:
2369
0
        case OP_CRMINRANGE:
2370
0
        case OP_CRPOSRANGE:
2371
0
        Lmin = GET2(Fecode, 1);
2372
0
        Lmax = GET2(Fecode, 1 + IMM2_SIZE);
2373
0
        if (Lmax == 0) Lmax = UINT32_MAX;  /* Max 0 => infinity */
2374
0
        reptype = rep_typ[*Fecode - OP_CRSTAR];
2375
0
        Fecode += 1 + 2 * IMM2_SIZE;
2376
0
        break;
2377
2378
0
        default:               /* No repeat follows */
2379
0
        Lmin = Lmax = 1;
2380
0
        break;
2381
0
        }
2382
2383
      /* First, ensure the minimum number of matches are present. */
2384
2385
0
      for (i = 1; i <= Lmin; i++)
2386
0
        {
2387
0
        if (Feptr >= mb->end_subject)
2388
0
          {
2389
0
          SCHECK_PARTIAL();
2390
0
          RRETURN(MATCH_NOMATCH);
2391
0
          }
2392
0
        GETCHARINCTEST(fc, Feptr);
2393
0
        if (!PRIV(eclass)(fc, Leclass_data, Leclass_data + Leclass_len,
2394
0
                          (const uint8_t*)mb->start_code, utf))
2395
0
          RRETURN(MATCH_NOMATCH);
2396
0
        }
2397
2398
      /* If Lmax == Lmin we can just continue with the main loop. */
2399
2400
0
      if (Lmin == Lmax) continue;
2401
2402
      /* If minimizing, keep testing the rest of the expression and advancing
2403
      the pointer while it matches the class. */
2404
2405
0
      if (reptype == REPTYPE_MIN)
2406
0
        {
2407
0
        for (;;)
2408
0
          {
2409
0
          RMATCH(Fecode, RM102);
2410
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2411
0
          if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
2412
0
          if (Feptr >= mb->end_subject)
2413
0
            {
2414
0
            SCHECK_PARTIAL();
2415
0
            RRETURN(MATCH_NOMATCH);
2416
0
            }
2417
0
          GETCHARINCTEST(fc, Feptr);
2418
0
          if (!PRIV(eclass)(fc, Leclass_data, Leclass_data + Leclass_len,
2419
0
                            (const uint8_t*)mb->start_code, utf))
2420
0
            RRETURN(MATCH_NOMATCH);
2421
0
          }
2422
0
        PCRE2_UNREACHABLE(); /* Control never reaches here */
2423
0
        }
2424
2425
      /* If maximizing, find the longest possible run, then work backwards. */
2426
2427
0
      else
2428
0
        {
2429
0
        Lstart_eptr = Feptr;
2430
0
        for (i = Lmin; i < Lmax; i++)
2431
0
          {
2432
0
          int len = 1;
2433
0
          if (Feptr >= mb->end_subject)
2434
0
            {
2435
0
            SCHECK_PARTIAL();
2436
0
            break;
2437
0
            }
2438
0
#ifdef SUPPORT_UNICODE
2439
0
          GETCHARLENTEST(fc, Feptr, len);
2440
#else
2441
          fc = *Feptr;
2442
#endif
2443
0
          if (!PRIV(eclass)(fc, Leclass_data, Leclass_data + Leclass_len,
2444
0
                            (const uint8_t*)mb->start_code, utf))
2445
0
            break;
2446
0
          Feptr += len;
2447
0
          }
2448
2449
0
        if (reptype == REPTYPE_POS) continue;    /* No backtracking */
2450
2451
        /* After \C in UTF mode, Lstart_eptr might be in the middle of a
2452
        Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
2453
        go too far. */
2454
2455
0
        for(;;)
2456
0
          {
2457
0
          RMATCH(Fecode, RM103);
2458
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2459
0
          if (Feptr-- <= Lstart_eptr) break;  /* Tried at original position */
2460
0
#ifdef SUPPORT_UNICODE
2461
0
          if (utf) BACKCHAR(Feptr);
2462
0
#endif
2463
0
          }
2464
0
        RRETURN(MATCH_NOMATCH);
2465
0
        }
2466
2467
0
      PCRE2_UNREACHABLE(); /* Control never reaches here */
2468
0
      }
2469
0
#endif  /* SUPPORT_WIDE_CHARS: end of ECLASS */
2470
2471
0
#undef Lstart_eptr
2472
0
#undef Leclass_data
2473
0
#undef Leclass_len
2474
0
#undef Lmin
2475
0
#undef Lmax
2476
2477
2478
    /* ===================================================================== */
2479
    /* Match various character types when PCRE2_UCP is not set. These opcodes
2480
    are not generated when PCRE2_UCP is set - instead appropriate property
2481
    tests are compiled. */
2482
2483
45.3k
    case OP_NOT_DIGIT:
2484
45.3k
    if (Feptr >= mb->end_subject)
2485
583
      {
2486
583
      SCHECK_PARTIAL();
2487
583
      RRETURN(MATCH_NOMATCH);
2488
583
      }
2489
44.7k
    GETCHARINCTEST(fc, Feptr);
2490
44.7k
    if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_digit) != 0)
2491
8.30k
      RRETURN(MATCH_NOMATCH);
2492
36.4k
    Fecode++;
2493
36.4k
    break;
2494
2495
9.83k
    case OP_DIGIT:
2496
9.83k
    if (Feptr >= mb->end_subject)
2497
360
      {
2498
360
      SCHECK_PARTIAL();
2499
360
      RRETURN(MATCH_NOMATCH);
2500
360
      }
2501
9.47k
    GETCHARINCTEST(fc, Feptr);
2502
9.47k
    if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_digit) == 0)
2503
8.80k
      RRETURN(MATCH_NOMATCH);
2504
668
    Fecode++;
2505
668
    break;
2506
2507
667k
    case OP_NOT_WHITESPACE:
2508
667k
    if (Feptr >= mb->end_subject)
2509
4.39k
      {
2510
4.39k
      SCHECK_PARTIAL();
2511
4.39k
      RRETURN(MATCH_NOMATCH);
2512
4.39k
      }
2513
663k
    GETCHARINCTEST(fc, Feptr);
2514
663k
    if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_space) != 0)
2515
22.8k
      RRETURN(MATCH_NOMATCH);
2516
640k
    Fecode++;
2517
640k
    break;
2518
2519
11.7k
    case OP_WHITESPACE:
2520
11.7k
    if (Feptr >= mb->end_subject)
2521
18
      {
2522
18
      SCHECK_PARTIAL();
2523
18
      RRETURN(MATCH_NOMATCH);
2524
18
      }
2525
11.7k
    GETCHARINCTEST(fc, Feptr);
2526
11.7k
    if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_space) == 0)
2527
10.2k
      RRETURN(MATCH_NOMATCH);
2528
1.47k
    Fecode++;
2529
1.47k
    break;
2530
2531
366k
    case OP_NOT_WORDCHAR:
2532
366k
    if (Feptr >= mb->end_subject)
2533
823
      {
2534
823
      SCHECK_PARTIAL();
2535
823
      RRETURN(MATCH_NOMATCH);
2536
823
      }
2537
365k
    GETCHARINCTEST(fc, Feptr);
2538
365k
    if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0)
2539
194k
      RRETURN(MATCH_NOMATCH);
2540
170k
    Fecode++;
2541
170k
    break;
2542
2543
5.45M
    case OP_WORDCHAR:
2544
5.45M
    if (Feptr >= mb->end_subject)
2545
1.21k
      {
2546
1.21k
      SCHECK_PARTIAL();
2547
1.21k
      RRETURN(MATCH_NOMATCH);
2548
1.21k
      }
2549
5.45M
    GETCHARINCTEST(fc, Feptr);
2550
5.45M
    if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_word) == 0)
2551
3.13M
      RRETURN(MATCH_NOMATCH);
2552
2.31M
    Fecode++;
2553
2.31M
    break;
2554
2555
204M
    case OP_ANYNL:
2556
204M
    if (Feptr >= mb->end_subject)
2557
2.54M
      {
2558
2.54M
      SCHECK_PARTIAL();
2559
2.54M
      RRETURN(MATCH_NOMATCH);
2560
2.54M
      }
2561
201M
    GETCHARINCTEST(fc, Feptr);
2562
201M
    switch(fc)
2563
201M
      {
2564
189M
      default: RRETURN(MATCH_NOMATCH);
2565
2566
31.1k
      case CHAR_CR:
2567
31.1k
      if (Feptr >= mb->end_subject)
2568
765
        {
2569
765
        SCHECK_PARTIAL();
2570
765
        }
2571
30.3k
      else if (UCHAR21TEST(Feptr) == CHAR_LF) Feptr++;
2572
31.1k
      break;
2573
2574
7.35M
      case CHAR_LF:
2575
7.35M
      break;
2576
2577
3.04M
      case CHAR_VT:
2578
4.76M
      case CHAR_FF:
2579
4.79M
      case CHAR_NEL:
2580
4.79M
#ifndef EBCDIC
2581
4.79M
      case 0x2028:
2582
4.79M
      case 0x2029:
2583
4.79M
#endif  /* Not EBCDIC */
2584
4.79M
      if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
2585
4.79M
      break;
2586
201M
      }
2587
12.1M
    Fecode++;
2588
12.1M
    break;
2589
2590
3.54M
    case OP_NOT_HSPACE:
2591
3.54M
    if (Feptr >= mb->end_subject)
2592
85.4k
      {
2593
85.4k
      SCHECK_PARTIAL();
2594
85.4k
      RRETURN(MATCH_NOMATCH);
2595
85.4k
      }
2596
3.45M
    GETCHARINCTEST(fc, Feptr);
2597
3.45M
    switch(fc)
2598
3.45M
      {
2599
3.07M
      HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
2600
3.28M
      default: break;
2601
3.45M
      }
2602
3.28M
    Fecode++;
2603
3.28M
    break;
2604
2605
78.4k
    case OP_HSPACE:
2606
78.4k
    if (Feptr >= mb->end_subject)
2607
2.15k
      {
2608
2.15k
      SCHECK_PARTIAL();
2609
2.15k
      RRETURN(MATCH_NOMATCH);
2610
2.15k
      }
2611
76.2k
    GETCHARINCTEST(fc, Feptr);
2612
76.2k
    switch(fc)
2613
76.2k
      {
2614
1.45k
      HSPACE_CASES: break;  /* Byte and multibyte cases */
2615
74.8k
      default: RRETURN(MATCH_NOMATCH);
2616
76.2k
      }
2617
1.45k
    Fecode++;
2618
1.45k
    break;
2619
2620
1.56M
    case OP_NOT_VSPACE:
2621
1.56M
    if (Feptr >= mb->end_subject)
2622
2.08k
      {
2623
2.08k
      SCHECK_PARTIAL();
2624
2.08k
      RRETURN(MATCH_NOMATCH);
2625
2.08k
      }
2626
1.56M
    GETCHARINCTEST(fc, Feptr);
2627
1.56M
    switch(fc)
2628
1.56M
      {
2629
268k
      VSPACE_CASES: RRETURN(MATCH_NOMATCH);
2630
1.52M
      default: break;
2631
1.56M
      }
2632
1.52M
    Fecode++;
2633
1.52M
    break;
2634
2635
1.76M
    case OP_VSPACE:
2636
1.76M
    if (Feptr >= mb->end_subject)
2637
11.9k
      {
2638
11.9k
      SCHECK_PARTIAL();
2639
11.9k
      RRETURN(MATCH_NOMATCH);
2640
11.9k
      }
2641
1.75M
    GETCHARINCTEST(fc, Feptr);
2642
1.75M
    switch(fc)
2643
1.75M
      {
2644
73.7k
      VSPACE_CASES: break;
2645
1.68M
      default: RRETURN(MATCH_NOMATCH);
2646
1.75M
      }
2647
73.7k
    Fecode++;
2648
73.7k
    break;
2649
2650
2651
0
#ifdef SUPPORT_UNICODE
2652
2653
    /* ===================================================================== */
2654
    /* Check the next character by Unicode property. We will get here only
2655
    if the support is in the binary; otherwise a compile-time error occurs. */
2656
2657
88.2k
    case OP_PROP:
2658
125k
    case OP_NOTPROP:
2659
125k
    if (Feptr >= mb->end_subject)
2660
1.98k
      {
2661
1.98k
      SCHECK_PARTIAL();
2662
1.98k
      RRETURN(MATCH_NOMATCH);
2663
1.98k
      }
2664
123k
    GETCHARINCTEST(fc, Feptr);
2665
123k
      {
2666
123k
      const uint32_t *cp;
2667
123k
      uint32_t chartype;
2668
123k
      const ucd_record *prop = GET_UCD(fc);
2669
123k
      BOOL notmatch = Fop == OP_NOTPROP;
2670
2671
123k
      switch(Fecode[1])
2672
123k
        {
2673
0
        case PT_LAMP:
2674
0
        chartype = prop->chartype;
2675
0
        if ((chartype == ucp_Lu ||
2676
0
             chartype == ucp_Ll ||
2677
0
             chartype == ucp_Lt) == notmatch)
2678
0
          RRETURN(MATCH_NOMATCH);
2679
0
        break;
2680
2681
4.50k
        case PT_GC:
2682
4.50k
        if ((Fecode[2] == PRIV(ucp_gentype)[prop->chartype]) == notmatch)
2683
752
          RRETURN(MATCH_NOMATCH);
2684
3.75k
        break;
2685
2686
14.7k
        case PT_PC:
2687
14.7k
        if ((Fecode[2] == prop->chartype) == notmatch)
2688
14.7k
          RRETURN(MATCH_NOMATCH);
2689
2
        break;
2690
2691
2
        case PT_SC:
2692
0
        if ((Fecode[2] == prop->script) == notmatch)
2693
0
          RRETURN(MATCH_NOMATCH);
2694
0
        break;
2695
2696
0
        case PT_SCX:
2697
0
          {
2698
0
          BOOL ok = (Fecode[2] == prop->script ||
2699
0
                     MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Fecode[2]) != 0);
2700
0
          if (ok == notmatch) RRETURN(MATCH_NOMATCH);
2701
0
          }
2702
0
        break;
2703
2704
        /* These are specials */
2705
2706
0
        case PT_ALNUM:
2707
0
        chartype = prop->chartype;
2708
0
        if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
2709
0
             PRIV(ucp_gentype)[chartype] == ucp_N) == notmatch)
2710
0
          RRETURN(MATCH_NOMATCH);
2711
0
        break;
2712
2713
        /* Perl space used to exclude VT, but from Perl 5.18 it is included,
2714
        which means that Perl space and POSIX space are now identical. PCRE
2715
        was changed at release 8.34. */
2716
2717
31.7k
        case PT_SPACE:    /* Perl space */
2718
31.7k
        case PT_PXSPACE:  /* POSIX space */
2719
31.7k
        switch(fc)
2720
31.7k
          {
2721
50.3k
          HSPACE_CASES:
2722
50.3k
          VSPACE_CASES:
2723
41.8k
          if (notmatch) RRETURN(MATCH_NOMATCH);
2724
0
          break;
2725
2726
25.3k
          default:
2727
25.3k
          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == notmatch)
2728
464
            RRETURN(MATCH_NOMATCH);
2729
24.8k
          break;
2730
31.7k
          }
2731
24.8k
        break;
2732
2733
45.7k
        case PT_WORD:
2734
45.7k
        chartype = prop->chartype;
2735
45.7k
        if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
2736
35.1k
             PRIV(ucp_gentype)[chartype] == ucp_N ||
2737
34.2k
             chartype == ucp_Mn ||
2738
34.2k
             chartype == ucp_Pc) == notmatch)
2739
33.9k
          RRETURN(MATCH_NOMATCH);
2740
11.7k
        break;
2741
2742
26.8k
        case PT_CLIST:
2743
#if PCRE2_CODE_UNIT_WIDTH == 32
2744
            if (fc > MAX_UTF_CODE_POINT)
2745
              {
2746
              if (notmatch) break;;
2747
              RRETURN(MATCH_NOMATCH);
2748
              }
2749
#endif
2750
26.8k
        cp = PRIV(ucd_caseless_sets) + Fecode[2];
2751
26.8k
        for (;;)
2752
35.8k
          {
2753
35.8k
          if (fc < *cp)
2754
25.9k
            { if (notmatch) break; else { RRETURN(MATCH_NOMATCH); } }
2755
9.92k
          if (fc == *cp++)
2756
922
            { if (notmatch) { RRETURN(MATCH_NOMATCH); } else break; }
2757
9.92k
          }
2758
1.65k
        break;
2759
2760
1.65k
        case PT_UCNC:
2761
0
        if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
2762
0
             fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
2763
0
             fc >= 0xe000) == notmatch)
2764
0
          RRETURN(MATCH_NOMATCH);
2765
0
        break;
2766
2767
0
        case PT_BIDICL:
2768
0
        if ((UCD_BIDICLASS_PROP(prop) == Fecode[2]) == notmatch)
2769
0
          RRETURN(MATCH_NOMATCH);
2770
0
        break;
2771
2772
0
        case PT_BOOL:
2773
0
          {
2774
0
          BOOL ok = MAPBIT(PRIV(ucd_boolprop_sets) +
2775
0
            UCD_BPROPS_PROP(prop), Fecode[2]) != 0;
2776
0
          if (ok == notmatch) RRETURN(MATCH_NOMATCH);
2777
0
          }
2778
0
        break;
2779
2780
        /* This should never occur */
2781
2782
0
        default:
2783
0
        PCRE2_DEBUG_UNREACHABLE();
2784
0
        return PCRE2_ERROR_INTERNAL;
2785
123k
        }
2786
2787
42.0k
      Fecode += 3;
2788
42.0k
      }
2789
0
    break;
2790
2791
2792
    /* ===================================================================== */
2793
    /* Match an extended Unicode sequence. We will get here only if the support
2794
    is in the binary; otherwise a compile-time error occurs. */
2795
2796
28.0k
    case OP_EXTUNI:
2797
28.0k
    if (Feptr >= mb->end_subject)
2798
578
      {
2799
578
      SCHECK_PARTIAL();
2800
578
      RRETURN(MATCH_NOMATCH);
2801
578
      }
2802
27.4k
    else
2803
27.4k
      {
2804
27.4k
      GETCHARINCTEST(fc, Feptr);
2805
27.4k
      Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject, utf,
2806
27.4k
        NULL);
2807
27.4k
      }
2808
27.4k
    CHECK_PARTIAL();
2809
27.4k
    Fecode++;
2810
27.4k
    break;
2811
2812
0
#endif  /* SUPPORT_UNICODE */
2813
2814
2815
    /* ===================================================================== */
2816
    /* Match a single character type repeatedly. Note that the property type
2817
    does not need to be in a stack frame as it is not used within an RMATCH()
2818
    loop. */
2819
2820
708M
#define Lstart_eptr  F->temp_sptr[0]
2821
240M
#define Lmin         F->temp_32[0]
2822
241M
#define Lmax         F->temp_32[1]
2823
617M
#define Lctype       F->temp_32[2]
2824
2.45M
#define Lpropvalue   F->temp_32[3]
2825
2826
0
    case OP_TYPEEXACT:
2827
0
    Lmin = Lmax = GET2(Fecode, 1);
2828
0
    Fecode += 1 + IMM2_SIZE;
2829
0
    goto REPEATTYPE;
2830
2831
0
    case OP_TYPEUPTO:
2832
0
    case OP_TYPEMINUPTO:
2833
0
    Lmin = 0;
2834
0
    Lmax = GET2(Fecode, 1);
2835
0
    reptype = (*Fecode == OP_TYPEMINUPTO)? REPTYPE_MIN : REPTYPE_MAX;
2836
0
    Fecode += 1 + IMM2_SIZE;
2837
0
    goto REPEATTYPE;
2838
2839
420k
    case OP_TYPEPOSSTAR:
2840
420k
    reptype = REPTYPE_POS;
2841
420k
    Lmin = 0;
2842
420k
    Lmax = UINT32_MAX;
2843
420k
    Fecode++;
2844
420k
    goto REPEATTYPE;
2845
2846
4.54M
    case OP_TYPEPOSPLUS:
2847
4.54M
    reptype = REPTYPE_POS;
2848
4.54M
    Lmin = 1;
2849
4.54M
    Lmax = UINT32_MAX;
2850
4.54M
    Fecode++;
2851
4.54M
    goto REPEATTYPE;
2852
2853
34.3M
    case OP_TYPEPOSQUERY:
2854
34.3M
    reptype = REPTYPE_POS;
2855
34.3M
    Lmin = 0;
2856
34.3M
    Lmax = 1;
2857
34.3M
    Fecode++;
2858
34.3M
    goto REPEATTYPE;
2859
2860
0
    case OP_TYPEPOSUPTO:
2861
0
    reptype = REPTYPE_POS;
2862
0
    Lmin = 0;
2863
0
    Lmax = GET2(Fecode, 1);
2864
0
    Fecode += 1 + IMM2_SIZE;
2865
0
    goto REPEATTYPE;
2866
2867
15.1k
    case OP_TYPESTAR:
2868
16.0k
    case OP_TYPEMINSTAR:
2869
6.50M
    case OP_TYPEPLUS:
2870
6.67M
    case OP_TYPEMINPLUS:
2871
16.8M
    case OP_TYPEQUERY:
2872
17.0M
    case OP_TYPEMINQUERY:
2873
17.0M
    fc = *Fecode++ - OP_TYPESTAR;
2874
17.0M
    Lmin = rep_min[fc];
2875
17.0M
    Lmax = rep_max[fc];
2876
17.0M
    reptype = rep_typ[fc];
2877
2878
    /* Common code for all repeated character type matches. */
2879
2880
56.3M
    REPEATTYPE:
2881
56.3M
    Lctype = *Fecode++;      /* Code for the character type */
2882
2883
56.3M
#ifdef SUPPORT_UNICODE
2884
56.3M
    if (Lctype == OP_PROP || Lctype == OP_NOTPROP)
2885
2.23M
      {
2886
2.23M
      proptype = *Fecode++;
2887
2.23M
      Lpropvalue = *Fecode++;
2888
2.23M
      }
2889
54.0M
    else proptype = -1;
2890
56.3M
#endif
2891
2892
    /* First, ensure the minimum number of matches are present. Use inline
2893
    code for maximizing the speed, and do the type test once at the start
2894
    (i.e. keep it out of the loops). As there are no calls to RMATCH in the
2895
    loops, we can use an ordinary variable for "notmatch". The code for UTF
2896
    mode is separated out for tidiness, except for Unicode property tests. */
2897
2898
56.3M
    if (Lmin > 0)
2899
11.1M
      {
2900
11.1M
#ifdef SUPPORT_UNICODE
2901
11.1M
      if (proptype >= 0)  /* Property tests in all modes */
2902
1.66M
        {
2903
1.66M
        BOOL notmatch = Lctype == OP_NOTPROP;
2904
1.66M
        switch(proptype)
2905
1.66M
          {
2906
0
          case PT_LAMP:
2907
0
          for (i = 1; i <= Lmin; i++)
2908
0
            {
2909
0
            int chartype;
2910
0
            if (Feptr >= mb->end_subject)
2911
0
              {
2912
0
              SCHECK_PARTIAL();
2913
0
              RRETURN(MATCH_NOMATCH);
2914
0
              }
2915
0
            GETCHARINCTEST(fc, Feptr);
2916
0
            chartype = UCD_CHARTYPE(fc);
2917
0
            if ((chartype == ucp_Lu ||
2918
0
                 chartype == ucp_Ll ||
2919
0
                 chartype == ucp_Lt) == notmatch)
2920
0
              RRETURN(MATCH_NOMATCH);
2921
0
            }
2922
0
          break;
2923
2924
488
          case PT_GC:
2925
952
          for (i = 1; i <= Lmin; i++)
2926
488
            {
2927
488
            if (Feptr >= mb->end_subject)
2928
0
              {
2929
0
              SCHECK_PARTIAL();
2930
0
              RRETURN(MATCH_NOMATCH);
2931
0
              }
2932
488
            GETCHARINCTEST(fc, Feptr);
2933
488
            if ((UCD_CATEGORY(fc) == Lpropvalue) == notmatch)
2934
24
              RRETURN(MATCH_NOMATCH);
2935
488
            }
2936
464
          break;
2937
2938
151k
          case PT_PC:
2939
153k
          for (i = 1; i <= Lmin; i++)
2940
151k
            {
2941
151k
            if (Feptr >= mb->end_subject)
2942
0
              {
2943
0
              SCHECK_PARTIAL();
2944
0
              RRETURN(MATCH_NOMATCH);
2945
0
              }
2946
151k
            GETCHARINCTEST(fc, Feptr);
2947
151k
            if ((UCD_CHARTYPE(fc) == Lpropvalue) == notmatch)
2948
148k
              RRETURN(MATCH_NOMATCH);
2949
151k
            }
2950
2.72k
          break;
2951
2952
2.72k
          case PT_SC:
2953
0
          for (i = 1; i <= Lmin; i++)
2954
0
            {
2955
0
            if (Feptr >= mb->end_subject)
2956
0
              {
2957
0
              SCHECK_PARTIAL();
2958
0
              RRETURN(MATCH_NOMATCH);
2959
0
              }
2960
0
            GETCHARINCTEST(fc, Feptr);
2961
0
            if ((UCD_SCRIPT(fc) == Lpropvalue) == notmatch)
2962
0
              RRETURN(MATCH_NOMATCH);
2963
0
            }
2964
0
          break;
2965
2966
0
          case PT_SCX:
2967
0
          for (i = 1; i <= Lmin; i++)
2968
0
            {
2969
0
            BOOL ok;
2970
0
            const ucd_record *prop;
2971
0
            if (Feptr >= mb->end_subject)
2972
0
              {
2973
0
              SCHECK_PARTIAL();
2974
0
              RRETURN(MATCH_NOMATCH);
2975
0
              }
2976
0
            GETCHARINCTEST(fc, Feptr);
2977
0
            prop = GET_UCD(fc);
2978
0
            ok = (prop->script == Lpropvalue ||
2979
0
                  MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0);
2980
0
            if (ok == notmatch)
2981
0
              RRETURN(MATCH_NOMATCH);
2982
0
            }
2983
0
          break;
2984
2985
0
          case PT_ALNUM:
2986
0
          for (i = 1; i <= Lmin; i++)
2987
0
            {
2988
0
            int category;
2989
0
            if (Feptr >= mb->end_subject)
2990
0
              {
2991
0
              SCHECK_PARTIAL();
2992
0
              RRETURN(MATCH_NOMATCH);
2993
0
              }
2994
0
            GETCHARINCTEST(fc, Feptr);
2995
0
            category = UCD_CATEGORY(fc);
2996
0
            if ((category == ucp_L || category == ucp_N) == notmatch)
2997
0
              RRETURN(MATCH_NOMATCH);
2998
0
            }
2999
0
          break;
3000
3001
          /* Perl space used to exclude VT, but from Perl 5.18 it is included,
3002
          which means that Perl space and POSIX space are now identical. PCRE
3003
          was changed at release 8.34. */
3004
3005
1.48M
          case PT_SPACE:    /* Perl space */
3006
1.48M
          case PT_PXSPACE:  /* POSIX space */
3007
2.12M
          for (i = 1; i <= Lmin; i++)
3008
1.48M
            {
3009
1.48M
            if (Feptr >= mb->end_subject)
3010
11.0k
              {
3011
11.0k
              SCHECK_PARTIAL();
3012
11.0k
              RRETURN(MATCH_NOMATCH);
3013
11.0k
              }
3014
1.47M
            GETCHARINCTEST(fc, Feptr);
3015
1.47M
            switch(fc)
3016
1.47M
              {
3017
6.28M
              HSPACE_CASES:
3018
6.28M
              VSPACE_CASES:
3019
2.91M
              if (notmatch) RRETURN(MATCH_NOMATCH);
3020
376k
              break;
3021
3022
1.05M
              default:
3023
1.05M
              if ((UCD_CATEGORY(fc) == ucp_Z) == notmatch)
3024
793k
                RRETURN(MATCH_NOMATCH);
3025
263k
              break;
3026
1.47M
              }
3027
1.47M
            }
3028
639k
          break;
3029
3030
639k
          case PT_WORD:
3031
46.5k
          for (i = 1; i <= Lmin; i++)
3032
30.2k
            {
3033
30.2k
            int chartype, category;
3034
30.2k
            if (Feptr >= mb->end_subject)
3035
671
              {
3036
671
              SCHECK_PARTIAL();
3037
671
              RRETURN(MATCH_NOMATCH);
3038
671
              }
3039
29.5k
            GETCHARINCTEST(fc, Feptr);
3040
29.5k
            chartype = UCD_CHARTYPE(fc);
3041
29.5k
            category = PRIV(ucp_gentype)[chartype];
3042
29.5k
            if ((category == ucp_L || category == ucp_N ||
3043
13.8k
                 chartype == ucp_Mn || chartype == ucp_Pc) == notmatch)
3044
13.1k
              RRETURN(MATCH_NOMATCH);
3045
29.5k
            }
3046
16.3k
          break;
3047
3048
16.3k
          case PT_CLIST:
3049
57
          for (i = 1; i <= Lmin; i++)
3050
29
            {
3051
29
            const uint32_t *cp;
3052
29
            if (Feptr >= mb->end_subject)
3053
0
              {
3054
0
              SCHECK_PARTIAL();
3055
0
              RRETURN(MATCH_NOMATCH);
3056
0
              }
3057
29
            GETCHARINCTEST(fc, Feptr);
3058
#if PCRE2_CODE_UNIT_WIDTH == 32
3059
            if (fc > MAX_UTF_CODE_POINT)
3060
              {
3061
              if (notmatch) continue;
3062
              RRETURN(MATCH_NOMATCH);
3063
              }
3064
#endif
3065
29
            cp = PRIV(ucd_caseless_sets) + Lpropvalue;
3066
29
            for (;;)
3067
32
              {
3068
32
              if (fc < *cp)
3069
29
                {
3070
29
                if (notmatch) break;
3071
1
                RRETURN(MATCH_NOMATCH);
3072
1
                }
3073
3
              if (fc == *cp++)
3074
0
                {
3075
0
                if (notmatch) RRETURN(MATCH_NOMATCH);
3076
0
                break;
3077
0
                }
3078
3
              }
3079
29
            }
3080
28
          break;
3081
3082
28
          case PT_UCNC:
3083
0
          for (i = 1; i <= Lmin; i++)
3084
0
            {
3085
0
            if (Feptr >= mb->end_subject)
3086
0
              {
3087
0
              SCHECK_PARTIAL();
3088
0
              RRETURN(MATCH_NOMATCH);
3089
0
              }
3090
0
            GETCHARINCTEST(fc, Feptr);
3091
0
            if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
3092
0
                 fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
3093
0
                 fc >= 0xe000) == notmatch)
3094
0
              RRETURN(MATCH_NOMATCH);
3095
0
            }
3096
0
          break;
3097
3098
0
          case PT_BIDICL:
3099
0
          for (i = 1; i <= Lmin; i++)
3100
0
            {
3101
0
            if (Feptr >= mb->end_subject)
3102
0
              {
3103
0
              SCHECK_PARTIAL();
3104
0
              RRETURN(MATCH_NOMATCH);
3105
0
              }
3106
0
            GETCHARINCTEST(fc, Feptr);
3107
0
            if ((UCD_BIDICLASS(fc) == Lpropvalue) == notmatch)
3108
0
              RRETURN(MATCH_NOMATCH);
3109
0
            }
3110
0
          break;
3111
3112
0
          case PT_BOOL:
3113
0
          for (i = 1; i <= Lmin; i++)
3114
0
            {
3115
0
            BOOL ok;
3116
0
            const ucd_record *prop;
3117
0
            if (Feptr >= mb->end_subject)
3118
0
              {
3119
0
              SCHECK_PARTIAL();
3120
0
              RRETURN(MATCH_NOMATCH);
3121
0
              }
3122
0
            GETCHARINCTEST(fc, Feptr);
3123
0
            prop = GET_UCD(fc);
3124
0
            ok = MAPBIT(PRIV(ucd_boolprop_sets) +
3125
0
              UCD_BPROPS_PROP(prop), Lpropvalue) != 0;
3126
0
            if (ok == notmatch)
3127
0
              RRETURN(MATCH_NOMATCH);
3128
0
            }
3129
0
          break;
3130
3131
          /* This should not occur */
3132
3133
0
          default:
3134
0
          PCRE2_DEBUG_UNREACHABLE();
3135
0
          return PCRE2_ERROR_INTERNAL;
3136
1.66M
          }
3137
1.66M
        }
3138
3139
      /* Match extended Unicode sequences. We will get here only if the
3140
      support is in the binary; otherwise a compile-time error occurs. */
3141
3142
9.53M
      else if (Lctype == OP_EXTUNI)
3143
91.7k
        {
3144
183k
        for (i = 1; i <= Lmin; i++)
3145
91.7k
          {
3146
91.7k
          if (Feptr >= mb->end_subject)
3147
294
            {
3148
294
            SCHECK_PARTIAL();
3149
294
            RRETURN(MATCH_NOMATCH);
3150
294
            }
3151
91.4k
          else
3152
91.4k
            {
3153
91.4k
            GETCHARINCTEST(fc, Feptr);
3154
91.4k
            Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject,
3155
91.4k
              mb->end_subject, utf, NULL);
3156
91.4k
            }
3157
91.4k
          CHECK_PARTIAL();
3158
91.4k
          }
3159
91.7k
        }
3160
9.43M
      else
3161
9.43M
#endif     /* SUPPORT_UNICODE */
3162
3163
/* Handle all other cases in UTF mode */
3164
3165
9.43M
#ifdef SUPPORT_UNICODE
3166
9.43M
      if (utf) switch(Lctype)
3167
4.71M
        {
3168
939
        case OP_ANY:
3169
1.87k
        for (i = 1; i <= Lmin; i++)
3170
939
          {
3171
939
          if (Feptr >= mb->end_subject)
3172
0
            {
3173
0
            SCHECK_PARTIAL();
3174
0
            RRETURN(MATCH_NOMATCH);
3175
0
            }
3176
939
          if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
3177
934
          if (mb->partial != 0 &&
3178
0
              Feptr + 1 >= mb->end_subject &&
3179
0
              NLBLOCK->nltype == NLTYPE_FIXED &&
3180
0
              NLBLOCK->nllen == 2 &&
3181
0
              UCHAR21(Feptr) == NLBLOCK->nl[0])
3182
0
            {
3183
0
            mb->hitend = TRUE;
3184
0
            if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
3185
0
            }
3186
934
          Feptr++;
3187
934
          ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
3188
934
          }
3189
934
        break;
3190
3191
3.67M
        case OP_ALLANY:
3192
7.32M
        for (i = 1; i <= Lmin; i++)
3193
3.67M
          {
3194
3.67M
          if (Feptr >= mb->end_subject)
3195
33.7k
            {
3196
33.7k
            SCHECK_PARTIAL();
3197
33.7k
            RRETURN(MATCH_NOMATCH);
3198
33.7k
            }
3199
3.64M
          Feptr++;
3200
3.64M
          ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
3201
3.64M
          }
3202
3.64M
        break;
3203
3204
3.64M
        case OP_ANYBYTE:
3205
13.2k
        if (Feptr > mb->end_subject - Lmin) RRETURN(MATCH_NOMATCH);
3206
13.2k
        Feptr += Lmin;
3207
13.2k
        break;
3208
3209
1.00M
        case OP_ANYNL:
3210
1.04M
        for (i = 1; i <= Lmin; i++)
3211
1.00M
          {
3212
1.00M
          if (Feptr >= mb->end_subject)
3213
29.8k
            {
3214
29.8k
            SCHECK_PARTIAL();
3215
29.8k
            RRETURN(MATCH_NOMATCH);
3216
29.8k
            }
3217
972k
          GETCHARINC(fc, Feptr);
3218
972k
          switch(fc)
3219
972k
            {
3220
929k
            default: RRETURN(MATCH_NOMATCH);
3221
3222
0
            case CHAR_CR:
3223
0
            if (Feptr < mb->end_subject && UCHAR21(Feptr) == CHAR_LF) Feptr++;
3224
0
            break;
3225
3226
20.6k
            case CHAR_LF:
3227
20.6k
            break;
3228
3229
22.4k
            case CHAR_VT:
3230
22.4k
            case CHAR_FF:
3231
22.4k
            case CHAR_NEL:
3232
22.4k
#ifndef EBCDIC
3233
22.4k
            case 0x2028:
3234
22.4k
            case 0x2029:
3235
22.4k
#endif  /* Not EBCDIC */
3236
22.4k
            if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
3237
22.4k
            break;
3238
972k
            }
3239
972k
          }
3240
43.1k
        break;
3241
3242
43.1k
        case OP_NOT_HSPACE:
3243
16.1k
        for (i = 1; i <= Lmin; i++)
3244
8.71k
          {
3245
8.71k
          if (Feptr >= mb->end_subject)
3246
2
            {
3247
2
            SCHECK_PARTIAL();
3248
2
            RRETURN(MATCH_NOMATCH);
3249
2
            }
3250
8.71k
          GETCHARINC(fc, Feptr);
3251
8.71k
          switch(fc)
3252
8.71k
            {
3253
23.2k
            HSPACE_CASES: RRETURN(MATCH_NOMATCH);
3254
7.42k
            default: break;
3255
8.71k
            }
3256
8.71k
          }
3257
7.42k
        break;
3258
3259
7.42k
        case OP_HSPACE:
3260
687
        for (i = 1; i <= Lmin; i++)
3261
603
          {
3262
603
          if (Feptr >= mb->end_subject)
3263
12
            {
3264
12
            SCHECK_PARTIAL();
3265
12
            RRETURN(MATCH_NOMATCH);
3266
12
            }
3267
591
          GETCHARINC(fc, Feptr);
3268
591
          switch(fc)
3269
591
            {
3270
84
            HSPACE_CASES: break;
3271
507
            default: RRETURN(MATCH_NOMATCH);
3272
591
            }
3273
591
          }
3274
84
        break;
3275
3276
5.14k
        case OP_NOT_VSPACE:
3277
9.95k
        for (i = 1; i <= Lmin; i++)
3278
5.14k
          {
3279
5.14k
          if (Feptr >= mb->end_subject)
3280
0
            {
3281
0
            SCHECK_PARTIAL();
3282
0
            RRETURN(MATCH_NOMATCH);
3283
0
            }
3284
5.14k
          GETCHARINC(fc, Feptr);
3285
5.14k
          switch(fc)
3286
5.14k
            {
3287
2.03k
            VSPACE_CASES: RRETURN(MATCH_NOMATCH);
3288
4.81k
            default: break;
3289
5.14k
            }
3290
5.14k
          }
3291
4.81k
        break;
3292
3293
4.81k
        case OP_VSPACE:
3294
1.80k
        for (i = 1; i <= Lmin; i++)
3295
1.68k
          {
3296
1.68k
          if (Feptr >= mb->end_subject)
3297
0
            {
3298
0
            SCHECK_PARTIAL();
3299
0
            RRETURN(MATCH_NOMATCH);
3300
0
            }
3301
1.68k
          GETCHARINC(fc, Feptr);
3302
1.68k
          switch(fc)
3303
1.68k
            {
3304
120
            VSPACE_CASES: break;
3305
1.56k
            default: RRETURN(MATCH_NOMATCH);
3306
1.68k
            }
3307
1.68k
          }
3308
120
        break;
3309
3310
120
        case OP_NOT_DIGIT:
3311
0
        for (i = 1; i <= Lmin; i++)
3312
0
          {
3313
0
          if (Feptr >= mb->end_subject)
3314
0
            {
3315
0
            SCHECK_PARTIAL();
3316
0
            RRETURN(MATCH_NOMATCH);
3317
0
            }
3318
0
          GETCHARINC(fc, Feptr);
3319
0
          if (fc < 128 && (mb->ctypes[fc] & ctype_digit) != 0)
3320
0
            RRETURN(MATCH_NOMATCH);
3321
0
          }
3322
0
        break;
3323
3324
0
        case OP_DIGIT:
3325
0
        for (i = 1; i <= Lmin; i++)
3326
0
          {
3327
0
          uint32_t cc;
3328
0
          if (Feptr >= mb->end_subject)
3329
0
            {
3330
0
            SCHECK_PARTIAL();
3331
0
            RRETURN(MATCH_NOMATCH);
3332
0
            }
3333
0
          cc = UCHAR21(Feptr);
3334
0
          if (cc >= 128 || (mb->ctypes[cc] & ctype_digit) == 0)
3335
0
            RRETURN(MATCH_NOMATCH);
3336
0
          Feptr++;
3337
          /* No need to skip more code units - we know it has only one. */
3338
0
          }
3339
0
        break;
3340
3341
0
        case OP_NOT_WHITESPACE:
3342
0
        for (i = 1; i <= Lmin; i++)
3343
0
          {
3344
0
          uint32_t cc;
3345
0
          if (Feptr >= mb->end_subject)
3346
0
            {
3347
0
            SCHECK_PARTIAL();
3348
0
            RRETURN(MATCH_NOMATCH);
3349
0
            }
3350
0
          cc = UCHAR21(Feptr);
3351
0
          if (cc < 128 && (mb->ctypes[cc] & ctype_space) != 0)
3352
0
            RRETURN(MATCH_NOMATCH);
3353
0
          Feptr++;
3354
0
          ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
3355
0
          }
3356
0
        break;
3357
3358
0
        case OP_WHITESPACE:
3359
0
        for (i = 1; i <= Lmin; i++)
3360
0
          {
3361
0
          uint32_t cc;
3362
0
          if (Feptr >= mb->end_subject)
3363
0
            {
3364
0
            SCHECK_PARTIAL();
3365
0
            RRETURN(MATCH_NOMATCH);
3366
0
            }
3367
0
          cc = UCHAR21(Feptr);
3368
0
          if (cc >= 128 || (mb->ctypes[cc] & ctype_space) == 0)
3369
0
            RRETURN(MATCH_NOMATCH);
3370
0
          Feptr++;
3371
          /* No need to skip more code units - we know it has only one. */
3372
0
          }
3373
0
        break;
3374
3375
0
        case OP_NOT_WORDCHAR:
3376
0
        for (i = 1; i <= Lmin; i++)
3377
0
          {
3378
0
          uint32_t cc;
3379
0
          if (Feptr >= mb->end_subject)
3380
0
            {
3381
0
            SCHECK_PARTIAL();
3382
0
            RRETURN(MATCH_NOMATCH);
3383
0
            }
3384
0
          cc = UCHAR21(Feptr);
3385
0
          if (cc < 128 && (mb->ctypes[cc] & ctype_word) != 0)
3386
0
            RRETURN(MATCH_NOMATCH);
3387
0
          Feptr++;
3388
0
          ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
3389
0
          }
3390
0
        break;
3391
3392
0
        case OP_WORDCHAR:
3393
0
        for (i = 1; i <= Lmin; i++)
3394
0
          {
3395
0
          uint32_t cc;
3396
0
          if (Feptr >= mb->end_subject)
3397
0
            {
3398
0
            SCHECK_PARTIAL();
3399
0
            RRETURN(MATCH_NOMATCH);
3400
0
            }
3401
0
          cc = UCHAR21(Feptr);
3402
0
          if (cc >= 128 || (mb->ctypes[cc] & ctype_word) == 0)
3403
0
            RRETURN(MATCH_NOMATCH);
3404
0
          Feptr++;
3405
          /* No need to skip more code units - we know it has only one. */
3406
0
          }
3407
0
        break;
3408
3409
0
        default:
3410
0
        PCRE2_DEBUG_UNREACHABLE();
3411
0
        return PCRE2_ERROR_INTERNAL;
3412
4.71M
        }  /* End switch(Lctype) */
3413
3414
4.72M
      else
3415
4.72M
#endif     /* SUPPORT_UNICODE */
3416
3417
      /* Code for the non-UTF case for minimum matching of operators other
3418
      than OP_PROP and OP_NOTPROP. */
3419
3420
4.72M
      switch(Lctype)
3421
4.72M
        {
3422
74.3k
        case OP_ANY:
3423
147k
        for (i = 1; i <= Lmin; i++)
3424
74.3k
          {
3425
74.3k
          if (Feptr >= mb->end_subject)
3426
131
            {
3427
131
            SCHECK_PARTIAL();
3428
131
            RRETURN(MATCH_NOMATCH);
3429
131
            }
3430
74.2k
          if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
3431
72.9k
          if (mb->partial != 0 &&
3432
0
              Feptr + 1 >= mb->end_subject &&
3433
0
              NLBLOCK->nltype == NLTYPE_FIXED &&
3434
0
              NLBLOCK->nllen == 2 &&
3435
0
              *Feptr == NLBLOCK->nl[0])
3436
0
            {
3437
0
            mb->hitend = TRUE;
3438
0
            if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
3439
0
            }
3440
72.9k
          Feptr++;
3441
72.9k
          }
3442
72.9k
        break;
3443
3444
72.9k
        case OP_ALLANY:
3445
12.1k
        if (Feptr > mb->end_subject - Lmin)
3446
186
          {
3447
186
          SCHECK_PARTIAL();
3448
186
          RRETURN(MATCH_NOMATCH);
3449
186
          }
3450
11.9k
        Feptr += Lmin;
3451
11.9k
        break;
3452
3453
        /* This OP_ANYBYTE case will never be reached because \C gets turned
3454
        into OP_ALLANY in non-UTF mode. Cut out the code so that coverage
3455
        reports don't complain about its never being used. */
3456
3457
/*        case OP_ANYBYTE:
3458
*        if (Feptr > mb->end_subject - Lmin)
3459
*          {
3460
*          SCHECK_PARTIAL();
3461
*          RRETURN(MATCH_NOMATCH);
3462
*          }
3463
*        Feptr += Lmin;
3464
*        break;
3465
*/
3466
3.32M
        case OP_ANYNL:
3467
3.37M
        for (i = 1; i <= Lmin; i++)
3468
3.32M
          {
3469
3.32M
          if (Feptr >= mb->end_subject)
3470
5.37k
            {
3471
5.37k
            SCHECK_PARTIAL();
3472
5.37k
            RRETURN(MATCH_NOMATCH);
3473
5.37k
            }
3474
3.32M
          switch(*Feptr++)
3475
3.32M
            {
3476
3.27M
            default: RRETURN(MATCH_NOMATCH);
3477
3478
11.6k
            case CHAR_CR:
3479
11.6k
            if (Feptr < mb->end_subject && *Feptr == CHAR_LF) Feptr++;
3480
11.6k
            break;
3481
3482
21.4k
            case CHAR_LF:
3483
21.4k
            break;
3484
3485
3.18k
            case CHAR_VT:
3486
9.89k
            case CHAR_FF:
3487
10.6k
            case CHAR_NEL:
3488
#if PCRE2_CODE_UNIT_WIDTH != 8
3489
            case 0x2028:
3490
            case 0x2029:
3491
#endif
3492
10.6k
            if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
3493
10.6k
            break;
3494
3.32M
            }
3495
3.32M
          }
3496
43.6k
        break;
3497
3498
43.6k
        case OP_NOT_HSPACE:
3499
39.5k
        for (i = 1; i <= Lmin; i++)
3500
20.0k
          {
3501
20.0k
          if (Feptr >= mb->end_subject)
3502
0
            {
3503
0
            SCHECK_PARTIAL();
3504
0
            RRETURN(MATCH_NOMATCH);
3505
0
            }
3506
20.0k
          switch(*Feptr++)
3507
20.0k
            {
3508
19.5k
            default: break;
3509
19.5k
            HSPACE_BYTE_CASES:
3510
#if PCRE2_CODE_UNIT_WIDTH != 8
3511
            HSPACE_MULTIBYTE_CASES:
3512
#endif
3513
1.16k
            RRETURN(MATCH_NOMATCH);
3514
20.0k
            }
3515
20.0k
          }
3516
19.5k
        break;
3517
3518
19.5k
        case OP_HSPACE:
3519
2.50k
        for (i = 1; i <= Lmin; i++)
3520
1.54k
          {
3521
1.54k
          if (Feptr >= mb->end_subject)
3522
16
            {
3523
16
            SCHECK_PARTIAL();
3524
16
            RRETURN(MATCH_NOMATCH);
3525
16
            }
3526
1.52k
          switch(*Feptr++)
3527
1.52k
            {
3528
565
            default: RRETURN(MATCH_NOMATCH);
3529
2.83k
            HSPACE_BYTE_CASES:
3530
#if PCRE2_CODE_UNIT_WIDTH != 8
3531
            HSPACE_MULTIBYTE_CASES:
3532
#endif
3533
2.83k
            break;
3534
1.52k
            }
3535
1.52k
          }
3536
963
        break;
3537
3538
984k
        case OP_NOT_VSPACE:
3539
1.94M
        for (i = 1; i <= Lmin; i++)
3540
984k
          {
3541
984k
          if (Feptr >= mb->end_subject)
3542
3.93k
            {
3543
3.93k
            SCHECK_PARTIAL();
3544
3.93k
            RRETURN(MATCH_NOMATCH);
3545
3.93k
            }
3546
980k
          switch(*Feptr++)
3547
980k
            {
3548
89.1k
            VSPACE_BYTE_CASES:
3549
#if PCRE2_CODE_UNIT_WIDTH != 8
3550
            VSPACE_MULTIBYTE_CASES:
3551
#endif
3552
89.1k
            RRETURN(MATCH_NOMATCH);
3553
955k
            default: break;
3554
980k
            }
3555
980k
          }
3556
955k
        break;
3557
3558
955k
        case OP_VSPACE:
3559
0
        for (i = 1; i <= Lmin; i++)
3560
0
          {
3561
0
          if (Feptr >= mb->end_subject)
3562
0
            {
3563
0
            SCHECK_PARTIAL();
3564
0
            RRETURN(MATCH_NOMATCH);
3565
0
            }
3566
0
          switch(*Feptr++)
3567
0
            {
3568
0
            default: RRETURN(MATCH_NOMATCH);
3569
0
            VSPACE_BYTE_CASES:
3570
#if PCRE2_CODE_UNIT_WIDTH != 8
3571
            VSPACE_MULTIBYTE_CASES:
3572
#endif
3573
0
            break;
3574
0
            }
3575
0
          }
3576
0
        break;
3577
3578
6.91k
        case OP_NOT_DIGIT:
3579
12.1k
        for (i = 1; i <= Lmin; i++)
3580
6.91k
          {
3581
6.91k
          if (Feptr >= mb->end_subject)
3582
337
            {
3583
337
            SCHECK_PARTIAL();
3584
337
            RRETURN(MATCH_NOMATCH);
3585
337
            }
3586
6.57k
          if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_digit) != 0)
3587
1.37k
            RRETURN(MATCH_NOMATCH);
3588
5.20k
          Feptr++;
3589
5.20k
          }
3590
5.20k
        break;
3591
3592
30.4k
        case OP_DIGIT:
3593
31.6k
        for (i = 1; i <= Lmin; i++)
3594
30.4k
          {
3595
30.4k
          if (Feptr >= mb->end_subject)
3596
404
            {
3597
404
            SCHECK_PARTIAL();
3598
404
            RRETURN(MATCH_NOMATCH);
3599
404
            }
3600
30.0k
          if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_digit) == 0)
3601
28.9k
            RRETURN(MATCH_NOMATCH);
3602
1.15k
          Feptr++;
3603
1.15k
          }
3604
1.15k
        break;
3605
3606
235k
        case OP_NOT_WHITESPACE:
3607
465k
        for (i = 1; i <= Lmin; i++)
3608
235k
          {
3609
235k
          if (Feptr >= mb->end_subject)
3610
1.46k
            {
3611
1.46k
            SCHECK_PARTIAL();
3612
1.46k
            RRETURN(MATCH_NOMATCH);
3613
1.46k
            }
3614
234k
          if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_space) != 0)
3615
4.41k
            RRETURN(MATCH_NOMATCH);
3616
229k
          Feptr++;
3617
229k
          }
3618
229k
        break;
3619
3620
229k
        case OP_WHITESPACE:
3621
277
        for (i = 1; i <= Lmin; i++)
3622
243
          {
3623
243
          if (Feptr >= mb->end_subject)
3624
2
            {
3625
2
            SCHECK_PARTIAL();
3626
2
            RRETURN(MATCH_NOMATCH);
3627
2
            }
3628
241
          if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_space) == 0)
3629
207
            RRETURN(MATCH_NOMATCH);
3630
34
          Feptr++;
3631
34
          }
3632
34
        break;
3633
3634
25.1k
        case OP_NOT_WORDCHAR:
3635
46.0k
        for (i = 1; i <= Lmin; i++)
3636
25.1k
          {
3637
25.1k
          if (Feptr >= mb->end_subject)
3638
240
            {
3639
240
            SCHECK_PARTIAL();
3640
240
            RRETURN(MATCH_NOMATCH);
3641
240
            }
3642
24.8k
          if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_word) != 0)
3643
3.96k
            RRETURN(MATCH_NOMATCH);
3644
20.9k
          Feptr++;
3645
20.9k
          }
3646
20.9k
        break;
3647
3648
20.9k
        case OP_WORDCHAR:
3649
13.9k
        for (i = 1; i <= Lmin; i++)
3650
9.03k
          {
3651
9.03k
          if (Feptr >= mb->end_subject)
3652
3
            {
3653
3
            SCHECK_PARTIAL();
3654
3
            RRETURN(MATCH_NOMATCH);
3655
3
            }
3656
9.03k
          if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_word) == 0)
3657
4.08k
            RRETURN(MATCH_NOMATCH);
3658
4.94k
          Feptr++;
3659
4.94k
          }
3660
4.94k
        break;
3661
3662
4.94k
        default:
3663
0
        PCRE2_DEBUG_UNREACHABLE();
3664
0
        return PCRE2_ERROR_INTERNAL;
3665
4.72M
        }
3666
11.1M
      }
3667
3668
    /* If Lmin = Lmax we are done. Continue with the main loop. */
3669
3670
50.9M
    if (Lmin == Lmax) continue;
3671
3672
    /* If minimizing, we have to test the rest of the pattern before each
3673
    subsequent match. This means we cannot use a local "notmatch" variable as
3674
    in the other cases. As all 4 temporary 32-bit values in the frame are
3675
    already in use, just test the type each time. */
3676
3677
50.9M
    if (reptype == REPTYPE_MIN)
3678
312k
      {
3679
312k
#ifdef SUPPORT_UNICODE
3680
312k
      if (proptype >= 0)
3681
143k
        {
3682
143k
        switch(proptype)
3683
143k
          {
3684
0
          case PT_LAMP:
3685
0
          for (;;)
3686
0
            {
3687
0
            int chartype;
3688
0
            RMATCH(Fecode, RM208);
3689
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3690
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3691
0
            if (Feptr >= mb->end_subject)
3692
0
              {
3693
0
              SCHECK_PARTIAL();
3694
0
              RRETURN(MATCH_NOMATCH);
3695
0
              }
3696
0
            GETCHARINCTEST(fc, Feptr);
3697
0
            chartype = UCD_CHARTYPE(fc);
3698
0
            if ((chartype == ucp_Lu ||
3699
0
                 chartype == ucp_Ll ||
3700
0
                 chartype == ucp_Lt) == (Lctype == OP_NOTPROP))
3701
0
              RRETURN(MATCH_NOMATCH);
3702
0
            }
3703
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
3704
3705
658
          case PT_GC:
3706
658
          for (;;)
3707
28.6k
            {
3708
28.6k
            RMATCH(Fecode, RM209);
3709
28.6k
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3710
28.6k
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3711
28.6k
            if (Feptr >= mb->end_subject)
3712
6
              {
3713
6
              SCHECK_PARTIAL();
3714
6
              RRETURN(MATCH_NOMATCH);
3715
6
              }
3716
28.6k
            GETCHARINCTEST(fc, Feptr);
3717
28.6k
            if ((UCD_CATEGORY(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3718
652
              RRETURN(MATCH_NOMATCH);
3719
28.6k
            }
3720
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
3721
3722
0
          case PT_PC:
3723
0
          for (;;)
3724
0
            {
3725
0
            RMATCH(Fecode, RM210);
3726
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3727
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3728
0
            if (Feptr >= mb->end_subject)
3729
0
              {
3730
0
              SCHECK_PARTIAL();
3731
0
              RRETURN(MATCH_NOMATCH);
3732
0
              }
3733
0
            GETCHARINCTEST(fc, Feptr);
3734
0
            if ((UCD_CHARTYPE(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3735
0
              RRETURN(MATCH_NOMATCH);
3736
0
            }
3737
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
3738
3739
0
          case PT_SC:
3740
0
          for (;;)
3741
0
            {
3742
0
            RMATCH(Fecode, RM211);
3743
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3744
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3745
0
            if (Feptr >= mb->end_subject)
3746
0
              {
3747
0
              SCHECK_PARTIAL();
3748
0
              RRETURN(MATCH_NOMATCH);
3749
0
              }
3750
0
            GETCHARINCTEST(fc, Feptr);
3751
0
            if ((UCD_SCRIPT(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3752
0
              RRETURN(MATCH_NOMATCH);
3753
0
            }
3754
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
3755
3756
0
          case PT_SCX:
3757
0
          for (;;)
3758
0
            {
3759
0
            BOOL ok;
3760
0
            const ucd_record *prop;
3761
0
            RMATCH(Fecode, RM224);
3762
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3763
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3764
0
            if (Feptr >= mb->end_subject)
3765
0
              {
3766
0
              SCHECK_PARTIAL();
3767
0
              RRETURN(MATCH_NOMATCH);
3768
0
              }
3769
0
            GETCHARINCTEST(fc, Feptr);
3770
0
            prop = GET_UCD(fc);
3771
0
            ok = (prop->script == Lpropvalue
3772
0
                  || MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0);
3773
0
            if (ok == (Lctype == OP_NOTPROP))
3774
0
              RRETURN(MATCH_NOMATCH);
3775
0
            }
3776
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
3777
3778
0
          case PT_ALNUM:
3779
0
          for (;;)
3780
0
            {
3781
0
            int category;
3782
0
            RMATCH(Fecode, RM212);
3783
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3784
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3785
0
            if (Feptr >= mb->end_subject)
3786
0
              {
3787
0
              SCHECK_PARTIAL();
3788
0
              RRETURN(MATCH_NOMATCH);
3789
0
              }
3790
0
            GETCHARINCTEST(fc, Feptr);
3791
0
            category = UCD_CATEGORY(fc);
3792
0
            if ((category == ucp_L || category == ucp_N) == (Lctype == OP_NOTPROP))
3793
0
              RRETURN(MATCH_NOMATCH);
3794
0
            }
3795
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
3796
3797
          /* Perl space used to exclude VT, but from Perl 5.18 it is included,
3798
          which means that Perl space and POSIX space are now identical. PCRE
3799
          was changed at release 8.34. */
3800
3801
142k
          case PT_SPACE:    /* Perl space */
3802
142k
          case PT_PXSPACE:  /* POSIX space */
3803
142k
          for (;;)
3804
281k
            {
3805
281k
            RMATCH(Fecode, RM213);
3806
281k
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3807
281k
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3808
154k
            if (Feptr >= mb->end_subject)
3809
8.20k
              {
3810
8.20k
              SCHECK_PARTIAL();
3811
8.20k
              RRETURN(MATCH_NOMATCH);
3812
8.20k
              }
3813
146k
            GETCHARINCTEST(fc, Feptr);
3814
146k
            switch(fc)
3815
146k
              {
3816
18.5k
              HSPACE_CASES:
3817
47.6k
              VSPACE_CASES:
3818
47.6k
              if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
3819
0
              break;
3820
3821
138k
              default:
3822
138k
              if ((UCD_CATEGORY(fc) == ucp_Z) == (Lctype == OP_NOTPROP))
3823
0
                RRETURN(MATCH_NOMATCH);
3824
138k
              break;
3825
146k
              }
3826
146k
            }
3827
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
3828
3829
55
          case PT_WORD:
3830
55
          for (;;)
3831
393
            {
3832
393
            int chartype, category;
3833
393
            RMATCH(Fecode, RM214);
3834
393
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3835
393
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3836
393
            if (Feptr >= mb->end_subject)
3837
0
              {
3838
0
              SCHECK_PARTIAL();
3839
0
              RRETURN(MATCH_NOMATCH);
3840
0
              }
3841
393
            GETCHARINCTEST(fc, Feptr);
3842
393
            chartype = UCD_CHARTYPE(fc);
3843
393
            category = PRIV(ucp_gentype)[chartype];
3844
393
            if ((category == ucp_L ||
3845
185
                 category == ucp_N ||
3846
160
                 chartype == ucp_Mn ||
3847
393
                 chartype == ucp_Pc) == (Lctype == OP_NOTPROP))
3848
55
              RRETURN(MATCH_NOMATCH);
3849
393
            }
3850
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
3851
3852
480
          case PT_CLIST:
3853
480
          for (;;)
3854
480
            {
3855
480
            const uint32_t *cp;
3856
480
            RMATCH(Fecode, RM215);
3857
480
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3858
480
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3859
480
            if (Feptr >= mb->end_subject)
3860
0
              {
3861
0
              SCHECK_PARTIAL();
3862
0
              RRETURN(MATCH_NOMATCH);
3863
0
              }
3864
480
            GETCHARINCTEST(fc, Feptr);
3865
#if PCRE2_CODE_UNIT_WIDTH == 32
3866
            if (fc > MAX_UTF_CODE_POINT)
3867
              {
3868
              if (Lctype == OP_NOTPROP) continue;
3869
              RRETURN(MATCH_NOMATCH);
3870
              }
3871
#endif
3872
480
            cp = PRIV(ucd_caseless_sets) + Lpropvalue;
3873
480
            for (;;)
3874
542
              {
3875
542
              if (fc < *cp)
3876
480
                {
3877
480
                if (Lctype == OP_NOTPROP) break;
3878
480
                RRETURN(MATCH_NOMATCH);
3879
480
                }
3880
62
              if (fc == *cp++)
3881
0
                {
3882
0
                if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
3883
0
                break;
3884
0
                }
3885
62
              }
3886
480
            }
3887
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
3888
3889
0
          case PT_UCNC:
3890
0
          for (;;)
3891
0
            {
3892
0
            RMATCH(Fecode, RM216);
3893
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3894
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3895
0
            if (Feptr >= mb->end_subject)
3896
0
              {
3897
0
              SCHECK_PARTIAL();
3898
0
              RRETURN(MATCH_NOMATCH);
3899
0
              }
3900
0
            GETCHARINCTEST(fc, Feptr);
3901
0
            if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
3902
0
                 fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
3903
0
                 fc >= 0xe000) == (Lctype == OP_NOTPROP))
3904
0
              RRETURN(MATCH_NOMATCH);
3905
0
            }
3906
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
3907
3908
0
          case PT_BIDICL:
3909
0
          for (;;)
3910
0
            {
3911
0
            RMATCH(Fecode, RM223);
3912
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3913
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3914
0
            if (Feptr >= mb->end_subject)
3915
0
              {
3916
0
              SCHECK_PARTIAL();
3917
0
              RRETURN(MATCH_NOMATCH);
3918
0
              }
3919
0
            GETCHARINCTEST(fc, Feptr);
3920
0
            if ((UCD_BIDICLASS(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3921
0
              RRETURN(MATCH_NOMATCH);
3922
0
            }
3923
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
3924
3925
0
          case PT_BOOL:
3926
0
          for (;;)
3927
0
            {
3928
0
            BOOL ok;
3929
0
            const ucd_record *prop;
3930
0
            RMATCH(Fecode, RM222);
3931
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3932
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3933
0
            if (Feptr >= mb->end_subject)
3934
0
              {
3935
0
              SCHECK_PARTIAL();
3936
0
              RRETURN(MATCH_NOMATCH);
3937
0
              }
3938
0
            GETCHARINCTEST(fc, Feptr);
3939
0
            prop = GET_UCD(fc);
3940
0
            ok = MAPBIT(PRIV(ucd_boolprop_sets) +
3941
0
              UCD_BPROPS_PROP(prop), Lpropvalue) != 0;
3942
0
            if (ok == (Lctype == OP_NOTPROP))
3943
0
              RRETURN(MATCH_NOMATCH);
3944
0
            }
3945
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
3946
3947
          /* This should never occur */
3948
0
          default:
3949
0
          PCRE2_DEBUG_UNREACHABLE();
3950
0
          return PCRE2_ERROR_INTERNAL;
3951
143k
          }
3952
143k
        }
3953
3954
      /* Match extended Unicode sequences. We will get here only if the
3955
      support is in the binary; otherwise a compile-time error occurs. */
3956
3957
169k
      else if (Lctype == OP_EXTUNI)
3958
14.3k
        {
3959
14.3k
        for (;;)
3960
2.05M
          {
3961
2.05M
          RMATCH(Fecode, RM217);
3962
2.05M
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3963
2.05M
          if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3964
2.05M
          if (Feptr >= mb->end_subject)
3965
14.3k
            {
3966
14.3k
            SCHECK_PARTIAL();
3967
14.3k
            RRETURN(MATCH_NOMATCH);
3968
14.3k
            }
3969
2.04M
          else
3970
2.04M
            {
3971
2.04M
            GETCHARINCTEST(fc, Feptr);
3972
2.04M
            Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject,
3973
2.04M
              utf, NULL);
3974
2.04M
            }
3975
2.04M
          CHECK_PARTIAL();
3976
2.04M
          }
3977
14.3k
        }
3978
154k
      else
3979
154k
#endif     /* SUPPORT_UNICODE */
3980
3981
      /* UTF mode for non-property testing character types. */
3982
3983
154k
#ifdef SUPPORT_UNICODE
3984
154k
      if (utf)
3985
36.4k
        {
3986
36.4k
        for (;;)
3987
3.90M
          {
3988
3.90M
          RMATCH(Fecode, RM218);
3989
3.90M
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3990
3.90M
          if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3991
3.89M
          if (Feptr >= mb->end_subject)
3992
23.6k
            {
3993
23.6k
            SCHECK_PARTIAL();
3994
23.6k
            RRETURN(MATCH_NOMATCH);
3995
23.6k
            }
3996
3.87M
          if (Lctype == OP_ANY && IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
3997
3.87M
          GETCHARINC(fc, Feptr);
3998
3.87M
          switch(Lctype)
3999
3.87M
            {
4000
42.0k
            case OP_ANY:               /* This is the non-NL case */
4001
42.0k
            if (mb->partial != 0 &&    /* Take care with CRLF partial */
4002
0
                Feptr >= mb->end_subject &&
4003
0
                NLBLOCK->nltype == NLTYPE_FIXED &&
4004
0
                NLBLOCK->nllen == 2 &&
4005
0
                fc == NLBLOCK->nl[0])
4006
0
              {
4007
0
              mb->hitend = TRUE;
4008
0
              if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
4009
0
              }
4010
42.0k
            break;
4011
4012
1.44M
            case OP_ALLANY:
4013
3.73M
            case OP_ANYBYTE:
4014
3.73M
            break;
4015
4016
2.36k
            case OP_ANYNL:
4017
2.36k
            switch(fc)
4018
2.36k
              {
4019
2.33k
              default: RRETURN(MATCH_NOMATCH);
4020
4021
0
              case CHAR_CR:
4022
0
              if (Feptr < mb->end_subject && UCHAR21(Feptr) == CHAR_LF) Feptr++;
4023
0
              break;
4024
4025
0
              case CHAR_LF:
4026
0
              break;
4027
4028
36
              case CHAR_VT:
4029
36
              case CHAR_FF:
4030
36
              case CHAR_NEL:
4031
36
#ifndef EBCDIC
4032
36
              case 0x2028:
4033
36
              case 0x2029:
4034
36
#endif  /* Not EBCDIC */
4035
36
              if (mb->bsr_convention == PCRE2_BSR_ANYCRLF)
4036
0
                RRETURN(MATCH_NOMATCH);
4037
36
              break;
4038
2.36k
              }
4039
36
            break;
4040
4041
86.9k
            case OP_NOT_HSPACE:
4042
86.9k
            switch(fc)
4043
86.9k
              {
4044
103k
              HSPACE_CASES: RRETURN(MATCH_NOMATCH);
4045
81.1k
              default: break;
4046
86.9k
              }
4047
81.1k
            break;
4048
4049
81.1k
            case OP_HSPACE:
4050
0
            switch(fc)
4051
0
              {
4052
0
              HSPACE_CASES: break;
4053
0
              default: RRETURN(MATCH_NOMATCH);
4054
0
              }
4055
0
            break;
4056
4057
5.51k
            case OP_NOT_VSPACE:
4058
5.51k
            switch(fc)
4059
5.51k
              {
4060
1.94k
              VSPACE_CASES: RRETURN(MATCH_NOMATCH);
4061
5.18k
              default: break;
4062
5.51k
              }
4063
5.18k
            break;
4064
4065
5.18k
            case OP_VSPACE:
4066
15
            switch(fc)
4067
15
              {
4068
6
              VSPACE_CASES: break;
4069
9
              default: RRETURN(MATCH_NOMATCH);
4070
15
              }
4071
6
            break;
4072
4073
6
            case OP_NOT_DIGIT:
4074
0
            if (fc < 256 && (mb->ctypes[fc] & ctype_digit) != 0)
4075
0
              RRETURN(MATCH_NOMATCH);
4076
0
            break;
4077
4078
0
            case OP_DIGIT:
4079
0
            if (fc >= 256 || (mb->ctypes[fc] & ctype_digit) == 0)
4080
0
              RRETURN(MATCH_NOMATCH);
4081
0
            break;
4082
4083
0
            case OP_NOT_WHITESPACE:
4084
0
            if (fc < 256 && (mb->ctypes[fc] & ctype_space) != 0)
4085
0
              RRETURN(MATCH_NOMATCH);
4086
0
            break;
4087
4088
0
            case OP_WHITESPACE:
4089
0
            if (fc >= 256 || (mb->ctypes[fc] & ctype_space) == 0)
4090
0
              RRETURN(MATCH_NOMATCH);
4091
0
            break;
4092
4093
0
            case OP_NOT_WORDCHAR:
4094
0
            if (fc < 256 && (mb->ctypes[fc] & ctype_word) != 0)
4095
0
              RRETURN(MATCH_NOMATCH);
4096
0
            break;
4097
4098
0
            case OP_WORDCHAR:
4099
0
            if (fc >= 256 || (mb->ctypes[fc] & ctype_word) == 0)
4100
0
              RRETURN(MATCH_NOMATCH);
4101
0
            break;
4102
4103
0
            default:
4104
0
            PCRE2_DEBUG_UNREACHABLE();
4105
0
            return PCRE2_ERROR_INTERNAL;
4106
3.87M
            }
4107
3.87M
          }
4108
36.4k
        }
4109
118k
      else
4110
118k
#endif  /* SUPPORT_UNICODE */
4111
4112
      /* Not UTF mode */
4113
118k
        {
4114
118k
        for (;;)
4115
6.32M
          {
4116
6.32M
          RMATCH(Fecode, RM33);
4117
6.32M
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4118
6.32M
          if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
4119
6.31M
          if (Feptr >= mb->end_subject)
4120
20.1k
            {
4121
20.1k
            SCHECK_PARTIAL();
4122
20.1k
            RRETURN(MATCH_NOMATCH);
4123
20.1k
            }
4124
6.29M
          if (Lctype == OP_ANY && IS_NEWLINE(Feptr))
4125
844
            RRETURN(MATCH_NOMATCH);
4126
6.29M
          fc = *Feptr++;
4127
6.29M
          switch(Lctype)
4128
6.29M
            {
4129
191k
            case OP_ANY:               /* This is the non-NL case */
4130
191k
            if (mb->partial != 0 &&    /* Take care with CRLF partial */
4131
0
                Feptr >= mb->end_subject &&
4132
0
                NLBLOCK->nltype == NLTYPE_FIXED &&
4133
0
                NLBLOCK->nllen == 2 &&
4134
0
                fc == NLBLOCK->nl[0])
4135
0
              {
4136
0
              mb->hitend = TRUE;
4137
0
              if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
4138
0
              }
4139
191k
            break;
4140
4141
1.36M
            case OP_ALLANY:
4142
1.36M
            case OP_ANYBYTE:
4143
1.36M
            break;
4144
4145
1.17k
            case OP_ANYNL:
4146
1.17k
            switch(fc)
4147
1.17k
              {
4148
1.08k
              default: RRETURN(MATCH_NOMATCH);
4149
4150
24
              case CHAR_CR:
4151
24
              if (Feptr < mb->end_subject && *Feptr == CHAR_LF) Feptr++;
4152
24
              break;
4153
4154
33
              case CHAR_LF:
4155
33
              break;
4156
4157
24
              case CHAR_VT:
4158
24
              case CHAR_FF:
4159
42
              case CHAR_NEL:
4160
#if PCRE2_CODE_UNIT_WIDTH != 8
4161
              case 0x2028:
4162
              case 0x2029:
4163
#endif
4164
42
              if (mb->bsr_convention == PCRE2_BSR_ANYCRLF)
4165
0
                RRETURN(MATCH_NOMATCH);
4166
42
              break;
4167
1.17k
              }
4168
99
            break;
4169
4170
803k
            case OP_NOT_HSPACE:
4171
803k
            switch(fc)
4172
803k
              {
4173
792k
              default: break;
4174
792k
              HSPACE_BYTE_CASES:
4175
#if PCRE2_CODE_UNIT_WIDTH != 8
4176
              HSPACE_MULTIBYTE_CASES:
4177
#endif
4178
25.0k
              RRETURN(MATCH_NOMATCH);
4179
803k
              }
4180
792k
            break;
4181
4182
792k
            case OP_HSPACE:
4183
0
            switch(fc)
4184
0
              {
4185
0
              default: RRETURN(MATCH_NOMATCH);
4186
0
              HSPACE_BYTE_CASES:
4187
#if PCRE2_CODE_UNIT_WIDTH != 8
4188
              HSPACE_MULTIBYTE_CASES:
4189
#endif
4190
0
              break;
4191
0
              }
4192
0
            break;
4193
4194
3.31M
            case OP_NOT_VSPACE:
4195
3.31M
            switch(fc)
4196
3.31M
              {
4197
3.25M
              default: break;
4198
3.25M
              VSPACE_BYTE_CASES:
4199
#if PCRE2_CODE_UNIT_WIDTH != 8
4200
              VSPACE_MULTIBYTE_CASES:
4201
#endif
4202
227k
              RRETURN(MATCH_NOMATCH);
4203
3.31M
              }
4204
3.25M
            break;
4205
4206
3.25M
            case OP_VSPACE:
4207
0
            switch(fc)
4208
0
              {
4209
0
              default: RRETURN(MATCH_NOMATCH);
4210
0
              VSPACE_BYTE_CASES:
4211
#if PCRE2_CODE_UNIT_WIDTH != 8
4212
              VSPACE_MULTIBYTE_CASES:
4213
#endif
4214
0
              break;
4215
0
              }
4216
0
            break;
4217
4218
46.3k
            case OP_NOT_DIGIT:
4219
46.3k
            if (MAX_255(fc) && (mb->ctypes[fc] & ctype_digit) != 0)
4220
2.74k
              RRETURN(MATCH_NOMATCH);
4221
43.6k
            break;
4222
4223
43.6k
            case OP_DIGIT:
4224
98
            if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_digit) == 0)
4225
60
              RRETURN(MATCH_NOMATCH);
4226
38
            break;
4227
4228
378k
            case OP_NOT_WHITESPACE:
4229
378k
            if (MAX_255(fc) && (mb->ctypes[fc] & ctype_space) != 0)
4230
5.30k
              RRETURN(MATCH_NOMATCH);
4231
373k
            break;
4232
4233
373k
            case OP_WHITESPACE:
4234
10
            if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_space) == 0)
4235
8
              RRETURN(MATCH_NOMATCH);
4236
2
            break;
4237
4238
164k
            case OP_NOT_WORDCHAR:
4239
164k
            if (MAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0)
4240
16.2k
              RRETURN(MATCH_NOMATCH);
4241
148k
            break;
4242
4243
148k
            case OP_WORDCHAR:
4244
29.8k
            if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_word) == 0)
4245
1.11k
              RRETURN(MATCH_NOMATCH);
4246
28.7k
            break;
4247
4248
28.7k
            default:
4249
0
            PCRE2_DEBUG_UNREACHABLE();
4250
0
            return PCRE2_ERROR_INTERNAL;
4251
6.29M
            }
4252
6.29M
          }
4253
118k
        }
4254
4255
0
      PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */
4256
0
      }
4257
4258
    /* If maximizing, it is worth using inline code for speed, doing the type
4259
    test once at the start (i.e. keep it out of the loops). Once again,
4260
    "notmatch" can be an ordinary local variable because the loops do not call
4261
    RMATCH. */
4262
4263
50.6M
    else
4264
50.6M
      {
4265
50.6M
      Lstart_eptr = Feptr;  /* Remember where we started */
4266
4267
50.6M
#ifdef SUPPORT_UNICODE
4268
50.6M
      if (proptype >= 0)
4269
1.07M
        {
4270
1.07M
        BOOL notmatch = Lctype == OP_NOTPROP;
4271
1.07M
        switch(proptype)
4272
1.07M
          {
4273
0
          case PT_LAMP:
4274
0
          for (i = Lmin; i < Lmax; i++)
4275
0
            {
4276
0
            int chartype;
4277
0
            int len = 1;
4278
0
            if (Feptr >= mb->end_subject)
4279
0
              {
4280
0
              SCHECK_PARTIAL();
4281
0
              break;
4282
0
              }
4283
0
            GETCHARLENTEST(fc, Feptr, len);
4284
0
            chartype = UCD_CHARTYPE(fc);
4285
0
            if ((chartype == ucp_Lu ||
4286
0
                 chartype == ucp_Ll ||
4287
0
                 chartype == ucp_Lt) == notmatch)
4288
0
              break;
4289
0
            Feptr+= len;
4290
0
            }
4291
0
          break;
4292
4293
526
          case PT_GC:
4294
20.8k
          for (i = Lmin; i < Lmax; i++)
4295
20.8k
            {
4296
20.8k
            int len = 1;
4297
20.8k
            if (Feptr >= mb->end_subject)
4298
12
              {
4299
12
              SCHECK_PARTIAL();
4300
12
              break;
4301
12
              }
4302
20.8k
            GETCHARLENTEST(fc, Feptr, len);
4303
20.8k
            if ((UCD_CATEGORY(fc) == Lpropvalue) == notmatch) break;
4304
20.3k
            Feptr+= len;
4305
20.3k
            }
4306
526
          break;
4307
4308
2.73k
          case PT_PC:
4309
19.4k
          for (i = Lmin; i < Lmax; i++)
4310
19.4k
            {
4311
19.4k
            int len = 1;
4312
19.4k
            if (Feptr >= mb->end_subject)
4313
3
              {
4314
3
              SCHECK_PARTIAL();
4315
3
              break;
4316
3
              }
4317
19.4k
            GETCHARLENTEST(fc, Feptr, len);
4318
19.4k
            if ((UCD_CHARTYPE(fc) == Lpropvalue) == notmatch) break;
4319
16.7k
            Feptr+= len;
4320
16.7k
            }
4321
2.73k
          break;
4322
4323
2.73k
          case PT_SC:
4324
0
          for (i = Lmin; i < Lmax; i++)
4325
0
            {
4326
0
            int len = 1;
4327
0
            if (Feptr >= mb->end_subject)
4328
0
              {
4329
0
              SCHECK_PARTIAL();
4330
0
              break;
4331
0
              }
4332
0
            GETCHARLENTEST(fc, Feptr, len);
4333
0
            if ((UCD_SCRIPT(fc) == Lpropvalue) == notmatch) break;
4334
0
            Feptr+= len;
4335
0
            }
4336
0
          break;
4337
4338
0
          case PT_SCX:
4339
0
          for (i = Lmin; i < Lmax; i++)
4340
0
            {
4341
0
            BOOL ok;
4342
0
            const ucd_record *prop;
4343
0
            int len = 1;
4344
0
            if (Feptr >= mb->end_subject)
4345
0
              {
4346
0
              SCHECK_PARTIAL();
4347
0
              break;
4348
0
              }
4349
0
            GETCHARLENTEST(fc, Feptr, len);
4350
0
            prop = GET_UCD(fc);
4351
0
            ok = (prop->script == Lpropvalue ||
4352
0
                  MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0);
4353
0
            if (ok == notmatch) break;
4354
0
            Feptr+= len;
4355
0
            }
4356
0
          break;
4357
4358
0
          case PT_ALNUM:
4359
0
          for (i = Lmin; i < Lmax; i++)
4360
0
            {
4361
0
            int category;
4362
0
            int len = 1;
4363
0
            if (Feptr >= mb->end_subject)
4364
0
              {
4365
0
              SCHECK_PARTIAL();
4366
0
              break;
4367
0
              }
4368
0
            GETCHARLENTEST(fc, Feptr, len);
4369
0
            category = UCD_CATEGORY(fc);
4370
0
            if ((category == ucp_L || category == ucp_N) == notmatch)
4371
0
              break;
4372
0
            Feptr+= len;
4373
0
            }
4374
0
          break;
4375
4376
          /* Perl space used to exclude VT, but from Perl 5.18 it is included,
4377
          which means that Perl space and POSIX space are now identical. PCRE
4378
          was changed at release 8.34. */
4379
4380
644k
          case PT_SPACE:    /* Perl space */
4381
644k
          case PT_PXSPACE:  /* POSIX space */
4382
5.03M
          for (i = Lmin; i < Lmax; i++)
4383
5.03M
            {
4384
5.03M
            int len = 1;
4385
5.03M
            if (Feptr >= mb->end_subject)
4386
33.8k
              {
4387
33.8k
              SCHECK_PARTIAL();
4388
33.8k
              break;
4389
33.8k
              }
4390
4.99M
            GETCHARLENTEST(fc, Feptr, len);
4391
4.99M
            switch(fc)
4392
4.99M
              {
4393
18.9M
              HSPACE_CASES:
4394
18.9M
              VSPACE_CASES:
4395
8.22M
              if (notmatch) goto ENDLOOP99;  /* Break the loop */
4396
954k
              break;
4397
4398
3.81M
              default:
4399
3.81M
              if ((UCD_CATEGORY(fc) == ucp_Z) == notmatch)
4400
376k
                goto ENDLOOP99;   /* Break the loop */
4401
3.43M
              break;
4402
4.99M
              }
4403
4.38M
            Feptr+= len;
4404
4.38M
            }
4405
644k
          ENDLOOP99:
4406
644k
          break;
4407
4408
432k
          case PT_WORD:
4409
2.58M
          for (i = Lmin; i < Lmax; i++)
4410
2.58M
            {
4411
2.58M
            int chartype, category;
4412
2.58M
            int len = 1;
4413
2.58M
            if (Feptr >= mb->end_subject)
4414
107k
              {
4415
107k
              SCHECK_PARTIAL();
4416
107k
              break;
4417
107k
              }
4418
2.47M
            GETCHARLENTEST(fc, Feptr, len);
4419
2.47M
            chartype = UCD_CHARTYPE(fc);
4420
2.47M
            category = PRIV(ucp_gentype)[chartype];
4421
2.47M
            if ((category == ucp_L ||
4422
2.17M
                 category == ucp_N ||
4423
2.12M
                 chartype == ucp_Mn ||
4424
2.12M
                 chartype == ucp_Pc) == notmatch)
4425
324k
              break;
4426
2.15M
            Feptr+= len;
4427
2.15M
            }
4428
432k
          break;
4429
4430
432k
          case PT_CLIST:
4431
783
          for (i = Lmin; i < Lmax; i++)
4432
783
            {
4433
783
            const uint32_t *cp;
4434
783
            int len = 1;
4435
783
            if (Feptr >= mb->end_subject)
4436
28
              {
4437
28
              SCHECK_PARTIAL();
4438
28
              break;
4439
28
              }
4440
755
            GETCHARLENTEST(fc, Feptr, len);
4441
#if PCRE2_CODE_UNIT_WIDTH == 32
4442
            if (fc > MAX_UTF_CODE_POINT)
4443
              {
4444
              if (!notmatch) goto GOT_MAX;
4445
              }
4446
            else
4447
#endif
4448
755
              {
4449
755
              cp = PRIV(ucd_caseless_sets) + Lpropvalue;
4450
755
              for (;;)
4451
925
                {
4452
925
                if (fc < *cp)
4453
755
                  { if (notmatch) break; else goto GOT_MAX; }
4454
170
                if (fc == *cp++)
4455
0
                  { if (notmatch) goto GOT_MAX; else break; }
4456
170
                }
4457
755
              }
4458
4459
749
            Feptr += len;
4460
749
            }
4461
34
          GOT_MAX:
4462
34
          break;
4463
4464
28
          case PT_UCNC:
4465
0
          for (i = Lmin; i < Lmax; i++)
4466
0
            {
4467
0
            int len = 1;
4468
0
            if (Feptr >= mb->end_subject)
4469
0
              {
4470
0
              SCHECK_PARTIAL();
4471
0
              break;
4472
0
              }
4473
0
            GETCHARLENTEST(fc, Feptr, len);
4474
0
            if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
4475
0
                 fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
4476
0
                 fc >= 0xe000) == notmatch)
4477
0
              break;
4478
0
            Feptr += len;
4479
0
            }
4480
0
          break;
4481
4482
0
          case PT_BIDICL:
4483
0
          for (i = Lmin; i < Lmax; i++)
4484
0
            {
4485
0
            int len = 1;
4486
0
            if (Feptr >= mb->end_subject)
4487
0
              {
4488
0
              SCHECK_PARTIAL();
4489
0
              break;
4490
0
              }
4491
0
            GETCHARLENTEST(fc, Feptr, len);
4492
0
            if ((UCD_BIDICLASS(fc) == Lpropvalue) == notmatch) break;
4493
0
            Feptr+= len;
4494
0
            }
4495
0
          break;
4496
4497
0
          case PT_BOOL:
4498
0
          for (i = Lmin; i < Lmax; i++)
4499
0
            {
4500
0
            BOOL ok;
4501
0
            const ucd_record *prop;
4502
0
            int len = 1;
4503
0
            if (Feptr >= mb->end_subject)
4504
0
              {
4505
0
              SCHECK_PARTIAL();
4506
0
              break;
4507
0
              }
4508
0
            GETCHARLENTEST(fc, Feptr, len);
4509
0
            prop = GET_UCD(fc);
4510
0
            ok = MAPBIT(PRIV(ucd_boolprop_sets) +
4511
0
              UCD_BPROPS_PROP(prop), Lpropvalue) != 0;
4512
0
            if (ok == notmatch) break;
4513
0
            Feptr+= len;
4514
0
            }
4515
0
          break;
4516
4517
0
          default:
4518
0
          PCRE2_DEBUG_UNREACHABLE();
4519
0
          return PCRE2_ERROR_INTERNAL;
4520
1.07M
          }
4521
4522
        /* Feptr is now past the end of the maximum run */
4523
4524
1.07M
        if (reptype == REPTYPE_POS) continue;    /* No backtracking */
4525
4526
        /* After \C in UTF mode, Lstart_eptr might be in the middle of a
4527
        Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
4528
        go too far. */
4529
4530
646k
        for(;;)
4531
5.08M
          {
4532
5.08M
          if (Feptr <= Lstart_eptr) break;
4533
4.43M
          RMATCH(Fecode, RM221);
4534
4.43M
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4535
4.43M
          Feptr--;
4536
4.43M
          if (utf) BACKCHAR(Feptr);
4537
4.43M
          }
4538
646k
        }
4539
4540
      /* Match extended Unicode grapheme clusters. We will get here only if the
4541
      support is in the binary; otherwise a compile-time error occurs. */
4542
4543
49.5M
      else if (Lctype == OP_EXTUNI)
4544
84.3k
        {
4545
10.4M
        for (i = Lmin; i < Lmax; i++)
4546
10.4M
          {
4547
10.4M
          if (Feptr >= mb->end_subject)
4548
84.2k
            {
4549
84.2k
            SCHECK_PARTIAL();
4550
84.2k
            break;
4551
84.2k
            }
4552
10.3M
          else
4553
10.3M
            {
4554
10.3M
            GETCHARINCTEST(fc, Feptr);
4555
10.3M
            Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject,
4556
10.3M
              utf, NULL);
4557
10.3M
            }
4558
10.3M
          CHECK_PARTIAL();
4559
10.3M
          }
4560
4561
        /* Feptr is now past the end of the maximum run */
4562
4563
84.3k
        if (reptype == REPTYPE_POS) continue;    /* No backtracking */
4564
4565
        /* We use <= Lstart_eptr rather than == Lstart_eptr to detect the start
4566
        of the run while backtracking because the use of \C in UTF mode can
4567
        cause BACKCHAR to move back past Lstart_eptr. This is just palliative;
4568
        the use of \C in UTF mode is fraught with danger. */
4569
4570
84.3k
        for(;;)
4571
10.4M
          {
4572
10.4M
          int lgb, rgb;
4573
10.4M
          PCRE2_SPTR fptr;
4574
4575
10.4M
          if (Feptr <= Lstart_eptr) break;   /* At start of char run */
4576
10.3M
          RMATCH(Fecode, RM219);
4577
10.3M
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4578
4579
          /* Backtracking over an extended grapheme cluster involves inspecting
4580
          the previous two characters (if present) to see if a break is
4581
          permitted between them. */
4582
4583
10.3M
          Feptr--;
4584
10.3M
          if (!utf) fc = *Feptr; else
4585
2.41M
            {
4586
2.41M
            BACKCHAR(Feptr);
4587
2.41M
            GETCHAR(fc, Feptr);
4588
2.41M
            }
4589
10.3M
          rgb = UCD_GRAPHBREAK(fc);
4590
4591
10.3M
          for (;;)
4592
10.3M
            {
4593
10.3M
            if (Feptr <= Lstart_eptr) break;   /* At start of char run */
4594
10.3M
            fptr = Feptr - 1;
4595
10.3M
            if (!utf) fc = *fptr; else
4596
2.41M
              {
4597
2.41M
              BACKCHAR(fptr);
4598
2.41M
              GETCHAR(fc, fptr);
4599
2.41M
              }
4600
10.3M
            lgb = UCD_GRAPHBREAK(fc);
4601
10.3M
            if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
4602
6.05k
            Feptr = fptr;
4603
6.05k
            rgb = lgb;
4604
6.05k
            }
4605
10.3M
          }
4606
84.3k
        }
4607
4608
49.4M
      else
4609
49.4M
#endif   /* SUPPORT_UNICODE */
4610
4611
49.4M
#ifdef SUPPORT_UNICODE
4612
49.4M
      if (utf)
4613
37.8M
        {
4614
37.8M
        switch(Lctype)
4615
37.8M
          {
4616
683
          case OP_ANY:
4617
6.26k
          for (i = Lmin; i < Lmax; i++)
4618
5.70k
            {
4619
5.70k
            if (Feptr >= mb->end_subject)
4620
125
              {
4621
125
              SCHECK_PARTIAL();
4622
125
              break;
4623
125
              }
4624
5.58k
            if (IS_NEWLINE(Feptr)) break;
4625
5.58k
            if (mb->partial != 0 &&    /* Take care with CRLF partial */
4626
0
                Feptr + 1 >= mb->end_subject &&
4627
0
                NLBLOCK->nltype == NLTYPE_FIXED &&
4628
0
                NLBLOCK->nllen == 2 &&
4629
0
                UCHAR21(Feptr) == NLBLOCK->nl[0])
4630
0
              {
4631
0
              mb->hitend = TRUE;
4632
0
              if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
4633
0
              }
4634
5.58k
            Feptr++;
4635
5.58k
            ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
4636
5.58k
            }
4637
683
          break;
4638
4639
3.63M
          case OP_ALLANY:
4640
3.63M
          if (Lmax < UINT32_MAX)
4641
1.24k
            {
4642
2.46k
            for (i = Lmin; i < Lmax; i++)
4643
1.24k
              {
4644
1.24k
              if (Feptr >= mb->end_subject)
4645
30
                {
4646
30
                SCHECK_PARTIAL();
4647
30
                break;
4648
30
                }
4649
1.21k
              Feptr++;
4650
1.21k
              ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
4651
1.21k
              }
4652
1.24k
            }
4653
3.63M
          else
4654
3.63M
            {
4655
3.63M
            Feptr = mb->end_subject;   /* Unlimited UTF-8 repeat */
4656
3.63M
            SCHECK_PARTIAL();
4657
3.63M
            }
4658
3.63M
          break;
4659
4660
          /* The "byte" (i.e. "code unit") case is the same as non-UTF */
4661
4662
3.63M
          case OP_ANYBYTE:
4663
267
          fc = Lmax - Lmin;
4664
267
          if (fc > (uint32_t)(mb->end_subject - Feptr))
4665
267
            {
4666
267
            Feptr = mb->end_subject;
4667
267
            SCHECK_PARTIAL();
4668
267
            }
4669
0
          else Feptr += fc;
4670
267
          break;
4671
4672
34.2M
          case OP_ANYNL:
4673
38.8M
          for (i = Lmin; i < Lmax; i++)
4674
34.2M
            {
4675
34.2M
            int len = 1;
4676
34.2M
            if (Feptr >= mb->end_subject)
4677
550k
              {
4678
550k
              SCHECK_PARTIAL();
4679
550k
              break;
4680
550k
              }
4681
33.6M
            GETCHARLEN(fc, Feptr, len);
4682
33.6M
            if (fc == CHAR_CR)
4683
0
              {
4684
0
              if (++Feptr >= mb->end_subject) break;
4685
0
              if (UCHAR21(Feptr) == CHAR_LF) Feptr++;
4686
0
              }
4687
33.6M
            else
4688
33.6M
              {
4689
33.6M
              if (fc != CHAR_LF &&
4690
31.7M
                  (mb->bsr_convention == PCRE2_BSR_ANYCRLF ||
4691
31.7M
                   (fc != CHAR_VT && fc != CHAR_FF && fc != CHAR_NEL
4692
29.0M
#ifndef EBCDIC
4693
29.0M
                    && fc != 0x2028 && fc != 0x2029
4694
31.7M
#endif  /* Not EBCDIC */
4695
31.7M
                    )))
4696
29.0M
                break;
4697
4.59M
              Feptr += len;
4698
4.59M
              }
4699
33.6M
            }
4700
34.2M
          break;
4701
4702
34.2M
          case OP_NOT_HSPACE:
4703
7.52k
          case OP_HSPACE:
4704
92.8k
          for (i = Lmin; i < Lmax; i++)
4705
87.2k
            {
4706
87.2k
            BOOL gotspace;
4707
87.2k
            int len = 1;
4708
87.2k
            if (Feptr >= mb->end_subject)
4709
314
              {
4710
314
              SCHECK_PARTIAL();
4711
314
              break;
4712
314
              }
4713
86.9k
            GETCHARLEN(fc, Feptr, len);
4714
86.9k
            switch(fc)
4715
86.9k
              {
4716
1.64k
              HSPACE_CASES: gotspace = TRUE; break;
4717
85.3k
              default: gotspace = FALSE; break;
4718
86.9k
              }
4719
86.9k
            if (gotspace == (Lctype == OP_NOT_HSPACE)) break;
4720
85.3k
            Feptr += len;
4721
85.3k
            }
4722
7.52k
          break;
4723
4724
7.52k
          case OP_NOT_VSPACE:
4725
6.84k
          case OP_VSPACE:
4726
124k
          for (i = Lmin; i < Lmax; i++)
4727
124k
            {
4728
124k
            BOOL gotspace;
4729
124k
            int len = 1;
4730
124k
            if (Feptr >= mb->end_subject)
4731
99
              {
4732
99
              SCHECK_PARTIAL();
4733
99
              break;
4734
99
              }
4735
124k
            GETCHARLEN(fc, Feptr, len);
4736
124k
            switch(fc)
4737
124k
              {
4738
4.76k
              VSPACE_CASES: gotspace = TRUE; break;
4739
119k
              default: gotspace = FALSE; break;
4740
124k
              }
4741
124k
            if (gotspace == (Lctype == OP_NOT_VSPACE)) break;
4742
117k
            Feptr += len;
4743
117k
            }
4744
6.84k
          break;
4745
4746
6.84k
          case OP_NOT_DIGIT:
4747
0
          for (i = Lmin; i < Lmax; i++)
4748
0
            {
4749
0
            int len = 1;
4750
0
            if (Feptr >= mb->end_subject)
4751
0
              {
4752
0
              SCHECK_PARTIAL();
4753
0
              break;
4754
0
              }
4755
0
            GETCHARLEN(fc, Feptr, len);
4756
0
            if (fc < 256 && (mb->ctypes[fc] & ctype_digit) != 0) break;
4757
0
            Feptr+= len;
4758
0
            }
4759
0
          break;
4760
4761
0
          case OP_DIGIT:
4762
0
          for (i = Lmin; i < Lmax; i++)
4763
0
            {
4764
0
            int len = 1;
4765
0
            if (Feptr >= mb->end_subject)
4766
0
              {
4767
0
              SCHECK_PARTIAL();
4768
0
              break;
4769
0
              }
4770
0
            GETCHARLEN(fc, Feptr, len);
4771
0
            if (fc >= 256 ||(mb->ctypes[fc] & ctype_digit) == 0) break;
4772
0
            Feptr+= len;
4773
0
            }
4774
0
          break;
4775
4776
0
          case OP_NOT_WHITESPACE:
4777
0
          for (i = Lmin; i < Lmax; i++)
4778
0
            {
4779
0
            int len = 1;
4780
0
            if (Feptr >= mb->end_subject)
4781
0
              {
4782
0
              SCHECK_PARTIAL();
4783
0
              break;
4784
0
              }
4785
0
            GETCHARLEN(fc, Feptr, len);
4786
0
            if (fc < 256 && (mb->ctypes[fc] & ctype_space) != 0) break;
4787
0
            Feptr+= len;
4788
0
            }
4789
0
          break;
4790
4791
0
          case OP_WHITESPACE:
4792
0
          for (i = Lmin; i < Lmax; i++)
4793
0
            {
4794
0
            int len = 1;
4795
0
            if (Feptr >= mb->end_subject)
4796
0
              {
4797
0
              SCHECK_PARTIAL();
4798
0
              break;
4799
0
              }
4800
0
            GETCHARLEN(fc, Feptr, len);
4801
0
            if (fc >= 256 ||(mb->ctypes[fc] & ctype_space) == 0) break;
4802
0
            Feptr+= len;
4803
0
            }
4804
0
          break;
4805
4806
0
          case OP_NOT_WORDCHAR:
4807
0
          for (i = Lmin; i < Lmax; i++)
4808
0
            {
4809
0
            int len = 1;
4810
0
            if (Feptr >= mb->end_subject)
4811
0
              {
4812
0
              SCHECK_PARTIAL();
4813
0
              break;
4814
0
              }
4815
0
            GETCHARLEN(fc, Feptr, len);
4816
0
            if (fc < 256 && (mb->ctypes[fc] & ctype_word) != 0) break;
4817
0
            Feptr+= len;
4818
0
            }
4819
0
          break;
4820
4821
0
          case OP_WORDCHAR:
4822
0
          for (i = Lmin; i < Lmax; i++)
4823
0
            {
4824
0
            int len = 1;
4825
0
            if (Feptr >= mb->end_subject)
4826
0
              {
4827
0
              SCHECK_PARTIAL();
4828
0
              break;
4829
0
              }
4830
0
            GETCHARLEN(fc, Feptr, len);
4831
0
            if (fc >= 256 || (mb->ctypes[fc] & ctype_word) == 0) break;
4832
0
            Feptr+= len;
4833
0
            }
4834
0
          break;
4835
4836
0
          default:
4837
0
          PCRE2_DEBUG_UNREACHABLE();
4838
0
          return PCRE2_ERROR_INTERNAL;
4839
37.8M
          }
4840
4841
37.8M
        if (reptype == REPTYPE_POS) continue;    /* No backtracking */
4842
4843
        /* After \C in UTF mode, Lstart_eptr might be in the middle of a
4844
        Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't go
4845
        too far. */
4846
4847
3.65M
        for(;;)
4848
254M
          {
4849
254M
          if (Feptr <= Lstart_eptr) break;
4850
250M
          RMATCH(Fecode, RM220);
4851
250M
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4852
250M
          Feptr--;
4853
250M
          BACKCHAR(Feptr);
4854
250M
          if (Lctype == OP_ANYNL && Feptr > Lstart_eptr &&
4855
0
              UCHAR21(Feptr) == CHAR_NL && UCHAR21(Feptr - 1) == CHAR_CR)
4856
0
            Feptr--;
4857
250M
          }
4858
3.65M
        }
4859
11.5M
      else
4860
11.5M
#endif  /* SUPPORT_UNICODE */
4861
4862
      /* Not UTF mode */
4863
11.5M
        {
4864
11.5M
        switch(Lctype)
4865
11.5M
          {
4866
1.31M
          case OP_ANY:
4867
8.57M
          for (i = Lmin; i < Lmax; i++)
4868
7.33M
            {
4869
7.33M
            if (Feptr >= mb->end_subject)
4870
19.3k
              {
4871
19.3k
              SCHECK_PARTIAL();
4872
19.3k
              break;
4873
19.3k
              }
4874
7.31M
            if (IS_NEWLINE(Feptr)) break;
4875
7.25M
            if (mb->partial != 0 &&    /* Take care with CRLF partial */
4876
0
                Feptr + 1 >= mb->end_subject &&
4877
0
                NLBLOCK->nltype == NLTYPE_FIXED &&
4878
0
                NLBLOCK->nllen == 2 &&
4879
0
                *Feptr == NLBLOCK->nl[0])
4880
0
              {
4881
0
              mb->hitend = TRUE;
4882
0
              if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
4883
0
              }
4884
7.25M
            Feptr++;
4885
7.25M
            }
4886
1.31M
          break;
4887
4888
8.83M
          case OP_ALLANY:
4889
8.83M
          case OP_ANYBYTE:
4890
8.83M
          fc = Lmax - Lmin;
4891
8.83M
          if (fc > (uint32_t)(mb->end_subject - Feptr))
4892
43.9k
            {
4893
43.9k
            Feptr = mb->end_subject;
4894
43.9k
            SCHECK_PARTIAL();
4895
43.9k
            }
4896
8.78M
          else Feptr += fc;
4897
8.83M
          break;
4898
4899
8.83M
          case OP_ANYNL:
4900
100k
          for (i = Lmin; i < Lmax; i++)
4901
98.0k
            {
4902
98.0k
            if (Feptr >= mb->end_subject)
4903
6.39k
              {
4904
6.39k
              SCHECK_PARTIAL();
4905
6.39k
              break;
4906
6.39k
              }
4907
91.6k
            fc = *Feptr;
4908
91.6k
            if (fc == CHAR_CR)
4909
6.34k
              {
4910
6.34k
              if (++Feptr >= mb->end_subject) break;
4911
942
              if (*Feptr == CHAR_LF) Feptr++;
4912
942
              }
4913
85.3k
            else
4914
85.3k
              {
4915
85.3k
              if (fc != CHAR_LF && (mb->bsr_convention == PCRE2_BSR_ANYCRLF ||
4916
83.4k
                 (fc != CHAR_VT && fc != CHAR_FF && fc != CHAR_NEL
4917
#if PCRE2_CODE_UNIT_WIDTH != 8
4918
                 && fc != 0x2028 && fc != 0x2029
4919
#endif
4920
83.4k
                 ))) break;
4921
18.9k
              Feptr++;
4922
18.9k
              }
4923
91.6k
            }
4924
80.1k
          break;
4925
4926
80.1k
          case OP_NOT_HSPACE:
4927
523k
          for (i = Lmin; i < Lmax; i++)
4928
523k
            {
4929
523k
            if (Feptr >= mb->end_subject)
4930
1.25k
              {
4931
1.25k
              SCHECK_PARTIAL();
4932
1.25k
              break;
4933
1.25k
              }
4934
522k
            switch(*Feptr)
4935
522k
              {
4936
516k
              default: Feptr++; break;
4937
11.9k
              HSPACE_BYTE_CASES:
4938
#if PCRE2_CODE_UNIT_WIDTH != 8
4939
              HSPACE_MULTIBYTE_CASES:
4940
#endif
4941
11.9k
              goto ENDLOOP00;
4942
522k
              }
4943
522k
            }
4944
7.18k
          ENDLOOP00:
4945
7.18k
          break;
4946
4947
120k
          case OP_HSPACE:
4948
130k
          for (i = Lmin; i < Lmax; i++)
4949
126k
            {
4950
126k
            if (Feptr >= mb->end_subject)
4951
536
              {
4952
536
              SCHECK_PARTIAL();
4953
536
              break;
4954
536
              }
4955
126k
            switch(*Feptr)
4956
126k
              {
4957
116k
              default: goto ENDLOOP01;
4958
116k
              HSPACE_BYTE_CASES:
4959
#if PCRE2_CODE_UNIT_WIDTH != 8
4960
              HSPACE_MULTIBYTE_CASES:
4961
#endif
4962
26.1k
              Feptr++; break;
4963
126k
              }
4964
126k
            }
4965
120k
          ENDLOOP01:
4966
120k
          break;
4967
4968
898k
          case OP_NOT_VSPACE:
4969
33.7M
          for (i = Lmin; i < Lmax; i++)
4970
33.7M
            {
4971
33.7M
            if (Feptr >= mb->end_subject)
4972
503
              {
4973
503
              SCHECK_PARTIAL();
4974
503
              break;
4975
503
              }
4976
33.7M
            switch(*Feptr)
4977
33.7M
              {
4978
32.8M
              default: Feptr++; break;
4979
3.29M
              VSPACE_BYTE_CASES:
4980
#if PCRE2_CODE_UNIT_WIDTH != 8
4981
              VSPACE_MULTIBYTE_CASES:
4982
#endif
4983
3.29M
              goto ENDLOOP02;
4984
33.7M
              }
4985
33.7M
            }
4986
898k
          ENDLOOP02:
4987
898k
          break;
4988
4989
555
          case OP_VSPACE:
4990
8
          for (i = Lmin; i < Lmax; i++)
4991
8
            {
4992
8
            if (Feptr >= mb->end_subject)
4993
0
              {
4994
0
              SCHECK_PARTIAL();
4995
0
              break;
4996
0
              }
4997
8
            switch(*Feptr)
4998
8
              {
4999
8
              default: goto ENDLOOP03;
5000
8
              VSPACE_BYTE_CASES:
5001
#if PCRE2_CODE_UNIT_WIDTH != 8
5002
              VSPACE_MULTIBYTE_CASES:
5003
#endif
5004
0
              Feptr++; break;
5005
8
              }
5006
8
            }
5007
8
          ENDLOOP03:
5008
8
          break;
5009
5010
22
          case OP_NOT_DIGIT:
5011
157
          for (i = Lmin; i < Lmax; i++)
5012
157
            {
5013
157
            if (Feptr >= mb->end_subject)
5014
6
              {
5015
6
              SCHECK_PARTIAL();
5016
6
              break;
5017
6
              }
5018
151
            if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_digit) != 0)
5019
16
              break;
5020
135
            Feptr++;
5021
135
            }
5022
22
          break;
5023
5024
11.4k
          case OP_DIGIT:
5025
15.5k
          for (i = Lmin; i < Lmax; i++)
5026
15.0k
            {
5027
15.0k
            if (Feptr >= mb->end_subject)
5028
278
              {
5029
278
              SCHECK_PARTIAL();
5030
278
              break;
5031
278
              }
5032
14.7k
            if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_digit) == 0)
5033
10.7k
              break;
5034
4.08k
            Feptr++;
5035
4.08k
            }
5036
11.4k
          break;
5037
5038
224k
          case OP_NOT_WHITESPACE:
5039
8.86M
          for (i = Lmin; i < Lmax; i++)
5040
8.86M
            {
5041
8.86M
            if (Feptr >= mb->end_subject)
5042
43.8k
              {
5043
43.8k
              SCHECK_PARTIAL();
5044
43.8k
              break;
5045
43.8k
              }
5046
8.82M
            if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_space) != 0)
5047
179k
              break;
5048
8.64M
            Feptr++;
5049
8.64M
            }
5050
224k
          break;
5051
5052
224k
          case OP_WHITESPACE:
5053
11.5k
          for (i = Lmin; i < Lmax; i++)
5054
9.79k
            {
5055
9.79k
            if (Feptr >= mb->end_subject)
5056
10
              {
5057
10
              SCHECK_PARTIAL();
5058
10
              break;
5059
10
              }
5060
9.78k
            if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_space) == 0)
5061
8.02k
              break;
5062
1.75k
            Feptr++;
5063
1.75k
            }
5064
9.76k
          break;
5065
5066
9.76k
          case OP_NOT_WORDCHAR:
5067
124k
          for (i = Lmin; i < Lmax; i++)
5068
124k
            {
5069
124k
            if (Feptr >= mb->end_subject)
5070
1.27k
              {
5071
1.27k
              SCHECK_PARTIAL();
5072
1.27k
              break;
5073
1.27k
              }
5074
122k
            if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_word) != 0)
5075
3.07k
              break;
5076
119k
            Feptr++;
5077
119k
            }
5078
4.39k
          break;
5079
5080
81.1k
          case OP_WORDCHAR:
5081
172k
          for (i = Lmin; i < Lmax; i++)
5082
139k
            {
5083
139k
            if (Feptr >= mb->end_subject)
5084
31
              {
5085
31
              SCHECK_PARTIAL();
5086
31
              break;
5087
31
              }
5088
139k
            if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_word) == 0)
5089
48.5k
              break;
5090
90.9k
            Feptr++;
5091
90.9k
            }
5092
81.1k
          break;
5093
5094
81.1k
          default:
5095
0
          PCRE2_DEBUG_UNREACHABLE();
5096
0
          return PCRE2_ERROR_INTERNAL;
5097
11.5M
          }
5098
5099
11.5M
        if (reptype == REPTYPE_POS) continue;    /* No backtracking */
5100
5101
11.4M
        for (;;)
5102
69.5M
          {
5103
69.5M
          if (Feptr == Lstart_eptr) break;
5104
58.0M
          RMATCH(Fecode, RM34);
5105
58.0M
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5106
58.0M
          Feptr--;
5107
58.0M
          if (Lctype == OP_ANYNL && Feptr > Lstart_eptr && *Feptr == CHAR_LF &&
5108
48
              Feptr[-1] == CHAR_CR) Feptr--;
5109
58.0M
          }
5110
11.4M
        }
5111
50.6M
      }
5112
15.7M
    break;  /* End of repeat character type processing */
5113
5114
15.7M
#undef Lstart_eptr
5115
15.7M
#undef Lmin
5116
15.7M
#undef Lmax
5117
15.7M
#undef Lctype
5118
15.7M
#undef Lpropvalue
5119
5120
5121
    /* ===================================================================== */
5122
    /* Match a back reference, possibly repeatedly. Look past the end of the
5123
    item to see if there is repeat information following. The OP_REF and
5124
    OP_REFI opcodes are used for a reference to a numbered group or to a
5125
    non-duplicated named group. For a duplicated named group, OP_DNREF and
5126
    OP_DNREFI are used. In this case we must scan the list of groups to which
5127
    the name refers, and use the first one that is set. */
5128
5129
15.7M
#define Lmin      F->temp_32[0]
5130
15.7M
#define Lmax      F->temp_32[1]
5131
15.7M
#define Lcaseless F->temp_32[2]
5132
15.7M
#define Lcaseopts F->temp_32[3]
5133
15.7M
#define Lstart    F->temp_sptr[0]
5134
15.7M
#define Loffset   F->temp_size
5135
5136
15.7M
    case OP_DNREF:
5137
0
    case OP_DNREFI:
5138
0
    Lcaseless = (Fop == OP_DNREFI);
5139
0
    Lcaseopts = (Fop == OP_DNREFI)? Fecode[1 + 2*IMM2_SIZE] : 0;
5140
0
      {
5141
0
      int count = GET2(Fecode, 1+IMM2_SIZE);
5142
0
      PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size;
5143
0
      Fecode += 1 + 2*IMM2_SIZE + (Fop == OP_DNREFI? 1 : 0);
5144
5145
0
      while (count-- > 0)
5146
0
        {
5147
0
        Loffset = (GET2(slot, 0) << 1) - 2;
5148
0
        if (Loffset < Foffset_top && Fovector[Loffset] != PCRE2_UNSET) break;
5149
0
        slot += mb->name_entry_size;
5150
0
        }
5151
0
      }
5152
0
    goto REF_REPEAT;
5153
5154
15
    case OP_REF:
5155
15
    case OP_REFI:
5156
15
    Lcaseless = (Fop == OP_REFI);
5157
15
    Lcaseopts = (Fop == OP_REFI)? Fecode[1 + IMM2_SIZE] : 0;
5158
15
    Loffset = (GET2(Fecode, 1) << 1) - 2;
5159
15
    Fecode += 1 + IMM2_SIZE + (Fop == OP_REFI? 1 : 0);
5160
5161
    /* Set up for repetition, or handle the non-repeated case. The maximum and
5162
    minimum must be in the heap frame, but as they are short-term values, we
5163
    use temporary fields. */
5164
5165
15
    REF_REPEAT:
5166
15
    switch (*Fecode)
5167
15
      {
5168
0
      case OP_CRSTAR:
5169
0
      case OP_CRMINSTAR:
5170
15
      case OP_CRPLUS:
5171
15
      case OP_CRMINPLUS:
5172
15
      case OP_CRQUERY:
5173
15
      case OP_CRMINQUERY:
5174
15
      fc = *Fecode++ - OP_CRSTAR;
5175
15
      Lmin = rep_min[fc];
5176
15
      Lmax = rep_max[fc];
5177
15
      reptype = rep_typ[fc];
5178
15
      break;
5179
5180
0
      case OP_CRRANGE:
5181
0
      case OP_CRMINRANGE:
5182
0
      Lmin = GET2(Fecode, 1);
5183
0
      Lmax = GET2(Fecode, 1 + IMM2_SIZE);
5184
0
      reptype = rep_typ[*Fecode - OP_CRSTAR];
5185
0
      if (Lmax == 0) Lmax = UINT32_MAX;  /* Max 0 => infinity */
5186
0
      Fecode += 1 + 2 * IMM2_SIZE;
5187
0
      break;
5188
5189
0
      default:                  /* No repeat follows */
5190
0
        {
5191
0
        rrc = match_ref(Loffset, Lcaseless, Lcaseopts, F, mb, &length);
5192
0
        if (rrc != 0)
5193
0
          {
5194
0
          if (rrc > 0) Feptr = mb->end_subject;   /* Partial match */
5195
0
          CHECK_PARTIAL();
5196
0
          RRETURN(MATCH_NOMATCH);
5197
0
          }
5198
0
        }
5199
0
      Feptr += length;
5200
0
      continue;              /* With the main loop */
5201
15
      }
5202
5203
    /* Handle repeated back references. If a set group has length zero, just
5204
    continue with the main loop, because it matches however many times. For an
5205
    unset reference, if the minimum is zero, we can also just continue. We can
5206
    also continue if PCRE2_MATCH_UNSET_BACKREF is set, because this makes an unset
5207
    group behave as a zero-length group. For any other unset cases, carrying
5208
    on will result in NOMATCH. */
5209
5210
15
    if (Loffset < Foffset_top && Fovector[Loffset] != PCRE2_UNSET)
5211
0
      {
5212
0
      if (Fovector[Loffset] == Fovector[Loffset + 1]) continue;
5213
0
      }
5214
15
    else  /* Group is not set */
5215
15
      {
5216
15
      if (Lmin == 0 || (mb->poptions & PCRE2_MATCH_UNSET_BACKREF) != 0)
5217
0
        continue;
5218
15
      }
5219
5220
    /* First, ensure the minimum number of matches are present. */
5221
5222
15
    for (i = 1; i <= Lmin; i++)
5223
15
      {
5224
15
      PCRE2_SIZE slength;
5225
15
      rrc = match_ref(Loffset, Lcaseless, Lcaseopts, F, mb, &slength);
5226
15
      if (rrc != 0)
5227
15
        {
5228
15
        if (rrc > 0) Feptr = mb->end_subject;   /* Partial match */
5229
15
        CHECK_PARTIAL();
5230
15
        RRETURN(MATCH_NOMATCH);
5231
15
        }
5232
0
      Feptr += slength;
5233
0
      }
5234
5235
    /* If min = max, we are done. They are not both allowed to be zero. */
5236
5237
0
    if (Lmin == Lmax) continue;
5238
5239
    /* If minimizing, keep trying and advancing the pointer. */
5240
5241
0
    if (reptype == REPTYPE_MIN)
5242
0
      {
5243
0
      for (;;)
5244
0
        {
5245
0
        PCRE2_SIZE slength;
5246
0
        RMATCH(Fecode, RM20);
5247
0
        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5248
0
        if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
5249
0
        rrc = match_ref(Loffset, Lcaseless, Lcaseopts, F, mb, &slength);
5250
0
        if (rrc != 0)
5251
0
          {
5252
0
          if (rrc > 0) Feptr = mb->end_subject;   /* Partial match */
5253
0
          CHECK_PARTIAL();
5254
0
          RRETURN(MATCH_NOMATCH);
5255
0
          }
5256
0
        Feptr += slength;
5257
0
        }
5258
5259
0
      PCRE2_UNREACHABLE(); /* Control never reaches here */
5260
0
      }
5261
5262
    /* If maximizing, find the longest string and work backwards, as long as
5263
    the matched lengths for each iteration are the same. */
5264
5265
0
    else
5266
0
      {
5267
0
      BOOL samelengths = TRUE;
5268
0
      Lstart = Feptr;     /* Starting position */
5269
0
      Flength = Fovector[Loffset+1] - Fovector[Loffset];
5270
5271
0
      for (i = Lmin; i < Lmax; i++)
5272
0
        {
5273
0
        PCRE2_SIZE slength;
5274
0
        rrc = match_ref(Loffset, Lcaseless, Lcaseopts, F, mb, &slength);
5275
0
        if (rrc != 0)
5276
0
          {
5277
          /* Can't use CHECK_PARTIAL because we don't want to update Feptr in
5278
          the soft partial matching case. */
5279
5280
0
          if (rrc > 0 && mb->partial != 0 &&
5281
0
              mb->end_subject > mb->start_used_ptr)
5282
0
            {
5283
0
            mb->hitend = TRUE;
5284
0
            if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
5285
0
            }
5286
0
          break;
5287
0
          }
5288
5289
0
        if (slength != Flength) samelengths = FALSE;
5290
0
        Feptr += slength;
5291
0
        }
5292
5293
      /* If the length matched for each repetition is the same as the length of
5294
      the captured group, we can easily work backwards. This is the normal
5295
      case. However, in caseless UTF-8 mode there are pairs of case-equivalent
5296
      characters whose lengths (in terms of code units) differ. As this is
5297
      very rare, we handle it by re-matching fewer and fewer times. */
5298
5299
0
      if (samelengths)
5300
0
        {
5301
0
        while (Feptr >= Lstart)
5302
0
          {
5303
0
          RMATCH(Fecode, RM21);
5304
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5305
0
          Feptr -= Flength;
5306
0
          }
5307
0
        }
5308
5309
      /* The rare case of non-matching lengths. Re-scan the repetition for each
5310
      iteration. We know that match_ref() will succeed every time. */
5311
5312
0
      else
5313
0
        {
5314
0
        Lmax = i;
5315
0
        for (;;)
5316
0
          {
5317
0
          RMATCH(Fecode, RM22);
5318
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5319
0
          if (Feptr == Lstart) break; /* Failed after minimal repetition */
5320
0
          Feptr = Lstart;
5321
0
          Lmax--;
5322
0
          for (i = Lmin; i < Lmax; i++)
5323
0
            {
5324
0
            PCRE2_SIZE slength;
5325
0
            (void)match_ref(Loffset, Lcaseless, Lcaseopts, F, mb, &slength);
5326
0
            Feptr += slength;
5327
0
            }
5328
0
          }
5329
0
        }
5330
5331
0
      RRETURN(MATCH_NOMATCH);
5332
0
      }
5333
5334
0
    PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */
5335
5336
0
#undef Lcaseless
5337
0
#undef Lmin
5338
0
#undef Lmax
5339
0
#undef Lstart
5340
0
#undef Loffset
5341
5342
5343
5344
/* ========================================================================= */
5345
/*           Opcodes for the start of various parenthesized items            */
5346
/* ========================================================================= */
5347
5348
    /* In all cases, if the result of RMATCH() is MATCH_THEN, check whether the
5349
    (*THEN) is within the current branch by comparing the address of OP_THEN
5350
    that is passed back with the end of the branch. If (*THEN) is within the
5351
    current branch, and the branch is one of two or more alternatives (it
5352
    either starts or ends with OP_ALT), we have reached the limit of THEN's
5353
    action, so convert the return code to NOMATCH, which will cause normal
5354
    backtracking to happen from now on. Otherwise, THEN is passed back to an
5355
    outer alternative. This implements Perl's treatment of parenthesized
5356
    groups, where a group not containing | does not affect the current
5357
    alternative, that is, (X) is NOT the same as (X|(*F)). */
5358
5359
5360
    /* ===================================================================== */
5361
    /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a non-possessive
5362
    bracket group, indicating that it may occur zero times. It may repeat
5363
    infinitely, or not at all - i.e. it could be ()* or ()? or even (){0} in
5364
    the pattern. Brackets with fixed upper repeat limits are compiled as a
5365
    number of copies, with the optional ones preceded by BRAZERO or BRAMINZERO.
5366
    Possessive groups with possible zero repeats are preceded by BRAPOSZERO. */
5367
5368
4
#define Lnext_ecode F->temp_sptr[0]
5369
5370
1
    case OP_BRAZERO:
5371
1
    Lnext_ecode = Fecode + 1;
5372
1
    RMATCH(Lnext_ecode, RM9);
5373
1
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5374
1
    do Lnext_ecode += GET(Lnext_ecode, 1); while (*Lnext_ecode == OP_ALT);
5375
1
    Fecode = Lnext_ecode + 1 + LINK_SIZE;
5376
1
    break;
5377
5378
0
    case OP_BRAMINZERO:
5379
0
    Lnext_ecode = Fecode + 1;
5380
0
    do Lnext_ecode += GET(Lnext_ecode, 1); while (*Lnext_ecode == OP_ALT);
5381
0
    RMATCH(Lnext_ecode + 1 + LINK_SIZE, RM10);
5382
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5383
0
    Fecode++;
5384
0
    break;
5385
5386
0
#undef Lnext_ecode
5387
5388
0
    case OP_SKIPZERO:
5389
0
    Fecode++;
5390
0
    do Fecode += GET(Fecode,1); while (*Fecode == OP_ALT);
5391
0
    Fecode += 1 + LINK_SIZE;
5392
0
    break;
5393
5394
5395
    /* ===================================================================== */
5396
    /* Handle possessive brackets with an unlimited repeat. The end of these
5397
    brackets will always be OP_KETRPOS, which returns MATCH_KETRPOS without
5398
    going further in the pattern. */
5399
5400
2.52k
#define Lframe_type    F->temp_32[0]
5401
4.37k
#define Lmatched_once  F->temp_32[1]
5402
1.36k
#define Lzero_allowed  F->temp_32[2]
5403
2.31k
#define Lstart_eptr    F->temp_sptr[0]
5404
1.20k
#define Lstart_group   F->temp_sptr[1]
5405
5406
0
    case OP_BRAPOSZERO:
5407
0
    Lzero_allowed = TRUE;                /* Zero repeat is allowed */
5408
0
    Fecode += 1;
5409
0
    if (*Fecode == OP_CBRAPOS || *Fecode == OP_SCBRAPOS)
5410
0
      goto POSSESSIVE_CAPTURE;
5411
0
    goto POSSESSIVE_NON_CAPTURE;
5412
5413
0
    case OP_BRAPOS:
5414
0
    case OP_SBRAPOS:
5415
0
    Lzero_allowed = FALSE;               /* Zero repeat not allowed */
5416
5417
0
    POSSESSIVE_NON_CAPTURE:
5418
0
    Lframe_type = GF_NOCAPTURE;          /* Remembered frame type */
5419
0
    goto POSSESSIVE_GROUP;
5420
5421
272
    case OP_CBRAPOS:
5422
1.14k
    case OP_SCBRAPOS:
5423
1.14k
    Lzero_allowed = FALSE;               /* Zero repeat not allowed */
5424
5425
1.14k
    POSSESSIVE_CAPTURE:
5426
1.14k
    number = GET2(Fecode, 1+LINK_SIZE);
5427
1.14k
    Lframe_type = GF_CAPTURE | number;   /* Remembered frame type */
5428
5429
1.14k
    POSSESSIVE_GROUP:
5430
1.14k
    Lmatched_once = FALSE;               /* Never matched */
5431
1.14k
    Lstart_group = Fecode;               /* Start of this group */
5432
5433
1.14k
    for (;;)
5434
1.37k
      {
5435
1.37k
      Lstart_eptr = Feptr;               /* Position at group start */
5436
1.37k
      group_frame_type = Lframe_type;
5437
1.37k
      RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM8);
5438
1.37k
      if (rrc == MATCH_KETRPOS)
5439
931
        {
5440
931
        Lmatched_once = TRUE;            /* Matched at least once */
5441
931
        if (Feptr == Lstart_eptr)        /* Empty match; skip to end */
5442
875
          {
5443
875
          do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
5444
875
          break;
5445
875
          }
5446
5447
56
        Fecode = Lstart_group;
5448
56
        continue;
5449
931
        }
5450
5451
      /* See comment above about handling THEN. */
5452
5453
448
      if (rrc == MATCH_THEN)
5454
0
        {
5455
0
        PCRE2_SPTR next_ecode = Fecode + GET(Fecode,1);
5456
0
        if (mb->verb_ecode_ptr < next_ecode &&
5457
0
            (*Fecode == OP_ALT || *next_ecode == OP_ALT))
5458
0
          rrc = MATCH_NOMATCH;
5459
0
        }
5460
5461
448
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5462
448
      Fecode += GET(Fecode, 1);
5463
448
      if (*Fecode != OP_ALT) break;
5464
448
      }
5465
5466
    /* Success if matched something or zero repeat allowed */
5467
5468
1.14k
    if (Lmatched_once || Lzero_allowed)
5469
931
      {
5470
931
      Fecode += 1 + LINK_SIZE;
5471
931
      break;
5472
931
      }
5473
5474
216
    RRETURN(MATCH_NOMATCH);
5475
5476
0
#undef Lmatched_once
5477
0
#undef Lzero_allowed
5478
0
#undef Lframe_type
5479
0
#undef Lstart_eptr
5480
0
#undef Lstart_group
5481
5482
5483
    /* ===================================================================== */
5484
    /* Handle non-capturing brackets that cannot match an empty string. When we
5485
    get to the final alternative within the brackets, as long as there are no
5486
    THEN's in the pattern, we can optimize by not recording a new backtracking
5487
    point. (Ideally we should test for a THEN within this group, but we don't
5488
    have that information.) Don't do this if we are at the very top level,
5489
    however, because that would make handling assertions and once-only brackets
5490
    messier when there is nothing to go back to. */
5491
5492
1.02M
#define Lframe_type F->temp_32[0]     /* Set for all that use GROUPLOOP */
5493
74
#define Lnext_branch F->temp_sptr[0]  /* Used only in OP_BRA handling */
5494
5495
239k
    case OP_BRA:
5496
239k
    if (mb->hasthen || Frdepth == 0)
5497
239k
      {
5498
239k
      Lframe_type = 0;
5499
239k
      goto GROUPLOOP;
5500
239k
      }
5501
5502
37
    for (;;)
5503
37
      {
5504
37
      Lnext_branch = Fecode + GET(Fecode, 1);
5505
37
      if (*Lnext_branch != OP_ALT) break;
5506
5507
      /* This is never the final branch. We do not need to test for MATCH_THEN
5508
      here because this code is not used when there is a THEN in the pattern. */
5509
5510
0
      RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM1);
5511
0
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5512
0
      Fecode = Lnext_branch;
5513
0
      }
5514
5515
    /* Hit the start of the final branch. Continue at this level. */
5516
5517
37
    Fecode += PRIV(OP_lengths)[*Fecode];
5518
37
    break;
5519
5520
0
#undef Lnext_branch
5521
5522
5523
    /* ===================================================================== */
5524
    /* Handle a capturing bracket, other than those that are possessive with an
5525
    unlimited repeat. */
5526
5527
5.35k
    case OP_CBRA:
5528
6.59k
    case OP_SCBRA:
5529
6.59k
    Lframe_type = GF_CAPTURE | GET2(Fecode, 1+LINK_SIZE);
5530
6.59k
    goto GROUPLOOP;
5531
5532
5533
    /* ===================================================================== */
5534
    /* Atomic groups and non-capturing brackets that can match an empty string
5535
    must record a backtracking point and also set up a chained frame. */
5536
5537
0
    case OP_ONCE:
5538
0
    case OP_SCRIPT_RUN:
5539
0
    case OP_SBRA:
5540
0
    Lframe_type = GF_NOCAPTURE | Fop;
5541
5542
245k
    GROUPLOOP:
5543
245k
    for (;;)
5544
780k
      {
5545
780k
      group_frame_type = Lframe_type;
5546
780k
      RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM2);
5547
778k
      if (rrc == MATCH_THEN)
5548
0
        {
5549
0
        PCRE2_SPTR next_ecode = Fecode + GET(Fecode,1);
5550
0
        if (mb->verb_ecode_ptr < next_ecode &&
5551
0
            (*Fecode == OP_ALT || *next_ecode == OP_ALT))
5552
0
          rrc = MATCH_NOMATCH;
5553
0
        }
5554
778k
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5555
778k
      Fecode += GET(Fecode, 1);
5556
778k
      if (*Fecode != OP_ALT) RRETURN(MATCH_NOMATCH);
5557
778k
      }
5558
0
    PCRE2_UNREACHABLE(); /* Control never reaches here */
5559
5560
0
#undef Lframe_type
5561
5562
5563
    /* ===================================================================== */
5564
    /* Pattern recursion either matches the current regex, or some
5565
    subexpression. The offset data is the offset to the starting bracket from
5566
    the start of the whole pattern. This is so that it works from duplicated
5567
    subpatterns. For a whole-pattern recursion, we have to infer the number
5568
    zero. */
5569
5570
0
#define Lframe_type F->temp_32[0]
5571
0
#define Lstart_branch F->temp_sptr[0]
5572
5573
0
    case OP_RECURSE:
5574
0
    bracode = mb->start_code + GET(Fecode, 1);
5575
0
    number = (bracode == mb->start_code)? 0 : GET2(bracode, 1 + LINK_SIZE);
5576
5577
    /* If we are already in a pattern recursion, check for repeating the same
5578
    one without changing the subject pointer or the last referenced character
5579
    in the subject. This should catch convoluted mutual recursions; some
5580
    simple cases are caught at compile time. However, there are rare cases when
5581
    this check needs to be turned off. In this case, actual recursion loops
5582
    will be caught by the match or heap limits. */
5583
5584
0
    if (Fcurrent_recurse != RECURSE_UNSET)
5585
0
      {
5586
0
      offset = Flast_group_offset;
5587
0
      while (offset != PCRE2_UNSET)
5588
0
        {
5589
0
        N = (heapframe *)((char *)match_data->heapframes + offset);
5590
0
        P = (heapframe *)((char *)N - frame_size);
5591
0
        if (N->group_frame_type == (GF_RECURSE | number))
5592
0
          {
5593
0
          if (Feptr == P->eptr && mb->last_used_ptr == P->recurse_last_used &&
5594
0
               (mb->moptions & PCRE2_DISABLE_RECURSELOOP_CHECK) == 0)
5595
0
            return PCRE2_ERROR_RECURSELOOP;
5596
0
          break;
5597
0
          }
5598
0
        offset = P->last_group_offset;
5599
0
        }
5600
0
      }
5601
5602
    /* Remember the current last referenced character and then run the
5603
    recursion branch by branch. */
5604
5605
0
    F->recurse_last_used = mb->last_used_ptr;
5606
0
    Lstart_branch = bracode;
5607
0
    Lframe_type = GF_RECURSE | number;
5608
5609
0
    for (;;)
5610
0
      {
5611
0
      PCRE2_SPTR next_ecode;
5612
5613
0
      group_frame_type = Lframe_type;
5614
0
      RMATCH(Lstart_branch + PRIV(OP_lengths)[*Lstart_branch], RM11);
5615
0
      next_ecode = Lstart_branch + GET(Lstart_branch,1);
5616
5617
      /* Handle backtracking verbs, which are defined in a range that can
5618
      easily be tested for. PCRE does not allow THEN, SKIP, PRUNE or COMMIT to
5619
      escape beyond a recursion; they cause a NOMATCH for the entire recursion.
5620
5621
      When one of these verbs triggers, the current recursion group number is
5622
      recorded. If it matches the recursion we are processing, the verb
5623
      happened within the recursion and we must deal with it. Otherwise it must
5624
      have happened after the recursion completed, and so has to be passed
5625
      back. See comment above about handling THEN. */
5626
5627
0
      if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX &&
5628
0
          mb->verb_current_recurse == (Lframe_type ^ GF_RECURSE))
5629
0
        {
5630
0
        if (rrc == MATCH_THEN && mb->verb_ecode_ptr < next_ecode &&
5631
0
            (*Lstart_branch == OP_ALT || *next_ecode == OP_ALT))
5632
0
          rrc = MATCH_NOMATCH;
5633
0
        else RRETURN(MATCH_NOMATCH);
5634
0
        }
5635
5636
      /* Note that carrying on after (*ACCEPT) in a recursion is handled in the
5637
      OP_ACCEPT code. Nothing needs to be done here. */
5638
5639
0
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5640
0
      Lstart_branch = next_ecode;
5641
0
      if (*Lstart_branch != OP_ALT) RRETURN(MATCH_NOMATCH);
5642
0
      }
5643
0
    PCRE2_UNREACHABLE(); /* Control never reaches here */
5644
5645
0
#undef Lframe_type
5646
0
#undef Lstart_branch
5647
5648
5649
    /* ===================================================================== */
5650
    /* Positive assertions are like other groups except that PCRE doesn't allow
5651
    the effect of (*THEN) to escape beyond an assertion; it is therefore
5652
    treated as NOMATCH. (*ACCEPT) is treated as a successful assertion, with
5653
    its captures and mark retained. Any other return is an error. */
5654
5655
36
#define Lframe_type  F->temp_32[0]
5656
5657
0
    case OP_ASSERT:
5658
0
    case OP_ASSERTBACK:
5659
18
    case OP_ASSERT_NA:
5660
18
    case OP_ASSERTBACK_NA:
5661
18
    Lframe_type = GF_NOCAPTURE | Fop;
5662
18
    for (;;)
5663
18
      {
5664
18
      group_frame_type = Lframe_type;
5665
18
      RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM3);
5666
18
      if (rrc == MATCH_ACCEPT)
5667
0
        {
5668
0
        memcpy(Fovector,
5669
0
              (char *)assert_accept_frame + offsetof(heapframe, ovector),
5670
0
              assert_accept_frame->offset_top * sizeof(PCRE2_SIZE));
5671
0
        Foffset_top = assert_accept_frame->offset_top;
5672
0
        Fmark = assert_accept_frame->mark;
5673
0
        break;
5674
0
        }
5675
18
      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
5676
18
      Fecode += GET(Fecode, 1);
5677
18
      if (*Fecode != OP_ALT) RRETURN(MATCH_NOMATCH);
5678
18
      }
5679
5680
0
    do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
5681
0
    Fecode += 1 + LINK_SIZE;
5682
0
    break;
5683
5684
0
#undef Lframe_type
5685
5686
5687
    /* ===================================================================== */
5688
    /* Handle negative assertions. Loop for each non-matching branch as for
5689
    positive assertions. */
5690
5691
0
#define Lframe_type  F->temp_32[0]
5692
5693
0
    case OP_ASSERT_NOT:
5694
0
    case OP_ASSERTBACK_NOT:
5695
0
    Lframe_type  = GF_NOCAPTURE | Fop;
5696
5697
0
    for (;;)
5698
0
      {
5699
0
      group_frame_type = Lframe_type;
5700
0
      RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM4);
5701
0
      switch(rrc)
5702
0
        {
5703
0
        case MATCH_ACCEPT:   /* Assertion matched, therefore it fails. */
5704
0
        case MATCH_MATCH:
5705
0
        RRETURN (MATCH_NOMATCH);
5706
5707
0
        case MATCH_NOMATCH:  /* Branch failed, try next if present. */
5708
0
        case MATCH_THEN:
5709
0
        Fecode += GET(Fecode, 1);
5710
0
        if (*Fecode != OP_ALT) goto ASSERT_NOT_FAILED;
5711
0
        break;
5712
5713
0
        case MATCH_COMMIT:   /* Assertion forced to fail, therefore continue. */
5714
0
        case MATCH_SKIP:
5715
0
        case MATCH_PRUNE:
5716
0
        do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
5717
0
        goto ASSERT_NOT_FAILED;
5718
5719
0
        default:             /* Pass back any other return */
5720
0
        RRETURN(rrc);
5721
0
        }
5722
0
      }
5723
5724
    /* None of the branches have matched or there was a backtrack to (*COMMIT),
5725
    (*SKIP), (*PRUNE), or (*THEN) in the last branch. This is success for a
5726
    negative assertion, so carry on. */
5727
5728
0
    ASSERT_NOT_FAILED:
5729
0
    Fecode += 1 + LINK_SIZE;
5730
0
    break;
5731
5732
0
#undef Lframe_type
5733
5734
    /* ===================================================================== */
5735
    /* Handle scan substring operation. */
5736
5737
0
#define Lframe_type          F->temp_32[0]
5738
0
#define Lextra_size          F->temp_32[1]
5739
0
#define Lsaved_moptions      F->temp_32[2]
5740
0
#define Lsaved_end_subject   F->temp_sptr[0]
5741
0
#define Lsaved_eptr          F->temp_sptr[1]
5742
0
#define Ltrue_end_extra      F->temp_size
5743
5744
0
    case OP_ASSERT_SCS:
5745
0
      {
5746
0
      PCRE2_SPTR ecode = Fecode + 1 + LINK_SIZE;
5747
0
      uint32_t extra_size = 0;
5748
0
      int count;
5749
0
      PCRE2_SPTR slot;
5750
5751
      /* Disable compiler warning. */
5752
0
      offset = 0;
5753
0
      (void)offset;
5754
5755
0
      for (;;)
5756
0
        {
5757
0
        if (*ecode == OP_CREF)
5758
0
          {
5759
0
          extra_size += 1+IMM2_SIZE;
5760
0
          offset = (GET2(ecode, 1) << 1) - 2;
5761
0
          ecode += 1+IMM2_SIZE;
5762
0
          if (offset < Foffset_top && Fovector[offset] != PCRE2_UNSET)
5763
0
            goto SCS_OFFSET_FOUND;
5764
0
          continue;
5765
0
          }
5766
5767
0
        if (*ecode != OP_DNCREF) RRETURN(MATCH_NOMATCH);
5768
5769
0
        count = GET2(ecode, 1 + IMM2_SIZE);
5770
0
        slot = mb->name_table + GET2(ecode, 1) * mb->name_entry_size;
5771
0
        extra_size += 1+2*IMM2_SIZE;
5772
0
        ecode += 1+2*IMM2_SIZE;
5773
5774
0
        while (count > 0)
5775
0
          {
5776
0
          offset = (GET2(slot, 0) << 1) - 2;
5777
0
          if (offset < Foffset_top && Fovector[offset] != PCRE2_UNSET)
5778
0
            goto SCS_OFFSET_FOUND;
5779
0
          slot += mb->name_entry_size;
5780
0
          count--;
5781
0
          }
5782
0
        }
5783
5784
0
      SCS_OFFSET_FOUND:
5785
5786
      /* Skip remaining options. */
5787
0
      for (;;)
5788
0
        {
5789
0
        if (*ecode == OP_CREF)
5790
0
          {
5791
0
          extra_size += 1+IMM2_SIZE;
5792
0
          ecode += 1+IMM2_SIZE;
5793
0
          }
5794
0
        else if (*ecode == OP_DNCREF)
5795
0
          {
5796
0
          extra_size += 1+2*IMM2_SIZE;
5797
0
          ecode += 1+2*IMM2_SIZE;
5798
0
          }
5799
0
        else break;
5800
0
        }
5801
5802
0
      Lextra_size = extra_size;
5803
0
      }
5804
5805
0
    Lsaved_end_subject = mb->end_subject;
5806
0
    Ltrue_end_extra = mb->true_end_subject - mb->end_subject;
5807
0
    Lsaved_eptr = Feptr;
5808
0
    Lsaved_moptions = mb->moptions;
5809
5810
0
    Feptr = mb->start_subject + Fovector[offset];
5811
0
    mb->true_end_subject = mb->end_subject =
5812
0
      mb->start_subject + Fovector[offset + 1];
5813
0
    mb->moptions &= ~PCRE2_NOTEOL;
5814
5815
0
    Lframe_type = GF_NOCAPTURE | Fop;
5816
0
    for (;;)
5817
0
      {
5818
0
      group_frame_type = Lframe_type;
5819
0
      RMATCH(Fecode + 1 + LINK_SIZE + Lextra_size, RM38);
5820
0
      if (rrc == MATCH_ACCEPT)
5821
0
        {
5822
0
        memcpy(Fovector,
5823
0
              (char *)assert_accept_frame + offsetof(heapframe, ovector),
5824
0
              assert_accept_frame->offset_top * sizeof(PCRE2_SIZE));
5825
0
        Foffset_top = assert_accept_frame->offset_top;
5826
0
        Fmark = assert_accept_frame->mark;
5827
0
        mb->end_subject = Lsaved_end_subject;
5828
0
        mb->true_end_subject = mb->end_subject + Ltrue_end_extra;
5829
0
        mb->moptions = Lsaved_moptions;
5830
0
        break;
5831
0
        }
5832
5833
0
      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
5834
0
        {
5835
0
        mb->end_subject = Lsaved_end_subject;
5836
0
        mb->true_end_subject = mb->end_subject + Ltrue_end_extra;
5837
0
        mb->moptions = Lsaved_moptions;
5838
0
        RRETURN(rrc);
5839
0
        }
5840
5841
0
      Fecode += GET(Fecode, 1);
5842
0
      if (*Fecode != OP_ALT)
5843
0
        {
5844
0
        mb->end_subject = Lsaved_end_subject;
5845
0
        mb->true_end_subject = mb->end_subject + Ltrue_end_extra;
5846
0
        mb->moptions = Lsaved_moptions;
5847
0
        RRETURN(MATCH_NOMATCH);
5848
0
        }
5849
0
      Lextra_size = 0;
5850
0
      }
5851
5852
0
    do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
5853
0
    Fecode += 1 + LINK_SIZE;
5854
0
    Feptr = Lsaved_eptr;
5855
0
    break;
5856
5857
0
#undef Lframe_type
5858
0
#undef Lextra_size
5859
0
#undef Lsaved_end_subject
5860
0
#undef Lsaved_eptr
5861
0
#undef Ltrue_end_extra
5862
0
#undef Lsaved_moptions
5863
5864
    /* ===================================================================== */
5865
    /* The callout item calls an external function, if one is provided, passing
5866
    details of the match so far. This is mainly for debugging, though the
5867
    function is able to force a failure. */
5868
5869
0
    case OP_CALLOUT:
5870
0
    case OP_CALLOUT_STR:
5871
0
    rrc = do_callout(F, mb, &length);
5872
0
    if (rrc > 0) RRETURN(MATCH_NOMATCH);
5873
0
    if (rrc < 0) RRETURN(rrc);
5874
0
    Fecode += length;
5875
0
    break;
5876
5877
5878
    /* ===================================================================== */
5879
    /* Conditional group: compilation checked that there are no more than two
5880
    branches. If the condition is false, skipping the first branch takes us
5881
    past the end of the item if there is only one branch, but that's exactly
5882
    what we want. */
5883
5884
0
    case OP_COND:
5885
0
    case OP_SCOND:
5886
5887
    /* The variable Flength will be added to Fecode when the condition is
5888
    false, to get to the second branch. Setting it to the offset to the ALT or
5889
    KET, then incrementing Fecode achieves this effect. However, if the second
5890
    branch is non-existent, we must point to the KET so that the end of the
5891
    group is correctly processed. We now have Fecode pointing to the condition
5892
    or callout. */
5893
5894
0
    Flength = GET(Fecode, 1);    /* Offset to the second branch */
5895
0
    if (Fecode[Flength] != OP_ALT) Flength -= 1 + LINK_SIZE;
5896
0
    Fecode += 1 + LINK_SIZE;     /* From this opcode */
5897
5898
    /* Because of the way auto-callout works during compile, a callout item is
5899
    inserted between OP_COND and an assertion condition. Such a callout can
5900
    also be inserted manually. */
5901
5902
0
    if (*Fecode == OP_CALLOUT || *Fecode == OP_CALLOUT_STR)
5903
0
      {
5904
0
      rrc = do_callout(F, mb, &length);
5905
0
      if (rrc > 0) RRETURN(MATCH_NOMATCH);
5906
0
      if (rrc < 0) RRETURN(rrc);
5907
5908
      /* Advance Fecode past the callout, so it now points to the condition. We
5909
      must adjust Flength so that the value of Fecode+Flength is unchanged. */
5910
5911
0
      Fecode += length;
5912
0
      Flength -= length;
5913
0
      }
5914
5915
    /* Test the various possible conditions */
5916
5917
0
    condition = FALSE;
5918
0
    switch(*Fecode)
5919
0
      {
5920
0
      case OP_RREF:                  /* Group recursion test */
5921
0
      if (Fcurrent_recurse != RECURSE_UNSET)
5922
0
        {
5923
0
        number = GET2(Fecode, 1);
5924
0
        condition = (number == RREF_ANY || number == Fcurrent_recurse);
5925
0
        }
5926
0
      break;
5927
5928
0
      case OP_DNRREF:       /* Duplicate named group recursion test */
5929
0
      if (Fcurrent_recurse != RECURSE_UNSET)
5930
0
        {
5931
0
        int count = GET2(Fecode, 1 + IMM2_SIZE);
5932
0
        PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size;
5933
0
        while (count-- > 0)
5934
0
          {
5935
0
          number = GET2(slot, 0);
5936
0
          condition = number == Fcurrent_recurse;
5937
0
          if (condition) break;
5938
0
          slot += mb->name_entry_size;
5939
0
          }
5940
0
        }
5941
0
      break;
5942
5943
0
      case OP_CREF:                         /* Numbered group used test */
5944
0
      offset = (GET2(Fecode, 1) << 1) - 2;  /* Doubled ref number */
5945
0
      condition = offset < Foffset_top && Fovector[offset] != PCRE2_UNSET;
5946
0
      break;
5947
5948
0
      case OP_DNCREF:      /* Duplicate named group used test */
5949
0
        {
5950
0
        int count = GET2(Fecode, 1 + IMM2_SIZE);
5951
0
        PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size;
5952
0
        while (count-- > 0)
5953
0
          {
5954
0
          offset = (GET2(slot, 0) << 1) - 2;
5955
0
          condition = offset < Foffset_top && Fovector[offset] != PCRE2_UNSET;
5956
0
          if (condition) break;
5957
0
          slot += mb->name_entry_size;
5958
0
          }
5959
0
        }
5960
0
      break;
5961
5962
0
      case OP_FALSE:
5963
0
      case OP_FAIL:   /* The assertion (?!) becomes OP_FAIL */
5964
0
      break;
5965
5966
0
      case OP_TRUE:
5967
0
      condition = TRUE;
5968
0
      break;
5969
5970
      /* The condition is an assertion. Run code similar to the assertion code
5971
      above. */
5972
5973
0
#define Lpositive      F->temp_32[0]
5974
0
#define Lstart_branch  F->temp_sptr[0]
5975
5976
0
      default:
5977
0
      Lpositive = (*Fecode == OP_ASSERT || *Fecode == OP_ASSERTBACK);
5978
0
      Lstart_branch = Fecode;
5979
5980
0
      for (;;)
5981
0
        {
5982
0
        group_frame_type = GF_CONDASSERT | *Fecode;
5983
0
        RMATCH(Lstart_branch + PRIV(OP_lengths)[*Lstart_branch], RM5);
5984
5985
0
        switch(rrc)
5986
0
          {
5987
0
          case MATCH_ACCEPT:  /* Save captures */
5988
0
          memcpy(Fovector,
5989
0
                (char *)assert_accept_frame + offsetof(heapframe, ovector),
5990
0
                assert_accept_frame->offset_top * sizeof(PCRE2_SIZE));
5991
0
          Foffset_top = assert_accept_frame->offset_top;
5992
5993
          /* Fall through */
5994
          /* In the case of a match, the captures have already been put into
5995
          the current frame. */
5996
5997
0
          case MATCH_MATCH:
5998
0
          condition = Lpositive;   /* TRUE for positive assertion */
5999
0
          break;
6000
6001
          /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
6002
          assertion; it is therefore always treated as NOMATCH. */
6003
6004
0
          case MATCH_NOMATCH:
6005
0
          case MATCH_THEN:
6006
0
          Lstart_branch += GET(Lstart_branch, 1);
6007
0
          if (*Lstart_branch == OP_ALT) continue;  /* Try next branch */
6008
0
          condition = !Lpositive;  /* TRUE for negative assertion */
6009
0
          break;
6010
6011
          /* These force no match without checking other branches. */
6012
6013
0
          case MATCH_COMMIT:
6014
0
          case MATCH_SKIP:
6015
0
          case MATCH_PRUNE:
6016
0
          condition = !Lpositive;
6017
0
          break;
6018
6019
0
          default:
6020
0
          RRETURN(rrc);
6021
0
          }
6022
0
        break;  /* Out of the branch loop */
6023
0
        }
6024
6025
      /* If the condition is true, find the end of the assertion so that
6026
      advancing past it gets us to the start of the first branch. */
6027
6028
0
      if (condition)
6029
0
        {
6030
0
        do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
6031
0
        }
6032
0
      break;  /* End of assertion condition */
6033
0
      }
6034
6035
0
#undef Lpositive
6036
0
#undef Lstart_branch
6037
6038
    /* Choose branch according to the condition. */
6039
6040
0
    Fecode += condition? PRIV(OP_lengths)[*Fecode] : Flength;
6041
6042
    /* If the opcode is OP_SCOND it means we are at a repeated conditional
6043
    group that might match an empty string. We must therefore descend a level
6044
    so that the start is remembered for checking. For OP_COND we can just
6045
    continue at this level. */
6046
6047
0
    if (Fop == OP_SCOND)
6048
0
      {
6049
0
      group_frame_type  = GF_NOCAPTURE | Fop;
6050
0
      RMATCH(Fecode, RM35);
6051
0
      RRETURN(rrc);
6052
0
      }
6053
0
    break;
6054
6055
6056
6057
/* ========================================================================= */
6058
/*                  End of start of parenthesis opcodes                      */
6059
/* ========================================================================= */
6060
6061
6062
    /* ===================================================================== */
6063
    /* Move the subject pointer back by one fixed amount. This occurs at the
6064
    start of each branch that has a fixed length in a lookbehind assertion. If
6065
    we are too close to the start to move back, fail. When working with UTF-8
6066
    we move back a number of characters, not bytes. */
6067
6068
0
    case OP_REVERSE:
6069
0
    number = GET2(Fecode, 1);
6070
0
#ifdef SUPPORT_UNICODE
6071
0
    if (utf)
6072
0
      {
6073
      /* We used to do a simpler `while (number-- > 0)` but that triggers
6074
      clang's unsigned integer overflow sanitizer. */
6075
0
      while (number > 0)
6076
0
        {
6077
0
        --number;
6078
0
        if (Feptr <= mb->check_subject) RRETURN(MATCH_NOMATCH);
6079
0
        Feptr--;
6080
0
        BACKCHAR(Feptr);
6081
0
        }
6082
0
      }
6083
0
    else
6084
0
#endif
6085
6086
    /* No UTF support, or not in UTF mode: count is code unit count */
6087
6088
0
      {
6089
0
      if ((ptrdiff_t)number > Feptr - mb->start_subject) RRETURN(MATCH_NOMATCH);
6090
0
      Feptr -= number;
6091
0
      }
6092
6093
    /* Save the earliest consulted character, then skip to next opcode */
6094
6095
0
    if (Feptr < mb->start_used_ptr) mb->start_used_ptr = Feptr;
6096
0
    Fecode += 1 + IMM2_SIZE;
6097
0
    break;
6098
6099
6100
    /* ===================================================================== */
6101
    /* Move the subject pointer back by a variable amount. This occurs at the
6102
    start of each branch of a lookbehind assertion when the branch has a
6103
    variable, but limited, length. A loop is needed to try matching the branch
6104
    after moving back different numbers of characters. If we are too close to
6105
    the start to move back even the minimum amount, fail. When working with
6106
    UTF-8 we move back a number of characters, not bytes. */
6107
6108
0
#define Lmin F->temp_32[0]
6109
0
#define Lmax F->temp_32[1]
6110
0
#define Leptr F->temp_sptr[0]
6111
6112
0
    case OP_VREVERSE:
6113
0
    Lmin = GET2(Fecode, 1);
6114
0
    Lmax = GET2(Fecode, 1 + IMM2_SIZE);
6115
0
    Leptr = Feptr;
6116
6117
    /* Move back by the maximum branch length and then work forwards. This
6118
    ensures that items such as \d{3,5} get the maximum length, which is
6119
    relevant for captures, and makes for Perl compatibility. */
6120
6121
0
#ifdef SUPPORT_UNICODE
6122
0
    if (utf)
6123
0
      {
6124
0
      for (i = 0; i < Lmax; i++)
6125
0
        {
6126
0
        if (Feptr == mb->start_subject)
6127
0
          {
6128
0
          if (i < Lmin) RRETURN(MATCH_NOMATCH);
6129
0
          Lmax = i;
6130
0
          break;
6131
0
          }
6132
0
        Feptr--;
6133
0
        BACKCHAR(Feptr);
6134
0
        }
6135
0
      }
6136
0
    else
6137
0
#endif
6138
6139
    /* No UTF support or not in UTF mode */
6140
6141
0
      {
6142
0
      ptrdiff_t diff = Feptr - mb->start_subject;
6143
0
      uint32_t available = (diff > 65535)? 65535 : ((diff > 0)? (int)diff : 0);
6144
0
      if (Lmin > available) RRETURN(MATCH_NOMATCH);
6145
0
      if (Lmax > available) Lmax = available;
6146
0
      Feptr -= Lmax;
6147
0
      }
6148
6149
    /* Now try matching, moving forward one character on failure, until we
6150
    reach the minimum back length. */
6151
6152
0
    for (;;)
6153
0
      {
6154
0
      RMATCH(Fecode + 1 + 2 * IMM2_SIZE, RM37);
6155
0
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6156
0
      if (Lmax-- <= Lmin) RRETURN(MATCH_NOMATCH);
6157
0
      Feptr++;
6158
0
#ifdef SUPPORT_UNICODE
6159
0
      if (utf) { FORWARDCHARTEST(Feptr, mb->end_subject); }
6160
0
#endif
6161
0
      }
6162
0
    PCRE2_UNREACHABLE(); /* Control never reaches here */
6163
6164
0
#undef Lmin
6165
0
#undef Lmax
6166
0
#undef Leptr
6167
6168
    /* ===================================================================== */
6169
    /* An alternation is the end of a branch; scan along to find the end of the
6170
    bracketed group. */
6171
6172
2.23k
    case OP_ALT:
6173
2.23k
    branch_end = Fecode;
6174
74.8k
    do Fecode += GET(Fecode,1); while (*Fecode == OP_ALT);
6175
2.23k
    break;
6176
6177
6178
    /* ===================================================================== */
6179
    /* The end of a parenthesized group. For all but OP_BRA and OP_COND, the
6180
    starting frame was added to the chained frames in order to remember the
6181
    starting subject position for the group. (Not true for OP_BRA when it's a
6182
    whole pattern recursion, but that is handled separately below.)*/
6183
6184
6.78k
    case OP_KET:
6185
6.78k
    case OP_KETRMIN:
6186
8.05k
    case OP_KETRMAX:
6187
8.98k
    case OP_KETRPOS:
6188
6189
8.98k
    bracode = Fecode - GET(Fecode, 1);
6190
6191
8.98k
    if (branch_end == NULL) branch_end = Fecode;
6192
8.98k
    branch_start = bracode;
6193
74.5k
    while (branch_start + GET(branch_start, 1) != branch_end)
6194
65.5k
      branch_start += GET(branch_start, 1);
6195
8.98k
    branch_end = NULL;
6196
6197
    /* Point N to the frame at the start of the most recent group, and P to its
6198
    predecessor. Remember the subject pointer at the start of the group. */
6199
6200
8.98k
    if (*bracode != OP_BRA && *bracode != OP_COND)
6201
6.38k
      {
6202
6.38k
      N = (heapframe *)((char *)match_data->heapframes + Flast_group_offset);
6203
6.38k
      P = (heapframe *)((char *)N - frame_size);
6204
6.38k
      Flast_group_offset = P->last_group_offset;
6205
6206
#ifdef DEBUG_SHOW_RMATCH
6207
      fprintf(stderr, "++ KET for frame=%d type=%x prev char offset=%lu\n",
6208
        N->rdepth, N->group_frame_type,
6209
        (char *)P->eptr - (char *)mb->start_subject);
6210
#endif
6211
6212
      /* If we are at the end of an assertion that is a condition, first check
6213
      to see if we are at the end of a variable-length branch in a lookbehind.
6214
      If this is the case and we have not landed on the current character,
6215
      return no match. Compare code below for non-condition lookbehinds. In
6216
      other cases, return a match, discarding any intermediate backtracking
6217
      points. Copy back the mark setting and the captures into the frame before
6218
      N so that they are set on return. Doing this for all assertions, both
6219
      positive and negative, seems to match what Perl does. */
6220
6221
6.38k
      if (GF_IDMASK(N->group_frame_type) == GF_CONDASSERT)
6222
0
        {
6223
0
        if ((*bracode == OP_ASSERTBACK || *bracode == OP_ASSERTBACK_NOT) &&
6224
0
            branch_start[1 + LINK_SIZE] == OP_VREVERSE && Feptr != P->eptr)
6225
0
          RRETURN(MATCH_NOMATCH);
6226
0
        memcpy((char *)P + offsetof(heapframe, ovector), Fovector,
6227
0
          Foffset_top * sizeof(PCRE2_SIZE));
6228
0
        P->offset_top = Foffset_top;
6229
0
        P->mark = Fmark;
6230
0
        Fback_frame = (char *)F - (char *)P;
6231
0
        RRETURN(MATCH_MATCH);
6232
0
        }
6233
6.38k
      }
6234
2.60k
    else P = NULL;   /* Indicates starting frame not recorded */
6235
6236
    /* The group was not a conditional assertion. */
6237
6238
8.98k
    switch (*bracode)
6239
8.98k
      {
6240
      /* Whole pattern recursion is handled as a recursion into group 0, but
6241
      the entire pattern is wrapped in OP_BRA/OP_KET rather than a capturing
6242
      group - a design mistake: it should perhaps have been capture group 0.
6243
      Anyway, that means the end of such recursion must be handled here. It is
6244
      detected by checking for an immediately following OP_END when we are
6245
      recursing in group 0. If this is not the end of a whole-pattern
6246
      recursion, there is nothing to be done. */
6247
6248
2.60k
      case OP_BRA:
6249
2.60k
      if (Fcurrent_recurse != 0 || Fecode[1+LINK_SIZE] != OP_END) break;
6250
6251
      /* It is the end of whole-pattern recursion. */
6252
6253
0
      offset = Flast_group_offset;
6254
6255
      /* Corrupted heapframes? Trigger an assert and return an error. */
6256
0
      PCRE2_ASSERT(offset != PCRE2_UNSET);
6257
0
      if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL;
6258
6259
0
      N = (heapframe *)((char *)match_data->heapframes + offset);
6260
0
      P = (heapframe *)((char *)N - frame_size);
6261
0
      Flast_group_offset = P->last_group_offset;
6262
6263
      /* Reinstate the previous set of captures and then carry on after the
6264
      recursion call. */
6265
6266
0
      memcpy((char *)F + offsetof(heapframe, ovector), P->ovector,
6267
0
        Foffset_top * sizeof(PCRE2_SIZE));
6268
0
      Foffset_top = P->offset_top;
6269
0
      Fcapture_last = P->capture_last;
6270
0
      Fcurrent_recurse = P->current_recurse;
6271
0
      Fecode = P->ecode + 1 + LINK_SIZE;
6272
0
      continue;  /* With next opcode */
6273
6274
0
      case OP_COND:     /* No need to do anything for these */
6275
0
      case OP_SCOND:
6276
0
      break;
6277
6278
      /* Non-atomic positive assertions are like OP_BRA, except that the
6279
      subject pointer must be put back to where it was at the start of the
6280
      assertion. For a variable lookbehind, check its end point. */
6281
6282
0
      case OP_ASSERTBACK_NA:
6283
0
      if (branch_start[1 + LINK_SIZE] == OP_VREVERSE && Feptr != P->eptr)
6284
0
        RRETURN(MATCH_NOMATCH);
6285
      /* Fall through */
6286
6287
0
      case OP_ASSERT_NA:
6288
0
      if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
6289
0
      Feptr = P->eptr;
6290
0
      break;
6291
6292
      /* Atomic positive assertions are like OP_ONCE, except that in addition
6293
      the subject pointer must be put back to where it was at the start of the
6294
      assertion. For a variable lookbehind, check its end point. */
6295
6296
0
      case OP_ASSERTBACK:
6297
0
      if (branch_start[1 + LINK_SIZE] == OP_VREVERSE && Feptr != P->eptr)
6298
0
        RRETURN(MATCH_NOMATCH);
6299
      /* Fall through */
6300
6301
0
      case OP_ASSERT:
6302
0
      if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
6303
0
      Feptr = P->eptr;
6304
      /* Fall through */
6305
6306
      /* For an atomic group, discard internal backtracking points. We must
6307
      also ensure that any remaining branches within the top-level of the group
6308
      are not tried. Do this by adjusting the code pointer within the backtrack
6309
      frame so that it points to the final branch. */
6310
6311
0
      case OP_ONCE:
6312
0
      Fback_frame = ((char *)F - (char *)P);
6313
0
      for (;;)
6314
0
        {
6315
0
        uint32_t y = GET(P->ecode,1);
6316
0
        if ((P->ecode)[y] != OP_ALT) break;
6317
0
        P->ecode += y;
6318
0
        }
6319
0
      break;
6320
6321
      /* A matching negative assertion returns MATCH, which is turned into
6322
      NOMATCH at the assertion level. For a variable lookbehind, check its end
6323
      point. */
6324
6325
0
      case OP_ASSERTBACK_NOT:
6326
0
      if (branch_start[1 + LINK_SIZE] == OP_VREVERSE && Feptr != P->eptr)
6327
0
        RRETURN(MATCH_NOMATCH);
6328
      /* Fall through */
6329
6330
0
      case OP_ASSERT_NOT:
6331
0
      RRETURN(MATCH_MATCH);
6332
6333
      /* A scan substring group must preserve the current end_subject,
6334
      and restore it before the backtracking is performed into its sub
6335
      pattern. */
6336
6337
0
      case OP_ASSERT_SCS:
6338
0
      F->temp_sptr[0] = mb->end_subject;
6339
0
      mb->end_subject = P->temp_sptr[0];
6340
0
      mb->true_end_subject = mb->end_subject + P->temp_size;
6341
0
      Feptr = P->temp_sptr[1];
6342
6343
0
      RMATCH(Fecode + 1 + LINK_SIZE, RM39);
6344
6345
0
      mb->end_subject = F->temp_sptr[0];
6346
0
      mb->true_end_subject = mb->end_subject;
6347
0
      RRETURN(rrc);
6348
0
      break;
6349
6350
      /* At the end of a script run, apply the script-checking rules. This code
6351
      will never be exercised if Unicode support is not compiled, because in
6352
      that environment script runs cause an error at compile time. */
6353
6354
0
      case OP_SCRIPT_RUN:
6355
0
      if (!PRIV(script_run)(P->eptr, Feptr, utf)) RRETURN(MATCH_NOMATCH);
6356
0
      break;
6357
6358
      /* Whole-pattern recursion is coded as a recurse into group 0, and is
6359
      handled with OP_BRA above. Other recursion is handled here. */
6360
6361
4.21k
      case OP_CBRA:
6362
4.27k
      case OP_CBRAPOS:
6363
5.50k
      case OP_SCBRA:
6364
6.38k
      case OP_SCBRAPOS:
6365
6.38k
      number = GET2(bracode, 1+LINK_SIZE);
6366
6367
      /* Handle a recursively called group. We reinstate the previous set of
6368
      captures and then carry on after the recursion call. */
6369
6370
6.38k
      if (Fcurrent_recurse == number)
6371
0
        {
6372
0
        P = (heapframe *)((char *)N - frame_size);
6373
0
        memcpy((char *)F + offsetof(heapframe, ovector), P->ovector,
6374
0
          Foffset_top * sizeof(PCRE2_SIZE));
6375
0
        Foffset_top = P->offset_top;
6376
0
        Fcapture_last = P->capture_last;
6377
0
        Fcurrent_recurse = P->current_recurse;
6378
0
        Fecode = P->ecode + 1 + LINK_SIZE;
6379
0
        continue;  /* With next opcode */
6380
0
        }
6381
6382
      /* Deal with actual capturing. */
6383
6384
6.38k
      offset = (number << 1) - 2;
6385
6.38k
      Fcapture_last = number;
6386
6.38k
      Fovector[offset] = P->eptr - mb->start_subject;
6387
6.38k
      Fovector[offset+1] = Feptr - mb->start_subject;
6388
6.38k
      if (offset >= Foffset_top) Foffset_top = offset + 2;
6389
6.38k
      break;
6390
8.98k
      }  /* End actions relating to the starting opcode */
6391
6392
    /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
6393
    and return the MATCH_KETRPOS. This makes it possible to do the repeats one
6394
    at a time from the outer level. This must precede the empty string test -
6395
    in this case that test is done at the outer level. */
6396
6397
8.98k
    if (*Fecode == OP_KETRPOS)
6398
931
      {
6399
931
      memcpy((char *)P + offsetof(heapframe, eptr),
6400
931
             (char *)F + offsetof(heapframe, eptr),
6401
931
             frame_copy_size);
6402
931
      RRETURN(MATCH_KETRPOS);
6403
931
      }
6404
6405
    /* Handle the different kinds of closing brackets. A non-repeating ket
6406
    needs no special action, just continuing at this level. This also happens
6407
    for the repeating kets if the group matched no characters, in order to
6408
    forcibly break infinite loops. Otherwise, the repeating kets try the rest
6409
    of the pattern or restart from the preceding bracket, in the appropriate
6410
    order. */
6411
6412
8.05k
    if (Fop != OP_KET && (P == NULL || Feptr != P->eptr))
6413
34
      {
6414
34
      if (Fop == OP_KETRMIN)
6415
0
        {
6416
0
        RMATCH(Fecode + 1 + LINK_SIZE, RM6);
6417
0
        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6418
0
        Fecode -= GET(Fecode, 1);
6419
0
        break;   /* End of ket processing */
6420
0
        }
6421
6422
      /* Repeat the maximum number of times (KETRMAX) */
6423
6424
34
      RMATCH(bracode, RM7);
6425
34
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6426
34
      }
6427
6428
    /* Carry on at this level for a non-repeating ket, or after matching an
6429
    empty string, or after repeating for a maximum number of times. */
6430
6431
8.05k
    Fecode += 1 + LINK_SIZE;
6432
8.05k
    break;
6433
6434
6435
    /* ===================================================================== */
6436
    /* Start and end of line assertions, not multiline mode. */
6437
6438
28.6k
    case OP_CIRC:   /* Start of line, unless PCRE2_NOTBOL is set. */
6439
28.6k
    if (Feptr != mb->start_subject || (mb->moptions & PCRE2_NOTBOL) != 0)
6440
28.5k
      RRETURN(MATCH_NOMATCH);
6441
115
    Fecode++;
6442
115
    break;
6443
6444
68.1k
    case OP_SOD:    /* Unconditional start of subject */
6445
68.1k
    if (Feptr != mb->start_subject) RRETURN(MATCH_NOMATCH);
6446
1
    Fecode++;
6447
1
    break;
6448
6449
    /* When PCRE2_NOTEOL is unset, assert before the subject end, or a
6450
    terminating newline unless PCRE2_DOLLAR_ENDONLY is set. */
6451
6452
183k
    case OP_DOLL:
6453
183k
    if ((mb->moptions & PCRE2_NOTEOL) != 0) RRETURN(MATCH_NOMATCH);
6454
183k
    if ((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0) goto ASSERT_NL_OR_EOS;
6455
6456
    /* Fall through */
6457
    /* Unconditional end of subject assertion (\z). */
6458
6459
468
    case OP_EOD:
6460
468
    if (Feptr < mb->true_end_subject) RRETURN(MATCH_NOMATCH);
6461
4
    if (mb->partial != 0)
6462
0
      {
6463
0
      mb->hitend = TRUE;
6464
0
      if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
6465
0
      }
6466
4
    Fecode++;
6467
4
    break;
6468
6469
    /* End of subject or ending \n assertion (\Z) */
6470
6471
6.06k
    case OP_EODN:
6472
189k
    ASSERT_NL_OR_EOS:
6473
189k
    if (Feptr < mb->true_end_subject &&
6474
189k
        (!IS_NEWLINE(Feptr) || Feptr != mb->true_end_subject - mb->nllen))
6475
189k
      {
6476
189k
      if (mb->partial != 0 &&
6477
0
          Feptr + 1 >= mb->end_subject &&
6478
0
          NLBLOCK->nltype == NLTYPE_FIXED &&
6479
0
          NLBLOCK->nllen == 2 &&
6480
0
          UCHAR21TEST(Feptr) == NLBLOCK->nl[0])
6481
0
        {
6482
0
        mb->hitend = TRUE;
6483
0
        if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
6484
0
        }
6485
189k
      RRETURN(MATCH_NOMATCH);
6486
189k
      }
6487
6488
    /* Either at end of string or \n before end. */
6489
6490
201
    if (mb->partial != 0)
6491
0
      {
6492
0
      mb->hitend = TRUE;
6493
0
      if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
6494
0
      }
6495
201
    Fecode++;
6496
201
    break;
6497
6498
6499
    /* ===================================================================== */
6500
    /* Start and end of line assertions, multiline mode. */
6501
6502
    /* Start of subject unless notbol, or after any newline except for one at
6503
    the very end, unless PCRE2_ALT_CIRCUMFLEX is set. */
6504
6505
127k
    case OP_CIRCM:
6506
127k
    if ((mb->moptions & PCRE2_NOTBOL) != 0 && Feptr == mb->start_subject)
6507
0
      RRETURN(MATCH_NOMATCH);
6508
127k
    if (Feptr != mb->start_subject &&
6509
127k
        ((Feptr == mb->end_subject &&
6510
551
           (mb->poptions & PCRE2_ALT_CIRCUMFLEX) == 0) ||
6511
126k
         !WAS_NEWLINE(Feptr)))
6512
127k
      RRETURN(MATCH_NOMATCH);
6513
203
    Fecode++;
6514
203
    break;
6515
6516
    /* Assert before any newline, or before end of subject unless noteol is
6517
    set. */
6518
6519
109k
    case OP_DOLLM:
6520
109k
    if (Feptr < mb->end_subject)
6521
108k
      {
6522
108k
      if (!IS_NEWLINE(Feptr))
6523
107k
        {
6524
107k
        if (mb->partial != 0 &&
6525
0
            Feptr + 1 >= mb->end_subject &&
6526
0
            NLBLOCK->nltype == NLTYPE_FIXED &&
6527
0
            NLBLOCK->nllen == 2 &&
6528
0
            UCHAR21TEST(Feptr) == NLBLOCK->nl[0])
6529
0
          {
6530
0
          mb->hitend = TRUE;
6531
0
          if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
6532
0
          }
6533
107k
        RRETURN(MATCH_NOMATCH);
6534
107k
        }
6535
108k
      }
6536
54
    else
6537
54
      {
6538
54
      if ((mb->moptions & PCRE2_NOTEOL) != 0) RRETURN(MATCH_NOMATCH);
6539
54
      SCHECK_PARTIAL();
6540
54
      }
6541
1.86k
    Fecode++;
6542
1.86k
    break;
6543
6544
6545
    /* ===================================================================== */
6546
    /* Start of match assertion */
6547
6548
369
    case OP_SOM:
6549
369
    if (Feptr != mb->start_subject + mb->start_offset) RRETURN(MATCH_NOMATCH);
6550
6
    Fecode++;
6551
6
    break;
6552
6553
6554
    /* ===================================================================== */
6555
    /* Reset the start of match point */
6556
6557
2
    case OP_SET_SOM:
6558
2
    Fstart_match = Feptr;
6559
2
    Fecode++;
6560
2
    break;
6561
6562
6563
    /* ===================================================================== */
6564
    /* Word boundary assertions. Find out if the previous and current
6565
    characters are "word" characters. It takes a bit more work in UTF mode.
6566
    Characters > 255 are assumed to be "non-word" characters when PCRE2_UCP is
6567
    not set. When it is set, use Unicode properties if available, even when not
6568
    in UTF mode. Remember the earliest and latest consulted characters. */
6569
6570
106k
    case OP_NOT_WORD_BOUNDARY:
6571
107k
    case OP_WORD_BOUNDARY:
6572
5.14M
    case OP_NOT_UCP_WORD_BOUNDARY:
6573
5.15M
    case OP_UCP_WORD_BOUNDARY:
6574
5.15M
    if (Feptr == mb->check_subject) prev_is_word = FALSE; else
6575
5.15M
      {
6576
5.15M
      PCRE2_SPTR lastptr = Feptr - 1;
6577
5.15M
#ifdef SUPPORT_UNICODE
6578
5.15M
      if (utf)
6579
5.04M
        {
6580
5.04M
        BACKCHAR(lastptr);
6581
5.04M
        GETCHAR(fc, lastptr);
6582
5.04M
        }
6583
107k
      else
6584
107k
#endif  /* SUPPORT_UNICODE */
6585
107k
      fc = *lastptr;
6586
5.15M
      if (lastptr < mb->start_used_ptr) mb->start_used_ptr = lastptr;
6587
5.15M
#ifdef SUPPORT_UNICODE
6588
5.15M
      if (Fop == OP_UCP_WORD_BOUNDARY || Fop == OP_NOT_UCP_WORD_BOUNDARY)
6589
5.04M
        {
6590
5.04M
        int chartype = UCD_CHARTYPE(fc);
6591
5.04M
        int category = PRIV(ucp_gentype)[chartype];
6592
5.04M
        prev_is_word = (category == ucp_L || category == ucp_N ||
6593
4.65M
          chartype == ucp_Mn || chartype == ucp_Pc);
6594
5.04M
        }
6595
107k
      else
6596
107k
#endif  /* SUPPORT_UNICODE */
6597
107k
      prev_is_word = CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0;
6598
5.15M
      }
6599
6600
    /* Get status of next character */
6601
6602
5.15M
    if (Feptr >= mb->end_subject)
6603
292k
      {
6604
292k
      SCHECK_PARTIAL();
6605
292k
      cur_is_word = FALSE;
6606
292k
      }
6607
4.85M
    else
6608
4.85M
      {
6609
4.85M
      PCRE2_SPTR nextptr = Feptr + 1;
6610
4.85M
#ifdef SUPPORT_UNICODE
6611
4.85M
      if (utf)
6612
4.75M
        {
6613
4.75M
        FORWARDCHARTEST(nextptr, mb->end_subject);
6614
4.75M
        GETCHAR(fc, Feptr);
6615
4.75M
        }
6616
107k
      else
6617
107k
#endif  /* SUPPORT_UNICODE */
6618
107k
      fc = *Feptr;
6619
4.85M
      if (nextptr > mb->last_used_ptr) mb->last_used_ptr = nextptr;
6620
4.85M
#ifdef SUPPORT_UNICODE
6621
4.85M
      if (Fop == OP_UCP_WORD_BOUNDARY || Fop == OP_NOT_UCP_WORD_BOUNDARY)
6622
4.75M
        {
6623
4.75M
        int chartype = UCD_CHARTYPE(fc);
6624
4.75M
        int category = PRIV(ucp_gentype)[chartype];
6625
4.75M
        cur_is_word = (category == ucp_L || category == ucp_N ||
6626
4.39M
          chartype == ucp_Mn || chartype == ucp_Pc);
6627
4.75M
        }
6628
107k
      else
6629
107k
#endif  /* SUPPORT_UNICODE */
6630
107k
      cur_is_word = CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0;
6631
4.85M
      }
6632
6633
    /* Now see if the situation is what we want */
6634
6635
5.15M
    if ((*Fecode++ == OP_WORD_BOUNDARY || Fop == OP_UCP_WORD_BOUNDARY)?
6636
5.14M
         cur_is_word == prev_is_word : cur_is_word != prev_is_word)
6637
359k
      RRETURN(MATCH_NOMATCH);
6638
4.79M
    break;
6639
6640
6641
    /* ===================================================================== */
6642
    /* Backtracking (*VERB)s, with and without arguments. Note that if the
6643
    pattern is successfully matched, we do not come back from RMATCH. */
6644
6645
4.79M
    case OP_MARK:
6646
0
    Fmark = mb->nomatch_mark = Fecode + 2;
6647
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM12);
6648
6649
    /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
6650
    argument, and we must check whether that argument matches this MARK's
6651
    argument. It is passed back in mb->verb_skip_ptr. If it does match, we
6652
    return MATCH_SKIP with mb->verb_skip_ptr now pointing to the subject
6653
    position that corresponds to this mark. Otherwise, pass back the return
6654
    code unaltered. */
6655
6656
0
    if (rrc == MATCH_SKIP_ARG &&
6657
0
             PRIV(strcmp)(Fecode + 2, mb->verb_skip_ptr) == 0)
6658
0
      {
6659
0
      mb->verb_skip_ptr = Feptr;   /* Pass back current position */
6660
0
      RRETURN(MATCH_SKIP);
6661
0
      }
6662
0
    RRETURN(rrc);
6663
6664
0
    case OP_FAIL:
6665
0
    RRETURN(MATCH_NOMATCH);
6666
6667
    /* Record the current recursing group number in mb->verb_current_recurse
6668
    when a backtracking return such as MATCH_COMMIT is given. This enables the
6669
    recurse processing to catch verbs from within the recursion. */
6670
6671
0
    case OP_COMMIT:
6672
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM13);
6673
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6674
0
    mb->verb_current_recurse = Fcurrent_recurse;
6675
0
    RRETURN(MATCH_COMMIT);
6676
6677
0
    case OP_COMMIT_ARG:
6678
0
    Fmark = mb->nomatch_mark = Fecode + 2;
6679
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM36);
6680
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6681
0
    mb->verb_current_recurse = Fcurrent_recurse;
6682
0
    RRETURN(MATCH_COMMIT);
6683
6684
0
    case OP_PRUNE:
6685
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM14);
6686
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6687
0
    mb->verb_current_recurse = Fcurrent_recurse;
6688
0
    RRETURN(MATCH_PRUNE);
6689
6690
0
    case OP_PRUNE_ARG:
6691
0
    Fmark = mb->nomatch_mark = Fecode + 2;
6692
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM15);
6693
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6694
0
    mb->verb_current_recurse = Fcurrent_recurse;
6695
0
    RRETURN(MATCH_PRUNE);
6696
6697
0
    case OP_SKIP:
6698
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM16);
6699
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6700
0
    mb->verb_skip_ptr = Feptr;   /* Pass back current position */
6701
0
    mb->verb_current_recurse = Fcurrent_recurse;
6702
0
    RRETURN(MATCH_SKIP);
6703
6704
    /* Note that, for Perl compatibility, SKIP with an argument does NOT set
6705
    nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was
6706
    not a matching mark, we have to re-run the match, ignoring the SKIP_ARG
6707
    that failed and any that precede it (either they also failed, or were not
6708
    triggered). To do this, we maintain a count of executed SKIP_ARGs. If a
6709
    SKIP_ARG gets to top level, the match is re-run with mb->ignore_skip_arg
6710
    set to the count of the one that failed. */
6711
6712
0
    case OP_SKIP_ARG:
6713
0
    mb->skip_arg_count++;
6714
0
    if (mb->skip_arg_count <= mb->ignore_skip_arg)
6715
0
      {
6716
0
      Fecode += PRIV(OP_lengths)[*Fecode] + Fecode[1];
6717
0
      break;
6718
0
      }
6719
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM17);
6720
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6721
6722
    /* Pass back the current skip name and return the special MATCH_SKIP_ARG
6723
    return code. This will either be caught by a matching MARK, or get to the
6724
    top, where it causes a rematch with mb->ignore_skip_arg set to the value of
6725
    mb->skip_arg_count. */
6726
6727
0
    mb->verb_skip_ptr = Fecode + 2;
6728
0
    mb->verb_current_recurse = Fcurrent_recurse;
6729
0
    RRETURN(MATCH_SKIP_ARG);
6730
6731
    /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
6732
    the branch in which it occurs can be determined. */
6733
6734
0
    case OP_THEN:
6735
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM18);
6736
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6737
0
    mb->verb_ecode_ptr = Fecode;
6738
0
    mb->verb_current_recurse = Fcurrent_recurse;
6739
0
    RRETURN(MATCH_THEN);
6740
6741
0
    case OP_THEN_ARG:
6742
0
    Fmark = mb->nomatch_mark = Fecode + 2;
6743
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM19);
6744
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6745
0
    mb->verb_ecode_ptr = Fecode;
6746
0
    mb->verb_current_recurse = Fcurrent_recurse;
6747
0
    RRETURN(MATCH_THEN);
6748
6749
6750
    /* ===================================================================== */
6751
    /* There's been some horrible disaster. Arrival here can only mean there is
6752
    something seriously wrong in the code above or the OP_xxx definitions. */
6753
6754
0
    default:
6755
0
    PCRE2_DEBUG_UNREACHABLE();
6756
0
    return PCRE2_ERROR_INTERNAL;
6757
446M
    }
6758
6759
  /* Do not insert any code in here without much thought; it is assumed
6760
  that "continue" in the code above comes out to here to repeat the main
6761
  loop. */
6762
6763
446M
  }  /* End of main loop */
6764
6765
0
PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */
6766
6767
/* ========================================================================= */
6768
/* The RRETURN() macro jumps here. The number that is saved in Freturn_id
6769
indicates which label we actually want to return to. The value in Frdepth is
6770
the index number of the frame in the vector. The return value has been placed
6771
in rrc. */
6772
6773
344M
#define LBL(val) case val: goto L_RM##val;
6774
6775
344M
RETURN_SWITCH:
6776
344M
if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
6777
344M
if (Frdepth == 0) return rrc;                     /* Exit from the top level */
6778
344M
F = (heapframe *)((char *)F - Fback_frame);       /* Backtrack */
6779
344M
mb->cb->callout_flags |= PCRE2_CALLOUT_BACKTRACK; /* Note for callouts */
6780
6781
#ifdef DEBUG_SHOW_RMATCH
6782
fprintf(stderr, "++ RETURN %d to RM%d\n", rrc, Freturn_id);
6783
#endif
6784
6785
344M
switch (Freturn_id)
6786
344M
  {
6787
778k
  LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
6788
1
  LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)
6789
243k
  LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)
6790
5.05M
  LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)
6791
58.0M
  LBL(33) LBL(34) LBL(35) LBL(36) LBL(37) LBL(38) LBL(39)
6792
6793
0
#ifdef SUPPORT_WIDE_CHARS
6794
263k
  LBL(100) LBL(101) LBL(102) LBL(103)
6795
0
#endif
6796
6797
0
#ifdef SUPPORT_UNICODE
6798
90.7k
  LBL(200) LBL(201) LBL(202) LBL(203) LBL(204) LBL(205) LBL(206)
6799
281k
  LBL(207) LBL(208) LBL(209) LBL(210) LBL(211) LBL(212) LBL(213)
6800
250M
  LBL(214) LBL(215) LBL(216) LBL(217) LBL(218) LBL(219) LBL(220)
6801
4.43M
  LBL(221) LBL(222) LBL(223) LBL(224)
6802
0
#endif
6803
6804
0
  default:
6805
0
  PCRE2_DEBUG_UNREACHABLE();
6806
0
  return PCRE2_ERROR_INTERNAL;
6807
344M
  }
6808
344M
#undef LBL
6809
344M
}
6810
6811
6812
/*************************************************
6813
*           Match a Regular Expression           *
6814
*************************************************/
6815
6816
/* This function applies a compiled pattern to a subject string and picks out
6817
portions of the string if it matches. Two elements in the vector are set for
6818
each substring: the offsets to the start and end of the substring.
6819
6820
Arguments:
6821
  code            points to the compiled expression
6822
  subject         points to the subject string
6823
  length          length of subject string (may contain binary zeros)
6824
  start_offset    where to start in the subject string
6825
  options         option bits
6826
  match_data      points to a match_data block
6827
  mcontext        points to a PCRE2 context
6828
6829
Returns:          > 0 => success; value is the number of ovector pairs filled
6830
                  = 0 => success, but ovector is not big enough
6831
                  = -1 => failed to match (PCRE2_ERROR_NOMATCH)
6832
                  = -2 => partial match (PCRE2_ERROR_PARTIAL)
6833
                  < -2 => some kind of unexpected problem
6834
*/
6835
6836
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
6837
pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
6838
  PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
6839
  pcre2_match_context *mcontext)
6840
3.63k
{
6841
3.63k
int rc;
6842
3.63k
int was_zero_terminated = 0;
6843
3.63k
const uint8_t *start_bits = NULL;
6844
3.63k
const pcre2_real_code *re = (const pcre2_real_code *)code;
6845
6846
3.63k
BOOL anchored;
6847
3.63k
BOOL firstline;
6848
3.63k
BOOL has_first_cu = FALSE;
6849
3.63k
BOOL has_req_cu = FALSE;
6850
3.63k
BOOL startline;
6851
6852
3.63k
#if PCRE2_CODE_UNIT_WIDTH == 8
6853
3.63k
PCRE2_SPTR memchr_found_first_cu;
6854
3.63k
PCRE2_SPTR memchr_found_first_cu2;
6855
3.63k
#endif
6856
6857
3.63k
PCRE2_UCHAR first_cu = 0;
6858
3.63k
PCRE2_UCHAR first_cu2 = 0;
6859
3.63k
PCRE2_UCHAR req_cu = 0;
6860
3.63k
PCRE2_UCHAR req_cu2 = 0;
6861
6862
3.63k
PCRE2_SPTR bumpalong_limit;
6863
3.63k
PCRE2_SPTR end_subject;
6864
3.63k
PCRE2_SPTR true_end_subject;
6865
3.63k
PCRE2_SPTR start_match;
6866
3.63k
PCRE2_SPTR req_cu_ptr;
6867
3.63k
PCRE2_SPTR start_partial;
6868
3.63k
PCRE2_SPTR match_partial;
6869
6870
#ifdef SUPPORT_JIT
6871
BOOL use_jit;
6872
#endif
6873
6874
/* This flag is needed even when Unicode is not supported for convenience
6875
(it is used by the IS_NEWLINE macro). */
6876
6877
3.63k
BOOL utf = FALSE;
6878
6879
3.63k
#ifdef SUPPORT_UNICODE
6880
3.63k
BOOL ucp = FALSE;
6881
3.63k
BOOL allow_invalid;
6882
3.63k
uint32_t fragment_options = 0;
6883
#ifdef SUPPORT_JIT
6884
BOOL jit_checked_utf = FALSE;
6885
#endif
6886
3.63k
#endif  /* SUPPORT_UNICODE */
6887
6888
3.63k
PCRE2_SIZE frame_size;
6889
3.63k
PCRE2_SIZE heapframes_size;
6890
6891
/* We need to have mb as a pointer to a match block, because the IS_NEWLINE
6892
macro is used below, and it expects NLBLOCK to be defined as a pointer. */
6893
6894
3.63k
pcre2_callout_block cb;
6895
3.63k
match_block actual_match_block;
6896
3.63k
match_block *mb = &actual_match_block;
6897
6898
/* Recognize NULL, length 0 as an empty string. */
6899
6900
3.63k
if (subject == NULL && length == 0) subject = (PCRE2_SPTR)"";
6901
6902
/* Plausibility checks */
6903
6904
3.63k
if ((options & ~PUBLIC_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION;
6905
3.63k
if (code == NULL || subject == NULL || match_data == NULL)
6906
0
  return PCRE2_ERROR_NULL;
6907
6908
3.63k
start_match = subject + start_offset;
6909
3.63k
req_cu_ptr = start_match - 1;
6910
3.63k
if (length == PCRE2_ZERO_TERMINATED)
6911
0
  {
6912
0
  length = PRIV(strlen)(subject);
6913
0
  was_zero_terminated = 1;
6914
0
  }
6915
3.63k
true_end_subject = end_subject = subject + length;
6916
6917
3.63k
if (start_offset > length) return PCRE2_ERROR_BADOFFSET;
6918
6919
/* Check that the first field in the block is the magic number. */
6920
6921
3.63k
if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
6922
6923
/* Check the code unit width. */
6924
6925
3.63k
if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8)
6926
0
  return PCRE2_ERROR_BADMODE;
6927
6928
/* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the
6929
options variable for this function. Users of PCRE2 who are not calling the
6930
function directly would like to have a way of setting these flags, in the same
6931
way that they can set pcre2_compile() flags like PCRE2_NO_AUTO_POSSESS with
6932
constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and
6933
(*NOTEMPTY_ATSTART) set bits in the pattern's "flags" field which we now
6934
transfer to the options for this function. The bits are guaranteed to be
6935
adjacent, but do not have the same values. This bit of Boolean trickery assumes
6936
that the match-time bits are not more significant than the flag bits. If by
6937
accident this is not the case, a compile-time division by zero error will
6938
occur. */
6939
6940
10.8k
#define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
6941
7.26k
#define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART)
6942
3.63k
options |= (re->flags & FF) / ((FF & (~FF+1)) / (OO & (~OO+1)));
6943
3.63k
#undef FF
6944
3.63k
#undef OO
6945
6946
/* If the pattern was successfully studied with JIT support, we will run the
6947
JIT executable instead of the rest of this function. Most options must be set
6948
at compile time for the JIT code to be usable. */
6949
6950
#ifdef SUPPORT_JIT
6951
use_jit = (re->executable_jit != NULL &&
6952
          (options & ~PUBLIC_JIT_MATCH_OPTIONS) == 0);
6953
#endif
6954
6955
/* Initialize UTF/UCP parameters. */
6956
6957
3.63k
#ifdef SUPPORT_UNICODE
6958
3.63k
utf = (re->overall_options & PCRE2_UTF) != 0;
6959
3.63k
allow_invalid = (re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0;
6960
3.63k
ucp = (re->overall_options & PCRE2_UCP) != 0;
6961
3.63k
#endif  /* SUPPORT_UNICODE */
6962
6963
/* Convert the partial matching flags into an integer. */
6964
6965
3.63k
mb->partial = ((options & PCRE2_PARTIAL_HARD) != 0)? 2 :
6966
3.63k
              ((options & PCRE2_PARTIAL_SOFT) != 0)? 1 : 0;
6967
6968
/* Partial matching and PCRE2_ENDANCHORED are currently not allowed at the same
6969
time. */
6970
6971
3.63k
if (mb->partial != 0 &&
6972
0
   ((re->overall_options | options) & PCRE2_ENDANCHORED) != 0)
6973
0
  return PCRE2_ERROR_BADOPTION;
6974
6975
/* It is an error to set an offset limit without setting the flag at compile
6976
time. */
6977
6978
3.63k
if (mcontext != NULL && mcontext->offset_limit != PCRE2_UNSET &&
6979
0
     (re->overall_options & PCRE2_USE_OFFSET_LIMIT) == 0)
6980
0
  return PCRE2_ERROR_BADOFFSETLIMIT;
6981
6982
/* If the match data block was previously used with PCRE2_COPY_MATCHED_SUBJECT,
6983
free the memory that was obtained. Set the field to NULL for no match cases. */
6984
6985
3.63k
if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)
6986
0
  {
6987
0
  match_data->memctl.free((void *)match_data->subject,
6988
0
    match_data->memctl.memory_data);
6989
0
  match_data->flags &= ~PCRE2_MD_COPIED_SUBJECT;
6990
0
  }
6991
3.63k
match_data->subject = NULL;
6992
6993
/* Zero the error offset in case the first code unit is invalid UTF. */
6994
6995
3.63k
match_data->startchar = 0;
6996
6997
6998
/* ============================= JIT matching ============================== */
6999
7000
/* Prepare for JIT matching. Check a UTF string for validity unless no check is
7001
requested or invalid UTF can be handled. We check only the portion of the
7002
subject that might be inspected during matching - from the offset minus the
7003
maximum lookbehind to the given length. This saves time when a small part of a
7004
large subject is being matched by the use of a starting offset. Note that the
7005
maximum lookbehind is a number of characters, not code units. */
7006
7007
#ifdef SUPPORT_JIT
7008
if (use_jit)
7009
  {
7010
#ifdef SUPPORT_UNICODE
7011
  if (utf && (options & PCRE2_NO_UTF_CHECK) == 0 && !allow_invalid)
7012
    {
7013
7014
    /* For 8-bit and 16-bit UTF, check that the first code unit is a valid
7015
    character start. */
7016
7017
#if PCRE2_CODE_UNIT_WIDTH != 32
7018
    if (start_match < end_subject && NOT_FIRSTCU(*start_match))
7019
      {
7020
      if (start_offset > 0) return PCRE2_ERROR_BADUTFOFFSET;
7021
#if PCRE2_CODE_UNIT_WIDTH == 8
7022
      return PCRE2_ERROR_UTF8_ERR20;  /* Isolated 0x80 byte */
7023
#else
7024
      return PCRE2_ERROR_UTF16_ERR3;  /* Isolated low surrogate */
7025
#endif
7026
      }
7027
#endif  /* WIDTH != 32 */
7028
7029
    /* Move back by the maximum lookbehind, just in case it happens at the very
7030
    start of matching. */
7031
7032
#if PCRE2_CODE_UNIT_WIDTH != 32
7033
    for (unsigned int i = re->max_lookbehind; i > 0 && start_match > subject; i--)
7034
      {
7035
      start_match--;
7036
      while (start_match > subject &&
7037
#if PCRE2_CODE_UNIT_WIDTH == 8
7038
      (*start_match & 0xc0) == 0x80)
7039
#else  /* 16-bit */
7040
      (*start_match & 0xfc00) == 0xdc00)
7041
#endif
7042
        start_match--;
7043
      }
7044
#else  /* PCRE2_CODE_UNIT_WIDTH != 32 */
7045
7046
    /* In the 32-bit library, one code unit equals one character. However,
7047
    we cannot just subtract the lookbehind and then compare pointers, because
7048
    a very large lookbehind could create an invalid pointer. */
7049
7050
    if (start_offset >= re->max_lookbehind)
7051
      start_match -= re->max_lookbehind;
7052
    else
7053
      start_match = subject;
7054
#endif  /* PCRE2_CODE_UNIT_WIDTH != 32 */
7055
7056
    /* Validate the relevant portion of the subject. Adjust the offset of an
7057
    invalid code point to be an absolute offset in the whole string. */
7058
7059
    match_data->rc = PRIV(valid_utf)(start_match,
7060
      length - (start_match - subject), &(match_data->startchar));
7061
    if (match_data->rc != 0)
7062
      {
7063
      match_data->startchar += start_match - subject;
7064
      return match_data->rc;
7065
      }
7066
    jit_checked_utf = TRUE;
7067
    }
7068
#endif  /* SUPPORT_UNICODE */
7069
7070
  /* If JIT returns BADOPTION, which means that the selected complete or
7071
  partial matching mode was not compiled, fall through to the interpreter. */
7072
7073
  rc = pcre2_jit_match(code, subject, length, start_offset, options,
7074
    match_data, mcontext);
7075
  if (rc != PCRE2_ERROR_JIT_BADOPTION)
7076
    {
7077
    match_data->subject_length = length;
7078
    if (rc >= 0 && (options & PCRE2_COPY_MATCHED_SUBJECT) != 0)
7079
      {
7080
      length = CU2BYTES(length + was_zero_terminated);
7081
      match_data->subject = match_data->memctl.malloc(length,
7082
        match_data->memctl.memory_data);
7083
      if (match_data->subject == NULL) return PCRE2_ERROR_NOMEMORY;
7084
      memcpy((void *)match_data->subject, subject, length);
7085
      match_data->flags |= PCRE2_MD_COPIED_SUBJECT;
7086
      }
7087
    return rc;
7088
    }
7089
  }
7090
#endif  /* SUPPORT_JIT */
7091
7092
/* ========================= End of JIT matching ========================== */
7093
7094
7095
/* Proceed with non-JIT matching. The default is to allow lookbehinds to the
7096
start of the subject. A UTF check when there is a non-zero offset may change
7097
this. */
7098
7099
3.63k
mb->check_subject = subject;
7100
7101
/* If a UTF subject string was not checked for validity in the JIT code above,
7102
check it here, and handle support for invalid UTF strings. The check above
7103
happens only when invalid UTF is not supported and PCRE2_NO_UTF_CHECK is unset.
7104
If we get here in those circumstances, it means the subject string is valid,
7105
but for some reason JIT matching was not successful. There is no need to check
7106
the subject again.
7107
7108
We check only the portion of the subject that might be inspected during
7109
matching - from the offset minus the maximum lookbehind to the given length.
7110
This saves time when a small part of a large subject is being matched by the
7111
use of a starting offset. Note that the maximum lookbehind is a number of
7112
characters, not code units.
7113
7114
Note also that support for invalid UTF forces a check, overriding the setting
7115
of PCRE2_NO_UTF_CHECK. */
7116
7117
3.63k
#ifdef SUPPORT_UNICODE
7118
3.63k
if (utf &&
7119
#ifdef SUPPORT_JIT
7120
    !jit_checked_utf &&
7121
#endif
7122
826
    ((options & PCRE2_NO_UTF_CHECK) == 0 || allow_invalid))
7123
813
  {
7124
813
#if PCRE2_CODE_UNIT_WIDTH != 32
7125
813
  BOOL skipped_bad_start = FALSE;
7126
813
#endif
7127
7128
  /* For 8-bit and 16-bit UTF, check that the first code unit is a valid
7129
  character start. If we are handling invalid UTF, just skip over such code
7130
  units. Otherwise, give an appropriate error. */
7131
7132
813
#if PCRE2_CODE_UNIT_WIDTH != 32
7133
813
  if (allow_invalid)
7134
0
    {
7135
0
    while (start_match < end_subject && NOT_FIRSTCU(*start_match))
7136
0
      {
7137
0
      start_match++;
7138
0
      skipped_bad_start = TRUE;
7139
0
      }
7140
0
    }
7141
813
  else if (start_match < end_subject && NOT_FIRSTCU(*start_match))
7142
1
    {
7143
1
    if (start_offset > 0) return PCRE2_ERROR_BADUTFOFFSET;
7144
1
#if PCRE2_CODE_UNIT_WIDTH == 8
7145
1
    return PCRE2_ERROR_UTF8_ERR20;  /* Isolated 0x80 byte */
7146
#else
7147
    return PCRE2_ERROR_UTF16_ERR3;  /* Isolated low surrogate */
7148
#endif
7149
1
    }
7150
812
#endif  /* WIDTH != 32 */
7151
7152
  /* The mb->check_subject field points to the start of UTF checking;
7153
  lookbehinds can go back no further than this. */
7154
7155
812
  mb->check_subject = start_match;
7156
7157
  /* Move back by the maximum lookbehind, just in case it happens at the very
7158
  start of matching, but don't do this if we skipped bad 8-bit or 16-bit code
7159
  units above. */
7160
7161
812
#if PCRE2_CODE_UNIT_WIDTH != 32
7162
812
  if (!skipped_bad_start)
7163
812
    {
7164
812
    unsigned int i;
7165
812
    for (i = re->max_lookbehind; i > 0 && mb->check_subject > subject; i--)
7166
0
      {
7167
0
      mb->check_subject--;
7168
0
      while (mb->check_subject > subject &&
7169
0
#if PCRE2_CODE_UNIT_WIDTH == 8
7170
0
      (*mb->check_subject & 0xc0) == 0x80)
7171
#else  /* 16-bit */
7172
      (*mb->check_subject & 0xfc00) == 0xdc00)
7173
#endif
7174
0
        mb->check_subject--;
7175
0
      }
7176
812
    }
7177
#else  /* PCRE2_CODE_UNIT_WIDTH != 32 */
7178
7179
  /* In the 32-bit library, one code unit equals one character. However,
7180
  we cannot just subtract the lookbehind and then compare pointers, because
7181
  a very large lookbehind could create an invalid pointer. */
7182
7183
  if (start_offset >= re->max_lookbehind)
7184
    mb->check_subject -= re->max_lookbehind;
7185
  else
7186
    mb->check_subject = subject;
7187
#endif  /* PCRE2_CODE_UNIT_WIDTH != 32 */
7188
7189
  /* Validate the relevant portion of the subject. There's a loop in case we
7190
  encounter bad UTF in the characters preceding start_match which we are
7191
  scanning because of a lookbehind. */
7192
7193
812
  for (;;)
7194
812
    {
7195
812
    match_data->rc = PRIV(valid_utf)(mb->check_subject,
7196
812
      length - (mb->check_subject - subject), &(match_data->startchar));
7197
7198
812
    if (match_data->rc == 0) break;   /* Valid UTF string */
7199
7200
    /* Invalid UTF string. Adjust the offset to be an absolute offset in the
7201
    whole string. If we are handling invalid UTF strings, set end_subject to
7202
    stop before the bad code unit, and set the options to "not end of line".
7203
    Otherwise return the error. */
7204
7205
110
    match_data->startchar += mb->check_subject - subject;
7206
110
    if (!allow_invalid || match_data->rc > 0) return match_data->rc;
7207
0
    end_subject = subject + match_data->startchar;
7208
7209
    /* If the end precedes start_match, it means there is invalid UTF in the
7210
    extra code units we reversed over because of a lookbehind. Advance past the
7211
    first bad code unit, and then skip invalid character starting code units in
7212
    8-bit and 16-bit modes, and try again with the original end point. */
7213
7214
0
    if (end_subject < start_match)
7215
0
      {
7216
0
      mb->check_subject = end_subject + 1;
7217
0
#if PCRE2_CODE_UNIT_WIDTH != 32
7218
0
      while (mb->check_subject < start_match && NOT_FIRSTCU(*mb->check_subject))
7219
0
        mb->check_subject++;
7220
0
#endif
7221
0
      end_subject = true_end_subject;
7222
0
      }
7223
7224
    /* Otherwise, set the not end of line option, and do the match. */
7225
7226
0
    else
7227
0
      {
7228
0
      fragment_options = PCRE2_NOTEOL;
7229
0
      break;
7230
0
      }
7231
0
    }
7232
812
  }
7233
3.52k
#endif  /* SUPPORT_UNICODE */
7234
7235
/* A NULL match context means "use a default context", but we take the memory
7236
control functions from the pattern. */
7237
7238
3.52k
if (mcontext == NULL)
7239
0
  {
7240
0
  mcontext = (pcre2_match_context *)(&PRIV(default_match_context));
7241
0
  mb->memctl = re->memctl;
7242
0
  }
7243
3.52k
else mb->memctl = mcontext->memctl;
7244
7245
3.52k
anchored = ((re->overall_options | options) & PCRE2_ANCHORED) != 0;
7246
3.52k
firstline = !anchored && (re->overall_options & PCRE2_FIRSTLINE) != 0;
7247
3.52k
startline = (re->flags & PCRE2_STARTLINE) != 0;
7248
3.52k
bumpalong_limit = (mcontext->offset_limit == PCRE2_UNSET)?
7249
3.52k
  true_end_subject : subject + mcontext->offset_limit;
7250
7251
/* Initialize and set up the fixed fields in the callout block, with a pointer
7252
in the match block. */
7253
7254
3.52k
mb->cb = &cb;
7255
3.52k
cb.version = 2;
7256
3.52k
cb.subject = subject;
7257
3.52k
cb.subject_length = (PCRE2_SIZE)(end_subject - subject);
7258
3.52k
cb.callout_flags = 0;
7259
7260
/* Fill in the remaining fields in the match block, except for moptions, which
7261
gets set later. */
7262
7263
3.52k
mb->callout = mcontext->callout;
7264
3.52k
mb->callout_data = mcontext->callout_data;
7265
7266
3.52k
mb->start_subject = subject;
7267
3.52k
mb->start_offset = start_offset;
7268
3.52k
mb->end_subject = end_subject;
7269
3.52k
mb->true_end_subject = true_end_subject;
7270
3.52k
mb->hasthen = (re->flags & PCRE2_HASTHEN) != 0;
7271
3.52k
mb->allowemptypartial = (re->max_lookbehind > 0) ||
7272
2.92k
    (re->flags & PCRE2_MATCH_EMPTY) != 0;
7273
3.52k
mb->poptions = re->overall_options;          /* Pattern options */
7274
3.52k
mb->ignore_skip_arg = 0;
7275
3.52k
mb->mark = mb->nomatch_mark = NULL;          /* In case never set */
7276
7277
/* The name table is needed for finding all the numbers associated with a
7278
given name, for condition testing. The code follows the name table. */
7279
7280
3.52k
mb->name_table = (PCRE2_SPTR)((const uint8_t *)re + sizeof(pcre2_real_code));
7281
3.52k
mb->name_count = re->name_count;
7282
3.52k
mb->name_entry_size = re->name_entry_size;
7283
3.52k
mb->start_code = (PCRE2_SPTR)((const uint8_t *)re + re->code_start);
7284
7285
/* Process the \R and newline settings. */
7286
7287
3.52k
mb->bsr_convention = re->bsr_convention;
7288
3.52k
mb->nltype = NLTYPE_FIXED;
7289
3.52k
switch(re->newline_convention)
7290
3.52k
  {
7291
0
  case PCRE2_NEWLINE_CR:
7292
0
  mb->nllen = 1;
7293
0
  mb->nl[0] = CHAR_CR;
7294
0
  break;
7295
7296
3.52k
  case PCRE2_NEWLINE_LF:
7297
3.52k
  mb->nllen = 1;
7298
3.52k
  mb->nl[0] = CHAR_NL;
7299
3.52k
  break;
7300
7301
0
  case PCRE2_NEWLINE_NUL:
7302
0
  mb->nllen = 1;
7303
0
  mb->nl[0] = CHAR_NUL;
7304
0
  break;
7305
7306
0
  case PCRE2_NEWLINE_CRLF:
7307
0
  mb->nllen = 2;
7308
0
  mb->nl[0] = CHAR_CR;
7309
0
  mb->nl[1] = CHAR_NL;
7310
0
  break;
7311
7312
0
  case PCRE2_NEWLINE_ANY:
7313
0
  mb->nltype = NLTYPE_ANY;
7314
0
  break;
7315
7316
0
  case PCRE2_NEWLINE_ANYCRLF:
7317
0
  mb->nltype = NLTYPE_ANYCRLF;
7318
0
  break;
7319
7320
0
  default:
7321
0
  PCRE2_DEBUG_UNREACHABLE();
7322
0
  return PCRE2_ERROR_INTERNAL;
7323
3.52k
  }
7324
7325
/* The backtracking frames have fixed data at the front, and a PCRE2_SIZE
7326
vector at the end, whose size depends on the number of capturing parentheses in
7327
the pattern. It is not used at all if there are no capturing parentheses.
7328
7329
  frame_size                   is the total size of each frame
7330
  match_data->heapframes       is the pointer to the frames vector
7331
  match_data->heapframes_size  is the allocated size of the vector
7332
7333
We must pad the frame_size for alignment to ensure subsequent frames are as
7334
aligned as heapframe. Whilst ovector is word-aligned due to being a PCRE2_SIZE
7335
array, that does not guarantee it is suitably aligned for pointers, as some
7336
architectures have pointers that are larger than a size_t. */
7337
7338
3.52k
frame_size = (offsetof(heapframe, ovector) +
7339
3.52k
  re->top_bracket * 2 * sizeof(PCRE2_SIZE) + HEAPFRAME_ALIGNMENT - 1) &
7340
3.52k
  ~(HEAPFRAME_ALIGNMENT - 1);
7341
7342
/* Limits set in the pattern override the match context only if they are
7343
smaller. */
7344
7345
3.52k
mb->heap_limit = ((mcontext->heap_limit < re->limit_heap)?
7346
3.52k
  mcontext->heap_limit : re->limit_heap);
7347
7348
3.52k
mb->match_limit = (mcontext->match_limit < re->limit_match)?
7349
3.52k
  mcontext->match_limit : re->limit_match;
7350
7351
3.52k
mb->match_limit_depth = (mcontext->depth_limit < re->limit_depth)?
7352
3.52k
  mcontext->depth_limit : re->limit_depth;
7353
7354
/* If a pattern has very many capturing parentheses, the frame size may be very
7355
large. Set the initial frame vector size to ensure that there are at least 10
7356
available frames, but enforce a minimum of START_FRAMES_SIZE. If this is
7357
greater than the heap limit, get as large a vector as possible. */
7358
7359
3.52k
heapframes_size = frame_size * 10;
7360
3.52k
if (heapframes_size < START_FRAMES_SIZE) heapframes_size = START_FRAMES_SIZE;
7361
3.52k
if (heapframes_size / 1024 > mb->heap_limit)
7362
0
  {
7363
0
  PCRE2_SIZE max_size = 1024 * mb->heap_limit;
7364
0
  if (max_size < frame_size) return PCRE2_ERROR_HEAPLIMIT;
7365
0
  heapframes_size = max_size;
7366
0
  }
7367
7368
/* If an existing frame vector in the match_data block is large enough, we can
7369
use it. Otherwise, free any pre-existing vector and get a new one. */
7370
7371
3.52k
if (match_data->heapframes_size < heapframes_size)
7372
163
  {
7373
163
  match_data->memctl.free(match_data->heapframes,
7374
163
    match_data->memctl.memory_data);
7375
163
  match_data->heapframes = match_data->memctl.malloc(heapframes_size,
7376
163
    match_data->memctl.memory_data);
7377
163
  if (match_data->heapframes == NULL)
7378
0
    {
7379
0
    match_data->heapframes_size = 0;
7380
0
    return PCRE2_ERROR_NOMEMORY;
7381
0
    }
7382
163
  match_data->heapframes_size = heapframes_size;
7383
163
  }
7384
7385
/* Write to the ovector within the first frame to mark every capture unset and
7386
to avoid uninitialized memory read errors when it is copied to a new frame. */
7387
7388
3.52k
memset((char *)(match_data->heapframes) + offsetof(heapframe, ovector), 0xff,
7389
3.52k
  frame_size - offsetof(heapframe, ovector));
7390
7391
/* Pointers to the individual character tables */
7392
7393
3.52k
mb->lcc = re->tables + lcc_offset;
7394
3.52k
mb->fcc = re->tables + fcc_offset;
7395
3.52k
mb->ctypes = re->tables + ctypes_offset;
7396
7397
/* Set up the first code unit to match, if available. If there's no first code
7398
unit there may be a bitmap of possible first characters. */
7399
7400
3.52k
if ((re->flags & PCRE2_FIRSTSET) != 0)
7401
677
  {
7402
677
  has_first_cu = TRUE;
7403
677
  first_cu = first_cu2 = (PCRE2_UCHAR)(re->first_codeunit);
7404
677
  if ((re->flags & PCRE2_FIRSTCASELESS) != 0)
7405
9
    {
7406
9
    first_cu2 = TABLE_GET(first_cu, mb->fcc, first_cu);
7407
9
#ifdef SUPPORT_UNICODE
7408
9
#if PCRE2_CODE_UNIT_WIDTH == 8
7409
9
    if (first_cu > 127 && ucp && !utf) first_cu2 = UCD_OTHERCASE(first_cu);
7410
#else
7411
    if (first_cu > 127 && (utf || ucp)) first_cu2 = UCD_OTHERCASE(first_cu);
7412
#endif
7413
9
#endif  /* SUPPORT_UNICODE */
7414
9
    }
7415
677
  }
7416
2.84k
else
7417
2.84k
  if (!startline && (re->flags & PCRE2_FIRSTMAPSET) != 0)
7418
1.08k
    start_bits = re->start_bitmap;
7419
7420
/* There may also be a "last known required character" set. */
7421
7422
3.52k
if ((re->flags & PCRE2_LASTSET) != 0)
7423
590
  {
7424
590
  has_req_cu = TRUE;
7425
590
  req_cu = req_cu2 = (PCRE2_UCHAR)(re->last_codeunit);
7426
590
  if ((re->flags & PCRE2_LASTCASELESS) != 0)
7427
59
    {
7428
59
    req_cu2 = TABLE_GET(req_cu, mb->fcc, req_cu);
7429
59
#ifdef SUPPORT_UNICODE
7430
59
#if PCRE2_CODE_UNIT_WIDTH == 8
7431
59
    if (req_cu > 127 && ucp && !utf) req_cu2 = UCD_OTHERCASE(req_cu);
7432
#else
7433
    if (req_cu > 127 && (utf || ucp)) req_cu2 = UCD_OTHERCASE(req_cu);
7434
#endif
7435
59
#endif  /* SUPPORT_UNICODE */
7436
59
    }
7437
590
  }
7438
7439
7440
/* ==========================================================================*/
7441
7442
/* Loop for handling unanchored repeated matching attempts; for anchored regexs
7443
the loop runs just once. */
7444
7445
3.52k
#ifdef SUPPORT_UNICODE
7446
3.52k
FRAGMENT_RESTART:
7447
3.52k
#endif
7448
7449
3.52k
start_partial = match_partial = NULL;
7450
3.52k
mb->hitend = FALSE;
7451
7452
3.52k
#if PCRE2_CODE_UNIT_WIDTH == 8
7453
3.52k
memchr_found_first_cu = NULL;
7454
3.52k
memchr_found_first_cu2 = NULL;
7455
3.52k
#endif
7456
7457
3.52k
for(;;)
7458
241k
  {
7459
241k
  PCRE2_SPTR new_start_match;
7460
7461
  /* ----------------- Start of match optimizations ---------------- */
7462
7463
  /* There are some optimizations that avoid running the match if a known
7464
  starting point is not found, or if a known later code unit is not present.
7465
  However, there is an option (settable at compile time) that disables these,
7466
  for testing and for ensuring that all callouts do actually occur. */
7467
7468
241k
  if ((re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0)
7469
241k
    {
7470
    /* If firstline is TRUE, the start of the match is constrained to the first
7471
    line of a multiline string. That is, the match must be before or at the
7472
    first newline following the start of matching. Temporarily adjust
7473
    end_subject so that we stop the scans for a first code unit at a newline.
7474
    If the match fails at the newline, later code breaks the loop. */
7475
7476
241k
    if (firstline)
7477
0
      {
7478
0
      PCRE2_SPTR t = start_match;
7479
0
#ifdef SUPPORT_UNICODE
7480
0
      if (utf)
7481
0
        {
7482
0
        while (t < end_subject && !IS_NEWLINE(t))
7483
0
          {
7484
0
          t++;
7485
0
          ACROSSCHAR(t < end_subject, t, t++);
7486
0
          }
7487
0
        }
7488
0
      else
7489
0
#endif
7490
0
      while (t < end_subject && !IS_NEWLINE(t)) t++;
7491
0
      end_subject = t;
7492
0
      }
7493
7494
    /* Anchored: check the first code unit if one is recorded. This may seem
7495
    pointless but it can help in detecting a no match case without scanning for
7496
    the required code unit. */
7497
7498
241k
    if (anchored)
7499
346
      {
7500
346
      if (has_first_cu || start_bits != NULL)
7501
51
        {
7502
51
        BOOL ok = start_match < end_subject;
7503
51
        if (ok)
7504
49
          {
7505
49
          PCRE2_UCHAR c = UCHAR21TEST(start_match);
7506
49
          ok = has_first_cu && (c == first_cu || c == first_cu2);
7507
49
          if (!ok && start_bits != NULL)
7508
33
            {
7509
#if PCRE2_CODE_UNIT_WIDTH != 8
7510
            if (c > 255) c = 255;
7511
#endif
7512
33
            ok = (start_bits[c/8] & (1u << (c&7))) != 0;
7513
33
            }
7514
49
          }
7515
51
        if (!ok)
7516
18
          {
7517
18
          rc = MATCH_NOMATCH;
7518
18
          break;
7519
18
          }
7520
51
        }
7521
346
      }
7522
7523
    /* Not anchored. Advance to a unique first code unit if there is one. */
7524
7525
241k
    else
7526
241k
      {
7527
241k
      if (has_first_cu)
7528
1.10k
        {
7529
1.10k
        if (first_cu != first_cu2)  /* Caseless */
7530
14
          {
7531
          /* In 16-bit and 32-bit modes we have to do our own search, so can
7532
          look for both cases at once. */
7533
7534
#if PCRE2_CODE_UNIT_WIDTH != 8
7535
          PCRE2_UCHAR smc;
7536
          while (start_match < end_subject &&
7537
                (smc = UCHAR21TEST(start_match)) != first_cu &&
7538
                 smc != first_cu2)
7539
            start_match++;
7540
#else
7541
          /* In 8-bit mode, the use of memchr() gives a big speed up, even
7542
          though we have to call it twice in order to find the earliest
7543
          occurrence of the code unit in either of its cases. Caching is used
7544
          to remember the positions of previously found code units. This can
7545
          make a huge difference when the strings are very long and only one
7546
          case is actually present. */
7547
7548
14
          PCRE2_SPTR pp1 = NULL;
7549
14
          PCRE2_SPTR pp2 = NULL;
7550
14
          PCRE2_SIZE searchlength = end_subject - start_match;
7551
7552
          /* If we haven't got a previously found position for first_cu, or if
7553
          the current starting position is later, we need to do a search. If
7554
          the code unit is not found, set it to the end. */
7555
7556
14
          if (memchr_found_first_cu == NULL ||
7557
5
              start_match > memchr_found_first_cu)
7558
11
            {
7559
11
            pp1 = memchr(start_match, first_cu, searchlength);
7560
11
            memchr_found_first_cu = (pp1 == NULL)? end_subject : pp1;
7561
11
            }
7562
7563
          /* If the start is before a previously found position, use the
7564
          previous position, or NULL if a previous search failed. */
7565
7566
3
          else pp1 = (memchr_found_first_cu == end_subject)? NULL :
7567
3
            memchr_found_first_cu;
7568
7569
          /* Do the same thing for the other case. */
7570
7571
14
          if (memchr_found_first_cu2 == NULL ||
7572
5
              start_match > memchr_found_first_cu2)
7573
12
            {
7574
12
            pp2 = memchr(start_match, first_cu2, searchlength);
7575
12
            memchr_found_first_cu2 = (pp2 == NULL)? end_subject : pp2;
7576
12
            }
7577
7578
2
          else pp2 = (memchr_found_first_cu2 == end_subject)? NULL :
7579
2
            memchr_found_first_cu2;
7580
7581
          /* Set the start to the end of the subject if neither case was found.
7582
          Otherwise, use the earlier found point. */
7583
7584
14
          if (pp1 == NULL)
7585
10
            start_match = (pp2 == NULL)? end_subject : pp2;
7586
4
          else
7587
4
            start_match = (pp2 == NULL || pp1 < pp2)? pp1 : pp2;
7588
7589
14
#endif  /* 8-bit handling */
7590
14
          }
7591
7592
        /* The caseful case is much simpler. */
7593
7594
1.09k
        else
7595
1.09k
          {
7596
#if PCRE2_CODE_UNIT_WIDTH != 8
7597
          while (start_match < end_subject && UCHAR21TEST(start_match) !=
7598
                 first_cu)
7599
            start_match++;
7600
#else
7601
1.09k
          start_match = memchr(start_match, first_cu, end_subject - start_match);
7602
1.09k
          if (start_match == NULL) start_match = end_subject;
7603
1.09k
#endif
7604
1.09k
          }
7605
7606
        /* If we can't find the required first code unit, having reached the
7607
        true end of the subject, break the bumpalong loop, to force a match
7608
        failure, except when doing partial matching, when we let the next cycle
7609
        run at the end of the subject. To see why, consider the pattern
7610
        /(?<=abc)def/, which partially matches "abc", even though the string
7611
        does not contain the starting character "d". If we have not reached the
7612
        true end of the subject (PCRE2_FIRSTLINE caused end_subject to be
7613
        temporarily modified) we also let the cycle run, because the matching
7614
        string is legitimately allowed to start with the first code unit of a
7615
        newline. */
7616
7617
1.10k
        if (mb->partial == 0 && start_match >= mb->end_subject)
7618
247
          {
7619
247
          rc = MATCH_NOMATCH;
7620
247
          break;
7621
247
          }
7622
1.10k
        }
7623
7624
      /* If there's no first code unit, advance to just after a linebreak for a
7625
      multiline match if required. */
7626
7627
240k
      else if (startline)
7628
3
        {
7629
3
        if (start_match > mb->start_subject + start_offset)
7630
2
          {
7631
2
#ifdef SUPPORT_UNICODE
7632
2
          if (utf)
7633
0
            {
7634
0
            while (start_match < end_subject && !WAS_NEWLINE(start_match))
7635
0
              {
7636
0
              start_match++;
7637
0
              ACROSSCHAR(start_match < end_subject, start_match, start_match++);
7638
0
              }
7639
0
            }
7640
2
          else
7641
2
#endif
7642
60
          while (start_match < end_subject && !WAS_NEWLINE(start_match))
7643
58
            start_match++;
7644
7645
          /* If we have just passed a CR and the newline option is ANY or
7646
          ANYCRLF, and we are now at a LF, advance the match position by one
7647
          more code unit. */
7648
7649
2
          if (start_match[-1] == CHAR_CR &&
7650
0
               (mb->nltype == NLTYPE_ANY || mb->nltype == NLTYPE_ANYCRLF) &&
7651
0
               start_match < end_subject &&
7652
0
               UCHAR21TEST(start_match) == CHAR_NL)
7653
0
            start_match++;
7654
2
          }
7655
3
        }
7656
7657
      /* If there's no first code unit or a requirement for a multiline line
7658
      start, advance to a non-unique first code unit if any have been
7659
      identified. The bitmap contains only 256 bits. When code units are 16 or
7660
      32 bits wide, all code units greater than 254 set the 255 bit. */
7661
7662
240k
      else if (start_bits != NULL)
7663
35.3k
        {
7664
65.1k
        while (start_match < end_subject)
7665
64.6k
          {
7666
64.6k
          uint32_t c = UCHAR21TEST(start_match);
7667
#if PCRE2_CODE_UNIT_WIDTH != 8
7668
          if (c > 255) c = 255;
7669
#endif
7670
64.6k
          if ((start_bits[c/8] & (1u << (c&7))) != 0) break;
7671
29.8k
          start_match++;
7672
29.8k
          }
7673
7674
        /* See comment above in first_cu checking about the next few lines. */
7675
7676
35.3k
        if (mb->partial == 0 && start_match >= mb->end_subject)
7677
485
          {
7678
485
          rc = MATCH_NOMATCH;
7679
485
          break;
7680
485
          }
7681
35.3k
        }
7682
241k
      }   /* End first code unit handling */
7683
7684
    /* Restore fudged end_subject */
7685
7686
240k
    end_subject = mb->end_subject;
7687
7688
    /* The following two optimizations must be disabled for partial matching. */
7689
7690
240k
    if (mb->partial == 0)
7691
240k
      {
7692
240k
      PCRE2_SPTR p;
7693
7694
      /* The minimum matching length is a lower bound; no string of that length
7695
      may actually match the pattern. Although the value is, strictly, in
7696
      characters, we treat it as code units to avoid spending too much time in
7697
      this optimization. */
7698
7699
240k
      if (end_subject - start_match < re->minlength)
7700
1.54k
        {
7701
1.54k
        rc = MATCH_NOMATCH;
7702
1.54k
        break;
7703
1.54k
        }
7704
7705
      /* If req_cu is set, we know that that code unit must appear in the
7706
      subject for the (non-partial) match to succeed. If the first code unit is
7707
      set, req_cu must be later in the subject; otherwise the test starts at
7708
      the match point. This optimization can save a huge amount of backtracking
7709
      in patterns with nested unlimited repeats that aren't going to match.
7710
      Writing separate code for caseful/caseless versions makes it go faster,
7711
      as does using an autoincrement and backing off on a match. As in the case
7712
      of the first code unit, using memchr() in the 8-bit library gives a big
7713
      speed up. Unlike the first_cu check above, we do not need to call
7714
      memchr() twice in the caseless case because we only need to check for the
7715
      presence of the character in either case, not find the first occurrence.
7716
7717
      The search can be skipped if the code unit was found later than the
7718
      current starting point in a previous iteration of the bumpalong loop.
7719
7720
      HOWEVER: when the subject string is very, very long, searching to its end
7721
      can take a long time, and give bad performance on quite ordinary
7722
      anchored patterns. This showed up when somebody was matching something
7723
      like /^\d+C/ on a 32-megabyte string... so we don't do this when the
7724
      string is sufficiently long, but it's worth searching a lot more for
7725
      unanchored patterns. */
7726
7727
239k
      p = start_match + (has_first_cu? 1:0);
7728
239k
      if (has_req_cu && p > req_cu_ptr)
7729
1.50k
        {
7730
1.50k
        PCRE2_SIZE check_length = end_subject - start_match;
7731
7732
1.50k
        if (check_length < REQ_CU_MAX ||
7733
0
              (!anchored && check_length < REQ_CU_MAX * 1000))
7734
1.50k
          {
7735
1.50k
          if (req_cu != req_cu2)  /* Caseless */
7736
44
            {
7737
#if PCRE2_CODE_UNIT_WIDTH != 8
7738
            while (p < end_subject)
7739
              {
7740
              uint32_t pp = UCHAR21INCTEST(p);
7741
              if (pp == req_cu || pp == req_cu2) { p--; break; }
7742
              }
7743
#else  /* 8-bit code units */
7744
44
            PCRE2_SPTR pp = p;
7745
44
            p = memchr(pp, req_cu, end_subject - pp);
7746
44
            if (p == NULL)
7747
27
              {
7748
27
              p = memchr(pp, req_cu2, end_subject - pp);
7749
27
              if (p == NULL) p = end_subject;
7750
27
              }
7751
44
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
7752
44
            }
7753
7754
          /* The caseful case */
7755
7756
1.45k
          else
7757
1.45k
            {
7758
#if PCRE2_CODE_UNIT_WIDTH != 8
7759
            while (p < end_subject)
7760
              {
7761
              if (UCHAR21INCTEST(p) == req_cu) { p--; break; }
7762
              }
7763
7764
#else  /* 8-bit code units */
7765
1.45k
            p = memchr(p, req_cu, end_subject - p);
7766
1.45k
            if (p == NULL) p = end_subject;
7767
1.45k
#endif
7768
1.45k
            }
7769
7770
          /* If we can't find the required code unit, break the bumpalong loop,
7771
          forcing a match failure. */
7772
7773
1.50k
          if (p >= end_subject)
7774
170
            {
7775
170
            rc = MATCH_NOMATCH;
7776
170
            break;
7777
170
            }
7778
7779
          /* If we have found the required code unit, save the point where we
7780
          found it, so that we don't search again next time round the bumpalong
7781
          loop if the start hasn't yet passed this code unit. */
7782
7783
1.33k
          req_cu_ptr = p;
7784
1.33k
          }
7785
1.50k
        }
7786
239k
      }
7787
240k
    }
7788
7789
  /* ------------ End of start of match optimizations ------------ */
7790
7791
  /* Give no match if we have passed the bumpalong limit. */
7792
7793
239k
  if (start_match > bumpalong_limit)
7794
0
    {
7795
0
    rc = MATCH_NOMATCH;
7796
0
    break;
7797
0
    }
7798
7799
  /* OK, we can now run the match. If "hitend" is set afterwards, remember the
7800
  first starting point for which a partial match was found. */
7801
7802
239k
  cb.start_match = (PCRE2_SIZE)(start_match - subject);
7803
239k
  cb.callout_flags |= PCRE2_CALLOUT_STARTMATCH;
7804
7805
239k
  mb->start_used_ptr = start_match;
7806
239k
  mb->last_used_ptr = start_match;
7807
239k
#ifdef SUPPORT_UNICODE
7808
239k
  mb->moptions = options | fragment_options;
7809
#else
7810
  mb->moptions = options;
7811
#endif
7812
239k
  mb->match_call_count = 0;
7813
239k
  mb->end_offset_top = 0;
7814
239k
  mb->skip_arg_count = 0;
7815
7816
#ifdef DEBUG_SHOW_OPS
7817
  fprintf(stderr, "++ Calling match()\n");
7818
#endif
7819
7820
239k
  rc = match(start_match, mb->start_code, re->top_bracket, frame_size,
7821
239k
    match_data, mb);
7822
7823
#ifdef DEBUG_SHOW_OPS
7824
  fprintf(stderr, "++ match() returned %d\n\n", rc);
7825
#endif
7826
7827
239k
  if (mb->hitend && start_partial == NULL)
7828
0
    {
7829
0
    start_partial = mb->start_used_ptr;
7830
0
    match_partial = start_match;
7831
0
    }
7832
7833
239k
  switch(rc)
7834
239k
    {
7835
    /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
7836
    the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP
7837
    entirely. The only way we can do that is to re-do the match at the same
7838
    point, with a flag to force SKIP with an argument to be ignored. Just
7839
    treating this case as NOMATCH does not work because it does not check other
7840
    alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. */
7841
7842
0
    case MATCH_SKIP_ARG:
7843
0
    new_start_match = start_match;
7844
0
    mb->ignore_skip_arg = mb->skip_arg_count;
7845
0
    break;
7846
7847
    /* SKIP passes back the next starting point explicitly, but if it is no
7848
    greater than the match we have just done, treat it as NOMATCH. */
7849
7850
0
    case MATCH_SKIP:
7851
0
    if (mb->verb_skip_ptr > start_match)
7852
0
      {
7853
0
      new_start_match = mb->verb_skip_ptr;
7854
0
      break;
7855
0
      }
7856
    /* Fall through */
7857
7858
    /* NOMATCH and PRUNE advance by one character. THEN at this level acts
7859
    exactly like PRUNE. Unset ignore SKIP-with-argument. */
7860
7861
238k
    case MATCH_NOMATCH:
7862
238k
    case MATCH_PRUNE:
7863
238k
    case MATCH_THEN:
7864
238k
    mb->ignore_skip_arg = 0;
7865
238k
    new_start_match = start_match + 1;
7866
238k
#ifdef SUPPORT_UNICODE
7867
238k
    if (utf)
7868
53.0k
      ACROSSCHAR(new_start_match < end_subject, new_start_match,
7869
238k
        new_start_match++);
7870
238k
#endif
7871
238k
    break;
7872
7873
    /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
7874
7875
0
    case MATCH_COMMIT:
7876
0
    rc = MATCH_NOMATCH;
7877
0
    goto ENDLOOP;
7878
7879
    /* Any other return is either a match, or some kind of error. */
7880
7881
740
    default:
7882
740
    goto ENDLOOP;
7883
239k
    }
7884
7885
  /* Control reaches here for the various types of "no match at this point"
7886
  result. Reset the code to MATCH_NOMATCH for subsequent checking. */
7887
7888
238k
  rc = MATCH_NOMATCH;
7889
7890
  /* If PCRE2_FIRSTLINE is set, the match must happen before or at the first
7891
  newline in the subject (though it may continue over the newline). Therefore,
7892
  if we have just failed to match, starting at a newline, do not continue. */
7893
7894
238k
  if (firstline && IS_NEWLINE(start_match)) break;
7895
7896
  /* Advance to new matching position */
7897
7898
238k
  start_match = new_start_match;
7899
7900
  /* Break the loop if the pattern is anchored or if we have passed the end of
7901
  the subject. */
7902
7903
238k
  if (anchored || start_match > end_subject) break;
7904
7905
  /* If we have just passed a CR and we are now at a LF, and the pattern does
7906
  not contain any explicit matches for \r or \n, and the newline option is CRLF
7907
  or ANY or ANYCRLF, advance the match position by one more code unit. In
7908
  normal matching start_match will always be greater than the first position at
7909
  this stage, but a failed *SKIP can cause a return at the same point, which is
7910
  why the first test exists. */
7911
7912
238k
  if (start_match > subject + start_offset &&
7913
238k
      start_match[-1] == CHAR_CR &&
7914
1.45k
      start_match < end_subject &&
7915
1.44k
      *start_match == CHAR_NL &&
7916
252
      (re->flags & PCRE2_HASCRORLF) == 0 &&
7917
192
        (mb->nltype == NLTYPE_ANY ||
7918
192
         mb->nltype == NLTYPE_ANYCRLF ||
7919
192
         mb->nllen == 2))
7920
0
    start_match++;
7921
7922
238k
  mb->mark = NULL;   /* Reset for start of next match attempt */
7923
238k
  }                  /* End of for(;;) "bumpalong" loop */
7924
7925
/* ==========================================================================*/
7926
7927
/* When we reach here, one of the following stopping conditions is true:
7928
7929
(1) The match succeeded, either completely, or partially;
7930
7931
(2) The pattern is anchored or the match was failed after (*COMMIT);
7932
7933
(3) We are past the end of the subject or the bumpalong limit;
7934
7935
(4) PCRE2_FIRSTLINE is set and we have failed to match at a newline, because
7936
    this option requests that a match occur at or before the first newline in
7937
    the subject.
7938
7939
(5) Some kind of error occurred.
7940
7941
*/
7942
7943
3.52k
ENDLOOP:
7944
7945
/* If end_subject != true_end_subject, it means we are handling invalid UTF,
7946
and have just processed a non-terminal fragment. If this resulted in no match
7947
or a partial match we must carry on to the next fragment (a partial match is
7948
returned to the caller only at the very end of the subject). A loop is used to
7949
avoid trying to match against empty fragments; if the pattern can match an
7950
empty string it would have done so already. */
7951
7952
3.52k
#ifdef SUPPORT_UNICODE
7953
3.52k
if (utf && end_subject != true_end_subject &&
7954
0
    (rc == MATCH_NOMATCH || rc == PCRE2_ERROR_PARTIAL))
7955
0
  {
7956
0
  for (;;)
7957
0
    {
7958
    /* Advance past the first bad code unit, and then skip invalid character
7959
    starting code units in 8-bit and 16-bit modes. */
7960
7961
0
    start_match = end_subject + 1;
7962
7963
0
#if PCRE2_CODE_UNIT_WIDTH != 32
7964
0
    while (start_match < true_end_subject && NOT_FIRSTCU(*start_match))
7965
0
      start_match++;
7966
0
#endif
7967
7968
    /* If we have hit the end of the subject, there isn't another non-empty
7969
    fragment, so give up. */
7970
7971
0
    if (start_match >= true_end_subject)
7972
0
      {
7973
0
      rc = MATCH_NOMATCH;  /* In case it was partial */
7974
0
      match_partial = NULL;
7975
0
      break;
7976
0
      }
7977
7978
    /* Check the rest of the subject */
7979
7980
0
    mb->check_subject = start_match;
7981
0
    rc = PRIV(valid_utf)(start_match, length - (start_match - subject),
7982
0
      &(match_data->startchar));
7983
7984
    /* The rest of the subject is valid UTF. */
7985
7986
0
    if (rc == 0)
7987
0
      {
7988
0
      mb->end_subject = end_subject = true_end_subject;
7989
0
      fragment_options = PCRE2_NOTBOL;
7990
0
      goto FRAGMENT_RESTART;
7991
0
      }
7992
7993
    /* A subsequent UTF error has been found; if the next fragment is
7994
    non-empty, set up to process it. Otherwise, let the loop advance. */
7995
7996
0
    else if (rc < 0)
7997
0
      {
7998
0
      mb->end_subject = end_subject = start_match + match_data->startchar;
7999
0
      if (end_subject > start_match)
8000
0
        {
8001
0
        fragment_options = PCRE2_NOTBOL|PCRE2_NOTEOL;
8002
0
        goto FRAGMENT_RESTART;
8003
0
        }
8004
0
      }
8005
0
    }
8006
0
  }
8007
3.52k
#endif  /* SUPPORT_UNICODE */
8008
8009
/* Fill in fields that are always returned in the match data. */
8010
8011
3.52k
match_data->code = re;
8012
3.52k
match_data->mark = mb->mark;
8013
3.52k
match_data->matchedby = PCRE2_MATCHEDBY_INTERPRETER;
8014
8015
/* Handle a fully successful match. Set the return code to the number of
8016
captured strings, or 0 if there were too many to fit into the ovector, and then
8017
set the remaining returned values before returning. Make a copy of the subject
8018
string if requested. */
8019
8020
3.52k
if (rc == MATCH_MATCH)
8021
735
  {
8022
735
  match_data->rc = ((int)mb->end_offset_top >= 2 * match_data->oveccount)?
8023
735
    0 : (int)mb->end_offset_top/2 + 1;
8024
735
  match_data->subject_length = length;
8025
735
  match_data->startchar = start_match - subject;
8026
735
  match_data->leftchar = mb->start_used_ptr - subject;
8027
735
  match_data->rightchar = ((mb->last_used_ptr > mb->end_match_ptr)?
8028
555
    mb->last_used_ptr : mb->end_match_ptr) - subject;
8029
735
  if ((options & PCRE2_COPY_MATCHED_SUBJECT) != 0)
8030
0
    {
8031
0
    length = CU2BYTES(length + was_zero_terminated);
8032
0
    match_data->subject = match_data->memctl.malloc(length,
8033
0
      match_data->memctl.memory_data);
8034
0
    if (match_data->subject == NULL) return PCRE2_ERROR_NOMEMORY;
8035
0
    memcpy((void *)match_data->subject, subject, length);
8036
0
    match_data->flags |= PCRE2_MD_COPIED_SUBJECT;
8037
0
    }
8038
735
  else match_data->subject = subject;
8039
8040
735
  return match_data->rc;
8041
735
  }
8042
8043
/* Control gets here if there has been a partial match, an error, or if the
8044
overall match attempt has failed at all permitted starting positions. Any mark
8045
data is in the nomatch_mark field. */
8046
8047
2.78k
match_data->mark = mb->nomatch_mark;
8048
8049
/* For anything other than nomatch or partial match, just return the code. */
8050
8051
2.78k
if (rc != MATCH_NOMATCH && rc != PCRE2_ERROR_PARTIAL) match_data->rc = rc;
8052
8053
/* Handle a partial match. If a "soft" partial match was requested, searching
8054
for a complete match will have continued, and the value of rc at this point
8055
will be MATCH_NOMATCH. For a "hard" partial match, it will already be
8056
PCRE2_ERROR_PARTIAL. */
8057
8058
2.78k
else if (match_partial != NULL)
8059
0
  {
8060
0
  match_data->subject = subject;
8061
0
  match_data->subject_length = length;
8062
0
  match_data->ovector[0] = match_partial - subject;
8063
0
  match_data->ovector[1] = end_subject - subject;
8064
0
  match_data->startchar = match_partial - subject;
8065
0
  match_data->leftchar = start_partial - subject;
8066
0
  match_data->rightchar = end_subject - subject;
8067
0
  match_data->rc = PCRE2_ERROR_PARTIAL;
8068
0
  }
8069
8070
/* Else this is the classic nomatch case. */
8071
8072
2.78k
else match_data->rc = PCRE2_ERROR_NOMATCH;
8073
8074
2.78k
return match_data->rc;
8075
3.52k
}
8076
8077
/* These #undefs are here to enable unity builds with CMake. */
8078
8079
#undef NLBLOCK /* Block containing newline information */
8080
#undef PSSTART /* Field containing processed string start */
8081
#undef PSEND   /* Field containing processed string end */
8082
8083
/* End of pcre2_match.c */