Coverage Report

Created: 2025-07-23 06:33

/src/php-src/ext/pcre/pcre2lib/pcre2_match.c
Line
Count
Source (jump to first uncovered line)
1
/*************************************************
2
*      Perl-Compatible Regular Expressions       *
3
*************************************************/
4
5
/* PCRE is a library of functions to support regular expressions whose syntax
6
and semantics are as close as possible to those of the Perl 5 language.
7
8
                       Written by Philip Hazel
9
     Original API code Copyright (c) 1997-2012 University of Cambridge
10
          New API code Copyright (c) 2015-2024 University of Cambridge
11
12
-----------------------------------------------------------------------------
13
Redistribution and use in source and binary forms, with or without
14
modification, are permitted provided that the following conditions are met:
15
16
    * Redistributions of source code must retain the above copyright notice,
17
      this list of conditions and the following disclaimer.
18
19
    * Redistributions in binary form must reproduce the above copyright
20
      notice, this list of conditions and the following disclaimer in the
21
      documentation and/or other materials provided with the distribution.
22
23
    * Neither the name of the University of Cambridge nor the names of its
24
      contributors may be used to endorse or promote products derived from
25
      this software without specific prior written permission.
26
27
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37
POSSIBILITY OF SUCH DAMAGE.
38
-----------------------------------------------------------------------------
39
*/
40
41
42
#ifdef HAVE_CONFIG_H
43
#include "config.h"
44
#endif
45
46
#include "pcre2_internal.h"
47
48
/* These defines enable debugging code */
49
50
/* #define DEBUG_FRAMES_DISPLAY */
51
/* #define DEBUG_SHOW_OPS */
52
/* #define DEBUG_SHOW_RMATCH */
53
54
#ifdef DEBUG_FRAMES_DISPLAY
55
#include <stdarg.h>
56
#endif
57
58
#ifdef DEBUG_SHOW_OPS
59
static const char *OP_names[] = { OP_NAME_LIST };
60
#endif
61
62
/* These defines identify the name of the block containing "static"
63
information, and fields within it. */
64
65
110M
#define NLBLOCK mb              /* Block containing newline information */
66
395k
#define PSSTART start_subject   /* Field containing processed string start */
67
27.2M
#define PSEND   end_subject     /* Field containing processed string end */
68
69
462k
#define RECURSE_UNSET 0xffffffffu  /* Bigger than max group number */
70
71
/* Masks for identifying the public options that are permitted at match time. */
72
73
#define PUBLIC_MATCH_OPTIONS \
74
145k
  (PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
75
145k
   PCRE2_NOTEMPTY_ATSTART|PCRE2_NO_UTF_CHECK|PCRE2_PARTIAL_HARD| \
76
145k
   PCRE2_PARTIAL_SOFT|PCRE2_NO_JIT|PCRE2_COPY_MATCHED_SUBJECT| \
77
145k
   PCRE2_DISABLE_RECURSELOOP_CHECK)
78
79
#define PUBLIC_JIT_MATCH_OPTIONS \
80
   (PCRE2_NO_UTF_CHECK|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY|\
81
    PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_SOFT|PCRE2_PARTIAL_HARD|\
82
    PCRE2_COPY_MATCHED_SUBJECT)
83
84
/* Non-error returns from and within the match() function. Error returns are
85
externally defined PCRE2_ERROR_xxx codes, which are all negative. */
86
87
217k
#define MATCH_MATCH        1
88
448M
#define MATCH_NOMATCH      0
89
90
/* Special internal returns used in the match() function. Make them
91
sufficiently negative to avoid the external error codes. */
92
93
2.80k
#define MATCH_ACCEPT       (-999)
94
1.68k
#define MATCH_KETRPOS      (-998)
95
/* The next 5 must be kept together and in sequence so that a test that checks
96
for any one of them can use a range. */
97
1.01k
#define MATCH_COMMIT       (-997)
98
390k
#define MATCH_PRUNE        (-996)
99
0
#define MATCH_SKIP         (-995)
100
0
#define MATCH_SKIP_ARG     (-994)
101
2.86M
#define MATCH_THEN         (-993)
102
505
#define MATCH_BACKTRACK_MAX MATCH_THEN
103
505
#define MATCH_BACKTRACK_MIN MATCH_COMMIT
104
105
/* Group frame type values. Zero means the frame is not a group frame. The
106
lower 16 bits are used for data (e.g. the capture number). Group frames are
107
used for most groups so that information about the start is easily available at
108
the end without having to scan back through intermediate frames (backtrack
109
points). */
110
111
307k
#define GF_CAPTURE     0x00010000u
112
732
#define GF_NOCAPTURE   0x00020000u
113
308k
#define GF_CONDASSERT  0x00030000u
114
1.01M
#define GF_RECURSE     0x00040000u
115
116
/* Masks for the identity and data parts of the group frame type. */
117
118
1.32M
#define GF_IDMASK(a)   ((a) & 0xffff0000u)
119
507
#define GF_DATAMASK(a) ((a) & 0x0000ffffu)
120
121
/* Repetition types */
122
123
enum { REPTYPE_MIN, REPTYPE_MAX, REPTYPE_POS };
124
125
/* Min and max values for the common repeats; a maximum of UINT32_MAX =>
126
infinity. */
127
128
static const uint32_t rep_min[] = {
129
  0, 0,       /* * and *? */
130
  1, 1,       /* + and +? */
131
  0, 0,       /* ? and ?? */
132
  0, 0,       /* dummy placefillers for OP_CR[MIN]RANGE */
133
  0, 1, 0 };  /* OP_CRPOS{STAR, PLUS, QUERY} */
134
135
static const uint32_t rep_max[] = {
136
  UINT32_MAX, UINT32_MAX,      /* * and *? */
137
  UINT32_MAX, UINT32_MAX,      /* + and +? */
138
  1, 1,                        /* ? and ?? */
139
  0, 0,                        /* dummy placefillers for OP_CR[MIN]RANGE */
140
  UINT32_MAX, UINT32_MAX, 1 }; /* OP_CRPOS{STAR, PLUS, QUERY} */
141
142
/* Repetition types - must include OP_CRPOSRANGE (not needed above) */
143
144
static const uint32_t rep_typ[] = {
145
  REPTYPE_MAX, REPTYPE_MIN,    /* * and *? */
146
  REPTYPE_MAX, REPTYPE_MIN,    /* + and +? */
147
  REPTYPE_MAX, REPTYPE_MIN,    /* ? and ?? */
148
  REPTYPE_MAX, REPTYPE_MIN,    /* OP_CRRANGE and OP_CRMINRANGE */
149
  REPTYPE_POS, REPTYPE_POS,    /* OP_CRPOSSTAR, OP_CRPOSPLUS */
150
  REPTYPE_POS, REPTYPE_POS };  /* OP_CRPOSQUERY, OP_CRPOSRANGE */
151
152
/* Numbers for RMATCH calls at backtracking points. When these lists are
153
changed, the code at RETURN_SWITCH below must be updated in sync.  */
154
155
enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
156
       RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
157
       RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
158
       RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39 };
159
160
#ifdef SUPPORT_WIDE_CHARS
161
enum { RM100=100, RM101, RM102, RM103 };
162
#endif
163
164
#ifdef SUPPORT_UNICODE
165
enum { RM200=200, RM201, RM202, RM203, RM204, RM205, RM206, RM207,
166
       RM208,     RM209, RM210, RM211, RM212, RM213, RM214, RM215,
167
       RM216,     RM217, RM218, RM219, RM220, RM221, RM222, RM223,
168
       RM224 };
169
#endif
170
171
/* Define short names for general fields in the current backtrack frame, which
172
is always pointed to by the F variable. Occasional references to fields in
173
other frames are written out explicitly. There are also some fields in the
174
current frame whose names start with "temp" that are used for short-term,
175
localised backtracking memory. These are #defined with Lxxx names at the point
176
of use and undefined afterwards. */
177
178
895M
#define Fback_frame        F->back_frame
179
770k
#define Fcapture_last      F->capture_last
180
953k
#define Fcurrent_recurse   F->current_recurse
181
1.58G
#define Fecode             F->ecode
182
2.58G
#define Feptr              F->eptr
183
448M
#define Fgroup_frame_type  F->group_frame_type
184
2.09M
#define Flast_group_offset F->last_group_offset
185
294M
#define Flength            F->length
186
534k
#define Fmark              F->mark
187
1.34G
#define Frdepth            F->rdepth
188
1.00M
#define Fstart_match       F->start_match
189
1.26M
#define Foffset_top        F->offset_top
190
0
#define Foccu              F->occu
191
1.21G
#define Fop                F->op
192
687k
#define Fovector           F->ovector
193
895M
#define Freturn_id         F->return_id
194
195
196
#ifdef DEBUG_FRAMES_DISPLAY
197
/*************************************************
198
*      Display current frames and contents       *
199
*************************************************/
200
201
/* This debugging function displays the current set of frames and their
202
contents. It is not called automatically from anywhere, the intention being
203
that calls can be inserted where necessary when debugging frame-related
204
problems.
205
206
Arguments:
207
  f           the file to write to
208
  F           the current top frame
209
  P           a previous frame of interest
210
  frame_size  the frame size
211
  mb          points to the match block
212
  match_data  points to the match data block
213
  s           identification text
214
215
Returns:    nothing
216
*/
217
218
static void
219
display_frames(FILE *f, heapframe *F, heapframe *P, PCRE2_SIZE frame_size,
220
  match_block *mb, pcre2_match_data *match_data, const char *s, ...)
221
{
222
uint32_t i;
223
heapframe *Q;
224
va_list ap;
225
va_start(ap, s);
226
227
fprintf(f, "FRAMES ");
228
vfprintf(f, s, ap);
229
va_end(ap);
230
231
if (P != NULL) fprintf(f, " P=%lu",
232
  ((char *)P - (char *)(match_data->heapframes))/frame_size);
233
fprintf(f, "\n");
234
235
for (i = 0, Q = match_data->heapframes;
236
     Q <= F;
237
     i++, Q = (heapframe *)((char *)Q + frame_size))
238
  {
239
  fprintf(f, "Frame %d type=%x subj=%lu code=%d back=%lu id=%d",
240
    i, Q->group_frame_type, Q->eptr - mb->start_subject, *(Q->ecode),
241
    Q->back_frame, Q->return_id);
242
243
  if (Q->last_group_offset == PCRE2_UNSET)
244
    fprintf(f, " lgoffset=unset\n");
245
  else
246
    fprintf(f, " lgoffset=%lu\n",  Q->last_group_offset/frame_size);
247
  }
248
}
249
250
#endif
251
252
253
254
/*************************************************
255
*                Process a callout               *
256
*************************************************/
257
258
/* This function is called for all callouts, whether "standalone" or at the
259
start of a conditional group. Fecode will be pointing to either OP_CALLOUT or
260
OP_CALLOUT_STR. A callout block is allocated in pcre2_match() and initialized
261
with fixed values.
262
263
Arguments:
264
  F          points to the current backtracking frame
265
  mb         points to the match block
266
  lengthptr  where to return the length of the callout item
267
268
Returns:     the return from the callout
269
             or 0 if no callout function exists
270
*/
271
272
static int
273
do_callout(heapframe *F, match_block *mb, PCRE2_SIZE *lengthptr)
274
0
{
275
0
int rc;
276
0
PCRE2_SIZE save0, save1;
277
0
PCRE2_SIZE *callout_ovector;
278
0
pcre2_callout_block *cb;
279
280
0
*lengthptr = (*Fecode == OP_CALLOUT)?
281
0
  PRIV(OP_lengths)[OP_CALLOUT] : GET(Fecode, 1 + 2*LINK_SIZE);
282
283
0
if (mb->callout == NULL) return 0;   /* No callout function provided */
284
285
/* The original matching code (pre 10.30) worked directly with the ovector
286
passed by the user, and this was passed to callouts. Now that the working
287
ovector is in the backtracking frame, it no longer needs to reserve space for
288
the overall match offsets (which would waste space in the frame). For backward
289
compatibility, however, we pass capture_top and offset_vector to the callout as
290
if for the extended ovector, and we ensure that the first two slots are unset
291
by preserving and restoring their current contents. Picky compilers complain if
292
references such as Fovector[-2] are use directly, so we set up a separate
293
pointer. */
294
295
0
callout_ovector = (PCRE2_SIZE *)(Fovector) - 2;
296
297
/* The cb->version, cb->subject, cb->subject_length, and cb->start_match fields
298
are set externally. The first 3 never change; the last is updated for each
299
bumpalong. */
300
301
0
cb = mb->cb;
302
0
cb->capture_top      = (uint32_t)Foffset_top/2 + 1;
303
0
cb->capture_last     = Fcapture_last;
304
0
cb->offset_vector    = callout_ovector;
305
0
cb->mark             = mb->nomatch_mark;
306
0
cb->current_position = (PCRE2_SIZE)(Feptr - mb->start_subject);
307
0
cb->pattern_position = GET(Fecode, 1);
308
0
cb->next_item_length = GET(Fecode, 1 + LINK_SIZE);
309
310
0
if (*Fecode == OP_CALLOUT)  /* Numerical callout */
311
0
  {
312
0
  cb->callout_number = Fecode[1 + 2*LINK_SIZE];
313
0
  cb->callout_string_offset = 0;
314
0
  cb->callout_string = NULL;
315
0
  cb->callout_string_length = 0;
316
0
  }
317
0
else  /* String callout */
318
0
  {
319
0
  cb->callout_number = 0;
320
0
  cb->callout_string_offset = GET(Fecode, 1 + 3*LINK_SIZE);
321
0
  cb->callout_string = Fecode + (1 + 4*LINK_SIZE) + 1;
322
0
  cb->callout_string_length =
323
0
    *lengthptr - (1 + 4*LINK_SIZE) - 2;
324
0
  }
325
326
0
save0 = callout_ovector[0];
327
0
save1 = callout_ovector[1];
328
0
callout_ovector[0] = callout_ovector[1] = PCRE2_UNSET;
329
0
rc = mb->callout(cb, mb->callout_data);
330
0
callout_ovector[0] = save0;
331
0
callout_ovector[1] = save1;
332
0
cb->callout_flags = 0;
333
0
return rc;
334
0
}
335
336
337
338
/*************************************************
339
*          Match a back-reference                *
340
*************************************************/
341
342
/* This function is called only when it is known that the offset lies within
343
the offsets that have so far been used in the match. Note that in caseless
344
UTF-8 mode, the number of subject bytes matched may be different to the number
345
of reference bytes. (In theory this could also happen in UTF-16 mode, but it
346
seems unlikely.)
347
348
Arguments:
349
  offset      index into the offset vector
350
  caseless    TRUE if caseless
351
  caseopts    bitmask of REFI_FLAG_XYZ values
352
  F           the current backtracking frame pointer
353
  mb          points to match block
354
  lengthptr   pointer for returning the length matched
355
356
Returns:      = 0 successful match; number of code units matched is set
357
              < 0 no match
358
              > 0 partial match
359
*/
360
361
static int
362
match_ref(PCRE2_SIZE offset, BOOL caseless, int caseopts, heapframe *F,
363
  match_block *mb, PCRE2_SIZE *lengthptr)
364
0
{
365
0
PCRE2_SPTR p;
366
0
PCRE2_SIZE length;
367
0
PCRE2_SPTR eptr;
368
0
PCRE2_SPTR eptr_start;
369
370
/* Deal with an unset group. The default is no match, but there is an option to
371
match an empty string. */
372
373
0
if (offset >= Foffset_top || Fovector[offset] == PCRE2_UNSET)
374
0
  {
375
0
  if ((mb->poptions & PCRE2_MATCH_UNSET_BACKREF) != 0)
376
0
    {
377
0
    *lengthptr = 0;
378
0
    return 0;      /* Match */
379
0
    }
380
0
  else return -1;  /* No match */
381
0
  }
382
383
/* Separate the caseless and UTF cases for speed. */
384
385
0
eptr = eptr_start = Feptr;
386
0
p = mb->start_subject + Fovector[offset];
387
0
length = Fovector[offset+1] - Fovector[offset];
388
389
0
if (caseless)
390
0
  {
391
0
#if defined SUPPORT_UNICODE
392
0
  BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
393
0
  BOOL caseless_restrict = (caseopts & REFI_FLAG_CASELESS_RESTRICT) != 0;
394
0
  BOOL turkish_casing = !caseless_restrict && (caseopts & REFI_FLAG_TURKISH_CASING) != 0;
395
396
0
  if (utf || (mb->poptions & PCRE2_UCP) != 0)
397
0
    {
398
0
    PCRE2_SPTR endptr = p + length;
399
400
    /* Match characters up to the end of the reference. NOTE: the number of
401
    code units matched may differ, because in UTF-8 there are some characters
402
    whose upper and lower case codes have different numbers of bytes. For
403
    example, U+023A (2 bytes in UTF-8) is the upper case version of U+2C65 (3
404
    bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a
405
    sequence of two of the latter. It is important, therefore, to check the
406
    length along the reference, not along the subject (earlier code did this
407
    wrong). UCP without uses Unicode properties but without UTF encoding. */
408
409
0
    while (p < endptr)
410
0
      {
411
0
      uint32_t c, d;
412
0
      const ucd_record *ur;
413
0
      if (eptr >= mb->end_subject) return 1;   /* Partial match */
414
415
0
      if (utf)
416
0
        {
417
0
        GETCHARINC(c, eptr);
418
0
        GETCHARINC(d, p);
419
0
        }
420
0
      else
421
0
        {
422
0
        c = *eptr++;
423
0
        d = *p++;
424
0
        }
425
426
0
      if (turkish_casing && UCD_ANY_I(d))
427
0
        {
428
0
        c = UCD_FOLD_I_TURKISH(c);
429
0
        d = UCD_FOLD_I_TURKISH(d);
430
0
        if (c != d) return -1;  /* No match */
431
0
        }
432
0
      else if (c != d && c != (uint32_t)((int)d + (ur = GET_UCD(d))->other_case))
433
0
        {
434
0
        const uint32_t *pp = PRIV(ucd_caseless_sets) + ur->caseset;
435
436
        /* When PCRE2_EXTRA_CASELESS_RESTRICT is set, ignore any caseless sets
437
        that start with an ASCII character. */
438
0
        if (caseless_restrict && *pp < 128) return -1;  /* No match */
439
440
0
        for (;;)
441
0
          {
442
0
          if (c < *pp) return -1;  /* No match */
443
0
          if (c == *pp++) break;
444
0
          }
445
0
        }
446
0
      }
447
0
    }
448
0
  else
449
0
#endif
450
451
  /* Not in UTF or UCP mode */
452
0
    {
453
0
    for (; length > 0; length--)
454
0
      {
455
0
      uint32_t cc, cp;
456
0
      if (eptr >= mb->end_subject) return 1;   /* Partial match */
457
0
      cc = UCHAR21TEST(eptr);
458
0
      cp = UCHAR21TEST(p);
459
0
      if (TABLE_GET(cp, mb->lcc, cp) != TABLE_GET(cc, mb->lcc, cc))
460
0
        return -1;  /* No match */
461
0
      p++;
462
0
      eptr++;
463
0
      }
464
0
    }
465
0
  }
466
467
/* In the caseful case, we can just compare the code units, whether or not we
468
are in UTF and/or UCP mode. When partial matching, we have to do this unit by
469
unit. */
470
471
0
else
472
0
  {
473
0
  if (mb->partial != 0)
474
0
    {
475
0
    for (; length > 0; length--)
476
0
      {
477
0
      if (eptr >= mb->end_subject) return 1;   /* Partial match */
478
0
      if (UCHAR21INCTEST(p) != UCHAR21INCTEST(eptr)) return -1;  /* No match */
479
0
      }
480
0
    }
481
482
  /* Not partial matching */
483
484
0
  else
485
0
    {
486
0
    if ((PCRE2_SIZE)(mb->end_subject - eptr) < length) return 1; /* Partial */
487
0
    if (memcmp(p, eptr, CU2BYTES(length)) != 0) return -1;  /* No match */
488
0
    eptr += length;
489
0
    }
490
0
  }
491
492
0
*lengthptr = eptr - eptr_start;
493
0
return 0;  /* Match */
494
0
}
495
496
497
498
/******************************************************************************
499
*******************************************************************************
500
                   "Recursion" in the match() function
501
502
The original match() function was highly recursive, but this proved to be the
503
source of a number of problems over the years, mostly because of the relatively
504
small system stacks that are commonly found. As new features were added to
505
patterns, various kludges were invented to reduce the amount of stack used,
506
making the code hard to understand in places.
507
508
A version did exist that used individual frames on the heap instead of calling
509
match() recursively, but this ran substantially slower. The current version is
510
a refactoring that uses a vector of frames to remember backtracking points.
511
This runs no slower, and possibly even a bit faster than the original recursive
512
implementation.
513
514
At first, an initial vector of size START_FRAMES_SIZE (enough for maybe 50
515
frames) was allocated on the system stack. If this was not big enough, the heap
516
was used for a larger vector. However, it turns out that there are environments
517
where taking as little as 20KiB from the system stack is an embarrassment.
518
After another refactoring, the heap is used exclusively, but a pointer to the
519
frames vector and its size are cached in the match_data block, so that there is
520
no new memory allocation if the same match_data block is used for multiple
521
matches (unless the frames vector has to be extended).
522
*******************************************************************************
523
******************************************************************************/
524
525
526
527
528
/*************************************************
529
*       Macros for the match() function          *
530
*************************************************/
531
532
/* These macros pack up tests that are used for partial matching several times
533
in the code. The second one is used when we already know we are past the end of
534
the subject. We set the "hit end" flag if the pointer is at the end of the
535
subject and either (a) the pointer is past the earliest inspected character
536
(i.e. something has been matched, even if not part of the actual matched
537
string), or (b) the pattern contains a lookbehind. These are the conditions for
538
which adding more characters may allow the current match to continue.
539
540
For hard partial matching, we immediately return a partial match. Otherwise,
541
carrying on means that a complete match on the current subject will be sought.
542
A partial match is returned only if no complete match can be found. */
543
544
#define CHECK_PARTIAL() \
545
28.1M
  do { \
546
28.1M
     if (Feptr >= mb->end_subject) \
547
28.1M
       { \
548
1.08M
       SCHECK_PARTIAL(); \
549
1.08M
       } \
550
28.1M
     } \
551
28.1M
  while (0)
552
553
#define SCHECK_PARTIAL() \
554
10.2M
  do { \
555
10.2M
     if (mb->partial != 0 && \
556
10.2M
         (Feptr > mb->start_used_ptr || mb->allowemptypartial)) \
557
10.2M
       { \
558
0
       mb->hitend = TRUE; \
559
0
       if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; \
560
0
       } \
561
10.2M
     } \
562
10.2M
  while (0)
563
564
565
/* These macros are used to implement backtracking. They simulate a recursive
566
call to the match() function by means of a local vector of frames which
567
remember the backtracking points. */
568
569
#define RMATCH(ra,rb) \
570
447M
  do { \
571
447M
     start_ecode = ra; \
572
447M
     Freturn_id = rb; \
573
447M
     goto MATCH_RECURSE; \
574
447M
     L_##rb:; \
575
447M
     } \
576
447M
  while (0)
577
578
#define RRETURN(ra) \
579
447M
  do { \
580
447M
     rrc = ra; \
581
447M
     goto RETURN_SWITCH; \
582
447M
     } \
583
447M
  while (0)
584
585
586
587
/*************************************************
588
*         Match from current position            *
589
*************************************************/
590
591
/* This function is called to run one match attempt at a single starting point
592
in the subject.
593
594
Performance note: It might be tempting to extract commonly used fields from the
595
mb structure (e.g. end_subject) into individual variables to improve
596
performance. Tests using gcc on a SPARC disproved this; in the first case, it
597
made performance worse.
598
599
Arguments:
600
   start_eptr   starting character in subject
601
   start_ecode  starting position in compiled code
602
   top_bracket  number of capturing parentheses in the pattern
603
   frame_size   size of each backtracking frame
604
   match_data   pointer to the match_data block
605
   mb           pointer to "static" variables block
606
607
Returns:        MATCH_MATCH if matched            )  these values are >= 0
608
                MATCH_NOMATCH if failed to match  )
609
                negative MATCH_xxx value for PRUNE, SKIP, etc
610
                negative PCRE2_ERROR_xxx value if aborted by an error condition
611
                (e.g. stopped by repeated call or depth limit)
612
*/
613
614
static int
615
match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket,
616
  PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb)
617
462k
{
618
/* Frame-handling variables */
619
620
462k
heapframe *F;           /* Current frame pointer */
621
462k
heapframe *N = NULL;    /* Temporary frame pointers */
622
462k
heapframe *P = NULL;
623
624
462k
heapframe *frames_top;  /* End of frames vector */
625
462k
heapframe *assert_accept_frame = NULL;  /* For passing back a frame with captures */
626
462k
PCRE2_SIZE frame_copy_size;   /* Amount to copy when creating a new frame */
627
628
/* Local variables that do not need to be preserved over calls to RMATCH(). */
629
630
462k
PCRE2_SPTR branch_end = NULL;
631
462k
PCRE2_SPTR branch_start;
632
462k
PCRE2_SPTR bracode;     /* Temp pointer to start of group */
633
462k
PCRE2_SIZE offset;      /* Used for group offsets */
634
462k
PCRE2_SIZE length;      /* Used for various length calculations */
635
636
462k
int rrc;                /* Return from functions & backtracking "recursions" */
637
462k
#ifdef SUPPORT_UNICODE
638
462k
int proptype;           /* Type of character property */
639
462k
#endif
640
641
462k
uint32_t i;             /* Used for local loops */
642
462k
uint32_t fc;            /* Character values */
643
462k
uint32_t number;        /* Used for group and other numbers */
644
462k
uint32_t reptype = 0;   /* Type of repetition (0 to avoid compiler warning) */
645
462k
uint32_t group_frame_type;  /* Specifies type for new group frames */
646
647
462k
BOOL condition;         /* Used in conditional groups */
648
462k
BOOL cur_is_word;       /* Used in "word" tests */
649
462k
BOOL prev_is_word;      /* Used in "word" tests */
650
651
/* UTF and UCP flags */
652
653
462k
#ifdef SUPPORT_UNICODE
654
462k
BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
655
462k
BOOL ucp = (mb->poptions & PCRE2_UCP) != 0;
656
#else
657
BOOL utf = FALSE;  /* Required for convenience even when no Unicode support */
658
#endif
659
660
/* This is the length of the last part of a backtracking frame that must be
661
copied when a new frame is created. */
662
663
462k
frame_copy_size = frame_size - offsetof(heapframe, eptr);
664
665
/* Set up the first frame and the end of the frames vector. */
666
667
462k
F = match_data->heapframes;
668
462k
frames_top = (heapframe *)((char *)F + match_data->heapframes_size);
669
670
462k
Frdepth = 0;                        /* "Recursion" depth */
671
462k
Fcapture_last = 0;                  /* Number of most recent capture */
672
462k
Fcurrent_recurse = RECURSE_UNSET;   /* Not pattern recursing. */
673
462k
Fstart_match = Feptr = start_eptr;  /* Current data pointer and start match */
674
462k
Fmark = NULL;                       /* Most recent mark */
675
462k
Foffset_top = 0;                    /* End of captures within the frame */
676
462k
Flast_group_offset = PCRE2_UNSET;   /* Saved frame of most recent group */
677
462k
group_frame_type = 0;               /* Not a start of group frame */
678
462k
goto NEW_FRAME;                     /* Start processing with this frame */
679
680
/* Come back here when we want to create a new frame for remembering a
681
backtracking point. */
682
683
447M
MATCH_RECURSE:
684
685
/* Set up a new backtracking frame. If the vector is full, get a new one,
686
doubling the size, but constrained by the heap limit (which is in KiB). */
687
688
447M
N = (heapframe *)((char *)F + frame_size);
689
447M
if ((heapframe *)((char *)N + frame_size) >= frames_top)
690
0
  {
691
0
  heapframe *new;
692
0
  PCRE2_SIZE newsize;
693
0
  PCRE2_SIZE usedsize = (char *)N - (char *)(match_data->heapframes);
694
695
0
  if (match_data->heapframes_size >= PCRE2_SIZE_MAX / 2)
696
0
    {
697
0
    if (match_data->heapframes_size == PCRE2_SIZE_MAX - 1)
698
0
      return PCRE2_ERROR_NOMEMORY;
699
0
    newsize = PCRE2_SIZE_MAX - 1;
700
0
    }
701
0
  else
702
0
    newsize = match_data->heapframes_size * 2;
703
704
0
  if (newsize / 1024 >= mb->heap_limit)
705
0
    {
706
0
    PCRE2_SIZE old_size = match_data->heapframes_size / 1024;
707
0
    if (mb->heap_limit <= old_size)
708
0
      return PCRE2_ERROR_HEAPLIMIT;
709
0
    else
710
0
      {
711
0
      PCRE2_SIZE max_delta = 1024 * (mb->heap_limit - old_size);
712
0
      int over_bytes = match_data->heapframes_size % 1024;
713
0
      if (over_bytes) max_delta -= (1024 - over_bytes);
714
0
      newsize = match_data->heapframes_size + max_delta;
715
0
      }
716
0
    }
717
718
  /* With a heap limit set, the permitted additional size may not be enough for
719
  another frame, so do a final check. */
720
721
0
  if (newsize - usedsize < frame_size) return PCRE2_ERROR_HEAPLIMIT;
722
0
  new = match_data->memctl.malloc(newsize, match_data->memctl.memory_data);
723
0
  if (new == NULL) return PCRE2_ERROR_NOMEMORY;
724
0
  memcpy(new, match_data->heapframes, usedsize);
725
726
0
  N = (heapframe *)((char *)new + usedsize);
727
0
  F = (heapframe *)((char *)N - frame_size);
728
729
0
  match_data->memctl.free(match_data->heapframes, match_data->memctl.memory_data);
730
0
  match_data->heapframes = new;
731
0
  match_data->heapframes_size = newsize;
732
0
  frames_top = (heapframe *)((char *)new + newsize);
733
0
  }
734
735
#ifdef DEBUG_SHOW_RMATCH
736
fprintf(stderr, "++ RMATCH %d frame=%d", Freturn_id, Frdepth + 1);
737
if (group_frame_type != 0)
738
  {
739
  fprintf(stderr, " type=%x ", group_frame_type);
740
  switch (GF_IDMASK(group_frame_type))
741
    {
742
    case GF_CAPTURE:
743
    fprintf(stderr, "capture=%d", GF_DATAMASK(group_frame_type));
744
    break;
745
746
    case GF_NOCAPTURE:
747
    fprintf(stderr, "nocapture op=%d", GF_DATAMASK(group_frame_type));
748
    break;
749
750
    case GF_CONDASSERT:
751
    fprintf(stderr, "condassert op=%d", GF_DATAMASK(group_frame_type));
752
    break;
753
754
    case GF_RECURSE:
755
    fprintf(stderr, "recurse=%d", GF_DATAMASK(group_frame_type));
756
    break;
757
758
    default:
759
    fprintf(stderr, "*** unknown ***");
760
    break;
761
    }
762
  }
763
fprintf(stderr, "\n");
764
#endif
765
766
/* Copy those fields that must be copied into the new frame, increase the
767
"recursion" depth (i.e. the new frame's index) and then make the new frame
768
current. */
769
770
447M
memcpy((char *)N + offsetof(heapframe, eptr),
771
447M
       (char *)F + offsetof(heapframe, eptr),
772
447M
       frame_copy_size);
773
774
447M
N->rdepth = Frdepth + 1;
775
447M
F = N;
776
777
/* Carry on processing with a new frame. */
778
779
448M
NEW_FRAME:
780
448M
Fgroup_frame_type = group_frame_type;
781
448M
Fecode = start_ecode;      /* Starting code pointer */
782
448M
Fback_frame = frame_size;  /* Default is go back one frame */
783
784
/* If this is a special type of group frame, remember its offset for quick
785
access at the end of the group. If this is a recursion, set a new current
786
recursion value. */
787
788
448M
if (group_frame_type != 0)
789
1.01M
  {
790
1.01M
  Flast_group_offset = (char *)F - (char *)match_data->heapframes;
791
1.01M
  if (GF_IDMASK(group_frame_type) == GF_RECURSE)
792
507
    Fcurrent_recurse = GF_DATAMASK(group_frame_type);
793
1.01M
  group_frame_type = 0;
794
1.01M
  }
795
796
797
/* ========================================================================= */
798
/* This is the main processing loop. First check that we haven't recorded too
799
many backtracks (search tree is too large), or that we haven't exceeded the
800
recursive depth limit (used too many backtracking frames). If not, process the
801
opcodes. */
802
803
448M
if (mb->match_call_count++ >= mb->match_limit) return PCRE2_ERROR_MATCHLIMIT;
804
448M
if (Frdepth >= mb->match_limit_depth) return PCRE2_ERROR_DEPTHLIMIT;
805
806
#ifdef DEBUG_SHOW_OPS
807
fprintf(stderr, "\n++ New frame: type=0x%x subject offset %ld\n",
808
  GF_IDMASK(Fgroup_frame_type), Feptr - mb->start_subject);
809
#endif
810
811
448M
for (;;)
812
585M
  {
813
#ifdef DEBUG_SHOW_OPS
814
fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
815
  OP_names[*Fecode]);
816
#endif
817
818
585M
  Fop = (uint8_t)(*Fecode);  /* Cast needed for 16-bit and 32-bit modes */
819
585M
  switch(Fop)
820
585M
    {
821
    /* ===================================================================== */
822
    /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes, to close
823
    any currently open capturing brackets. Unlike reaching the end of a group,
824
    where we know the starting frame is at the top of the chained frames, in
825
    this case we have to search back for the relevant frame in case other types
826
    of group that use chained frames have intervened. Multiple OP_CLOSEs always
827
    come innermost first, which matches the chain order. We can ignore this in
828
    a recursion, because captures are not passed out of recursions. */
829
830
0
    case OP_CLOSE:
831
0
    if (Fcurrent_recurse == RECURSE_UNSET)
832
0
      {
833
0
      number = GET2(Fecode, 1);
834
0
      offset = Flast_group_offset;
835
0
      for(;;)
836
0
        {
837
        /* Corrupted heapframes? Trigger an assert and return an error. */
838
0
        PCRE2_ASSERT(offset != PCRE2_UNSET);
839
0
        if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL;
840
841
0
        N = (heapframe *)((char *)match_data->heapframes + offset);
842
0
        P = (heapframe *)((char *)N - frame_size);
843
0
        if (N->group_frame_type == (GF_CAPTURE | number)) break;
844
0
        offset = P->last_group_offset;
845
0
        }
846
0
      offset = (number << 1) - 2;
847
0
      Fcapture_last = number;
848
0
      Fovector[offset] = P->eptr - mb->start_subject;
849
0
      Fovector[offset+1] = Feptr - mb->start_subject;
850
0
      if (offset >= Foffset_top) Foffset_top = offset + 2;
851
0
      }
852
0
    Fecode += PRIV(OP_lengths)[*Fecode];
853
0
    break;
854
855
856
    /* ===================================================================== */
857
    /* Real or forced end of the pattern, assertion, or recursion. In an
858
    assertion ACCEPT, update the last used pointer and remember the current
859
    frame so that the captures and mark can be fished out of it. */
860
861
0
    case OP_ASSERT_ACCEPT:
862
0
    if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
863
0
    assert_accept_frame = F;
864
0
    RRETURN(MATCH_ACCEPT);
865
866
    /* For ACCEPT within a recursion, we have to find the most recent
867
    recursion. If not in a recursion, fall through to code that is common with
868
    OP_END. */
869
870
0
    case OP_ACCEPT:
871
0
    if (Fcurrent_recurse != RECURSE_UNSET)
872
0
      {
873
#ifdef DEBUG_SHOW_OPS
874
      fprintf(stderr, "++ Accept within recursion\n");
875
#endif
876
0
      offset = Flast_group_offset;
877
0
      for(;;)
878
0
        {
879
        /* Corrupted heapframes? Trigger an assert and return an error. */
880
0
        PCRE2_ASSERT(offset != PCRE2_UNSET);
881
0
        if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL;
882
883
0
        N = (heapframe *)((char *)match_data->heapframes + offset);
884
0
        P = (heapframe *)((char *)N - frame_size);
885
0
        if (GF_IDMASK(N->group_frame_type) == GF_RECURSE) break;
886
0
        offset = P->last_group_offset;
887
0
        }
888
889
      /* N is now the frame of the recursion; the previous frame is at the
890
      OP_RECURSE position. Go back there, copying the current subject position
891
      and mark, and the start_match position (\K might have changed it), and
892
      then move on past the OP_RECURSE. */
893
894
0
      P->eptr = Feptr;
895
0
      P->mark = Fmark;
896
0
      P->start_match = Fstart_match;
897
0
      F = P;
898
0
      Fecode += 1 + LINK_SIZE;
899
0
      continue;
900
0
      }
901
    /* Fall through */
902
903
    /* OP_END itself can never be reached within a recursion because that is
904
    picked up when the OP_KET that always precedes OP_END is reached. */
905
906
181k
    case OP_END:
907
908
    /* Fail for an empty string match if either PCRE2_NOTEMPTY is set, or if
909
    PCRE2_NOTEMPTY_ATSTART is set and we have matched at the start of the
910
    subject. In both cases, backtracking will then try other alternatives, if
911
    any. */
912
913
181k
    if (Feptr == Fstart_match &&
914
181k
         ((mb->moptions & PCRE2_NOTEMPTY) != 0 ||
915
179k
           ((mb->moptions & PCRE2_NOTEMPTY_ATSTART) != 0 &&
916
179k
             Fstart_match == mb->start_subject + mb->start_offset)))
917
109k
      {
918
#ifdef DEBUG_SHOW_OPS
919
      fprintf(stderr, "++ Backtrack because empty string\n");
920
#endif
921
109k
      RRETURN(MATCH_NOMATCH);
922
109k
      }
923
924
    /* Fail if PCRE2_ENDANCHORED is set and the end of the match is not
925
    the end of the subject. After (*ACCEPT) we fail the entire match (at this
926
    position) but backtrack if we've reached the end of the pattern. This
927
    applies whether or not we are in a recursion. */
928
929
72.3k
    if (Feptr < mb->end_subject &&
930
72.3k
        ((mb->moptions | mb->poptions) & PCRE2_ENDANCHORED) != 0)
931
0
      {
932
0
      if (Fop == OP_END)
933
0
        {
934
#ifdef DEBUG_SHOW_OPS
935
        fprintf(stderr, "++ Backtrack because not at end (endanchored set)\n");
936
#endif
937
0
        RRETURN(MATCH_NOMATCH);
938
0
        }
939
940
#ifdef DEBUG_SHOW_OPS
941
      fprintf(stderr, "++ Failed ACCEPT not at end (endanchnored set)\n");
942
#endif
943
0
      return MATCH_NOMATCH;   /* (*ACCEPT) */
944
0
      }
945
946
    /* We have a successful match of the whole pattern. Record the result and
947
    then do a direct return from the function. If there is space in the offset
948
    vector, set any pairs that follow the highest-numbered captured string but
949
    are less than the number of capturing groups in the pattern to PCRE2_UNSET.
950
    It is documented that this happens. "Gaps" are set to PCRE2_UNSET
951
    dynamically. It is only those at the end that need setting here. */
952
953
72.3k
    mb->end_match_ptr = Feptr;           /* Record where we ended */
954
72.3k
    mb->end_offset_top = Foffset_top;    /* and how many extracts were taken */
955
72.3k
    mb->mark = Fmark;                    /* and the last success mark */
956
72.3k
    if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
957
958
72.3k
    match_data->ovector[0] = Fstart_match - mb->start_subject;
959
72.3k
    match_data->ovector[1] = Feptr - mb->start_subject;
960
961
    /* Set i to the smaller of the sizes of the external and frame ovectors. */
962
963
72.3k
    i = 2 * ((top_bracket + 1 > match_data->oveccount)?
964
72.3k
      match_data->oveccount : top_bracket + 1);
965
72.3k
    memcpy(match_data->ovector + 2, Fovector, (i - 2) * sizeof(PCRE2_SIZE));
966
110k
    while (--i >= Foffset_top + 2) match_data->ovector[i] = PCRE2_UNSET;
967
72.3k
    return MATCH_MATCH;  /* Note: NOT RRETURN */
968
969
970
    /*===================================================================== */
971
    /* Match any single character type except newline; have to take care with
972
    CRLF newlines and partial matching. */
973
974
5.61M
    case OP_ANY:
975
5.61M
    if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
976
5.59M
    if (mb->partial != 0 &&
977
5.59M
        Feptr == mb->end_subject - 1 &&
978
5.59M
        NLBLOCK->nltype == NLTYPE_FIXED &&
979
5.59M
        NLBLOCK->nllen == 2 &&
980
5.59M
        UCHAR21TEST(Feptr) == NLBLOCK->nl[0])
981
0
      {
982
0
      mb->hitend = TRUE;
983
0
      if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
984
0
      }
985
    /* Fall through */
986
987
    /* Match any single character whatsoever. */
988
989
8.69M
    case OP_ALLANY:
990
8.69M
    if (Feptr >= mb->end_subject)  /* DO NOT merge the Feptr++ here; it must */
991
105k
      {                            /* not be updated before SCHECK_PARTIAL. */
992
105k
      SCHECK_PARTIAL();
993
105k
      RRETURN(MATCH_NOMATCH);
994
105k
      }
995
8.59M
    Feptr++;
996
8.59M
#ifdef SUPPORT_UNICODE
997
8.59M
    if (utf) ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
998
8.59M
#endif
999
8.59M
    Fecode++;
1000
8.59M
    break;
1001
1002
1003
    /* ===================================================================== */
1004
    /* Match a single code unit, even in UTF mode. This opcode really does
1005
    match any code unit, even newline. (It really should be called ANYCODEUNIT,
1006
    of course - the byte name is from pre-16 bit days.) */
1007
1008
316
    case OP_ANYBYTE:
1009
316
    if (Feptr >= mb->end_subject)   /* DO NOT merge the Feptr++ here; it must */
1010
5
      {                             /* not be updated before SCHECK_PARTIAL. */
1011
5
      SCHECK_PARTIAL();
1012
5
      RRETURN(MATCH_NOMATCH);
1013
5
      }
1014
311
    Feptr++;
1015
311
    Fecode++;
1016
311
    break;
1017
1018
1019
    /* ===================================================================== */
1020
    /* Match a single character, casefully */
1021
1022
168M
    case OP_CHAR:
1023
168M
#ifdef SUPPORT_UNICODE
1024
168M
    if (utf)
1025
80.7M
      {
1026
80.7M
      Flength = 1;
1027
80.7M
      Fecode++;
1028
80.7M
      GETCHARLEN(fc, Fecode, Flength);
1029
80.7M
      if (Flength > (PCRE2_SIZE)(mb->end_subject - Feptr))
1030
946k
        {
1031
946k
        CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
1032
946k
        RRETURN(MATCH_NOMATCH);
1033
946k
        }
1034
79.8M
      for (; Flength > 0; Flength--)
1035
79.8M
        {
1036
79.8M
        if (*Fecode++ != UCHAR21INC(Feptr)) RRETURN(MATCH_NOMATCH);
1037
79.8M
        }
1038
79.8M
      }
1039
87.4M
    else
1040
87.4M
#endif
1041
1042
    /* Not UTF mode */
1043
87.4M
      {
1044
87.4M
      if (mb->end_subject - Feptr < 1)
1045
250k
        {
1046
250k
        SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
1047
250k
        RRETURN(MATCH_NOMATCH);
1048
250k
        }
1049
87.2M
      if (Fecode[1] != *Feptr++) RRETURN(MATCH_NOMATCH);
1050
2.28M
      Fecode += 2;
1051
2.28M
      }
1052
2.32M
    break;
1053
1054
1055
    /* ===================================================================== */
1056
    /* Match a single character, caselessly. If we are at the end of the
1057
    subject, give up immediately. We get here only when the pattern character
1058
    has at most one other case. Characters with more than two cases are coded
1059
    as OP_PROP with the pseudo-property PT_CLIST. */
1060
1061
20.4M
    case OP_CHARI:
1062
20.4M
    if (Feptr >= mb->end_subject)
1063
179k
      {
1064
179k
      SCHECK_PARTIAL();
1065
179k
      RRETURN(MATCH_NOMATCH);
1066
179k
      }
1067
1068
20.3M
#ifdef SUPPORT_UNICODE
1069
20.3M
    if (utf)
1070
3.79M
      {
1071
3.79M
      Flength = 1;
1072
3.79M
      Fecode++;
1073
3.79M
      GETCHARLEN(fc, Fecode, Flength);
1074
1075
      /* If the pattern character's value is < 128, we know that its other case
1076
      (if any) is also < 128 (and therefore only one code unit long in all
1077
      code-unit widths), so we can use the fast lookup table. We checked above
1078
      that there is at least one character left in the subject. */
1079
1080
3.79M
      if (fc < 128)
1081
3.76M
        {
1082
3.76M
        uint32_t cc = UCHAR21(Feptr);
1083
3.76M
        if (mb->lcc[fc] != TABLE_GET(cc, mb->lcc, cc)) RRETURN(MATCH_NOMATCH);
1084
166k
        Fecode++;
1085
166k
        Feptr++;
1086
166k
        }
1087
1088
      /* Otherwise we must pick up the subject character and use Unicode
1089
      property support to test its other case. Note that we cannot use the
1090
      value of "Flength" to check for sufficient bytes left, because the other
1091
      case of the character may have more or fewer code units. */
1092
1093
27.5k
      else
1094
27.5k
        {
1095
27.5k
        uint32_t dc;
1096
27.5k
        GETCHARINC(dc, Feptr);
1097
27.5k
        Fecode += Flength;
1098
27.5k
        if (dc != fc && dc != UCD_OTHERCASE(fc)) RRETURN(MATCH_NOMATCH);
1099
27.5k
        }
1100
3.79M
      }
1101
1102
    /* If UCP is set without UTF we must do the same as above, but with one
1103
    character per code unit. */
1104
1105
16.5M
    else if (ucp)
1106
0
      {
1107
0
      uint32_t cc = UCHAR21(Feptr);
1108
0
      fc = Fecode[1];
1109
0
      if (fc < 128)
1110
0
        {
1111
0
        if (mb->lcc[fc] != TABLE_GET(cc, mb->lcc, cc)) RRETURN(MATCH_NOMATCH);
1112
0
        }
1113
0
      else
1114
0
        {
1115
0
        if (cc != fc && cc != UCD_OTHERCASE(fc)) RRETURN(MATCH_NOMATCH);
1116
0
        }
1117
0
      Feptr++;
1118
0
      Fecode += 2;
1119
0
      }
1120
1121
16.5M
    else
1122
16.5M
#endif   /* SUPPORT_UNICODE */
1123
1124
    /* Not UTF or UCP mode; use the table for characters < 256. */
1125
16.5M
      {
1126
16.5M
      if (TABLE_GET(Fecode[1], mb->lcc, Fecode[1])
1127
16.5M
          != TABLE_GET(*Feptr, mb->lcc, *Feptr)) RRETURN(MATCH_NOMATCH);
1128
223k
      Feptr++;
1129
223k
      Fecode += 2;
1130
223k
      }
1131
390k
    break;
1132
1133
1134
    /* ===================================================================== */
1135
    /* Match not a single character. */
1136
1137
390k
    case OP_NOT:
1138
282k
    case OP_NOTI:
1139
282k
    if (Feptr >= mb->end_subject)
1140
1.26k
      {
1141
1.26k
      SCHECK_PARTIAL();
1142
1.26k
      RRETURN(MATCH_NOMATCH);
1143
1.26k
      }
1144
1145
281k
#ifdef SUPPORT_UNICODE
1146
281k
    if (utf)
1147
2.32k
      {
1148
2.32k
      uint32_t ch;
1149
2.32k
      Fecode++;
1150
2.32k
      GETCHARINC(ch, Fecode);
1151
2.32k
      GETCHARINC(fc, Feptr);
1152
2.32k
      if (ch == fc)
1153
42
        {
1154
42
        RRETURN(MATCH_NOMATCH);  /* Caseful match */
1155
42
        }
1156
2.28k
      else if (Fop == OP_NOTI)   /* If caseless */
1157
2.17k
        {
1158
2.17k
        if (ch > 127)
1159
0
          ch = UCD_OTHERCASE(ch);
1160
2.17k
        else
1161
2.17k
          ch = (mb->fcc)[ch];
1162
2.17k
        if (ch == fc) RRETURN(MATCH_NOMATCH);
1163
2.17k
        }
1164
2.32k
      }
1165
1166
    /* UCP without UTF is as above, but with one character per code unit. */
1167
1168
278k
    else if (ucp)
1169
0
      {
1170
0
      uint32_t ch;
1171
0
      fc = UCHAR21INC(Feptr);
1172
0
      ch = Fecode[1];
1173
0
      Fecode += 2;
1174
1175
0
      if (ch == fc)
1176
0
        {
1177
0
        RRETURN(MATCH_NOMATCH);  /* Caseful match */
1178
0
        }
1179
0
      else if (Fop == OP_NOTI)   /* If caseless */
1180
0
        {
1181
0
        if (ch > 127)
1182
0
          ch = UCD_OTHERCASE(ch);
1183
0
        else
1184
0
          ch = (mb->fcc)[ch];
1185
0
        if (ch == fc) RRETURN(MATCH_NOMATCH);
1186
0
        }
1187
0
      }
1188
1189
278k
    else
1190
278k
#endif  /* SUPPORT_UNICODE */
1191
1192
    /* Neither UTF nor UCP is set */
1193
1194
278k
      {
1195
278k
      uint32_t ch = Fecode[1];
1196
278k
      fc = UCHAR21INC(Feptr);
1197
278k
      if (ch == fc || (Fop == OP_NOTI && TABLE_GET(ch, mb->fcc, ch) == fc))
1198
2.34k
        RRETURN(MATCH_NOMATCH);
1199
276k
      Fecode += 2;
1200
276k
      }
1201
278k
    break;
1202
1203
1204
    /* ===================================================================== */
1205
    /* Match a single character repeatedly. */
1206
1207
8.93M
#define Loclength    F->temp_size
1208
35.4M
#define Lstart_eptr  F->temp_sptr[0]
1209
17.3M
#define Lcharptr     F->temp_sptr[1]
1210
133M
#define Lmin         F->temp_32[0]
1211
100M
#define Lmax         F->temp_32[1]
1212
67.0M
#define Lc           F->temp_32[2]
1213
12.6M
#define Loc          F->temp_32[3]
1214
1215
278k
    case OP_EXACT:
1216
0
    case OP_EXACTI:
1217
0
    Lmin = Lmax = GET2(Fecode, 1);
1218
0
    Fecode += 1 + IMM2_SIZE;
1219
0
    goto REPEATCHAR;
1220
1221
0
    case OP_POSUPTO:
1222
0
    case OP_POSUPTOI:
1223
0
    reptype = REPTYPE_POS;
1224
0
    Lmin = 0;
1225
0
    Lmax = GET2(Fecode, 1);
1226
0
    Fecode += 1 + IMM2_SIZE;
1227
0
    goto REPEATCHAR;
1228
1229
0
    case OP_UPTO:
1230
0
    case OP_UPTOI:
1231
0
    reptype = REPTYPE_MAX;
1232
0
    Lmin = 0;
1233
0
    Lmax = GET2(Fecode, 1);
1234
0
    Fecode += 1 + IMM2_SIZE;
1235
0
    goto REPEATCHAR;
1236
1237
0
    case OP_MINUPTO:
1238
0
    case OP_MINUPTOI:
1239
0
    reptype = REPTYPE_MIN;
1240
0
    Lmin = 0;
1241
0
    Lmax = GET2(Fecode, 1);
1242
0
    Fecode += 1 + IMM2_SIZE;
1243
0
    goto REPEATCHAR;
1244
1245
12.1k
    case OP_POSSTAR:
1246
67.4k
    case OP_POSSTARI:
1247
67.4k
    reptype = REPTYPE_POS;
1248
67.4k
    Lmin = 0;
1249
67.4k
    Lmax = UINT32_MAX;
1250
67.4k
    Fecode++;
1251
67.4k
    goto REPEATCHAR;
1252
1253
88.8k
    case OP_POSPLUS:
1254
139k
    case OP_POSPLUSI:
1255
139k
    reptype = REPTYPE_POS;
1256
139k
    Lmin = 1;
1257
139k
    Lmax = UINT32_MAX;
1258
139k
    Fecode++;
1259
139k
    goto REPEATCHAR;
1260
1261
17.9M
    case OP_POSQUERY:
1262
19.8M
    case OP_POSQUERYI:
1263
19.8M
    reptype = REPTYPE_POS;
1264
19.8M
    Lmin = 0;
1265
19.8M
    Lmax = 1;
1266
19.8M
    Fecode++;
1267
19.8M
    goto REPEATCHAR;
1268
1269
1.32k
    case OP_STAR:
1270
4.07k
    case OP_STARI:
1271
5.50k
    case OP_MINSTAR:
1272
21.1k
    case OP_MINSTARI:
1273
26.4k
    case OP_PLUS:
1274
28.2k
    case OP_PLUSI:
1275
30.5k
    case OP_MINPLUS:
1276
31.5k
    case OP_MINPLUSI:
1277
5.96M
    case OP_QUERY:
1278
7.77M
    case OP_QUERYI:
1279
10.8M
    case OP_MINQUERY:
1280
13.3M
    case OP_MINQUERYI:
1281
13.3M
    fc = *Fecode++ - ((Fop < OP_STARI)? OP_STAR : OP_STARI);
1282
13.3M
    Lmin = rep_min[fc];
1283
13.3M
    Lmax = rep_max[fc];
1284
13.3M
    reptype = rep_typ[fc];
1285
1286
    /* Common code for all repeated single-character matches. We first check
1287
    for the minimum number of characters. If the minimum equals the maximum, we
1288
    are done. Otherwise, if minimizing, check the rest of the pattern for a
1289
    match; if there isn't one, advance up to the maximum, one character at a
1290
    time.
1291
1292
    If maximizing, advance up to the maximum number of matching characters,
1293
    until Feptr is past the end of the maximum run. If possessive, we are
1294
    then done (no backing up). Otherwise, match at this position; anything
1295
    other than no match is immediately returned. For nomatch, back up one
1296
    character, unless we are matching \R and the last thing matched was
1297
    \r\n, in which case, back up two code units until we reach the first
1298
    optional character position.
1299
1300
    The various UTF/non-UTF and caseful/caseless cases are handled separately,
1301
    for speed. */
1302
1303
33.3M
    REPEATCHAR:
1304
33.3M
#ifdef SUPPORT_UNICODE
1305
33.3M
    if (utf)
1306
14.4M
      {
1307
14.4M
      Flength = 1;
1308
14.4M
      Lcharptr = Fecode;
1309
14.4M
      GETCHARLEN(fc, Fecode, Flength);
1310
14.4M
      Fecode += Flength;
1311
1312
      /* Handle multi-code-unit character matching, caseful and caseless. */
1313
1314
14.4M
      if (Flength > 1)
1315
2.97M
        {
1316
2.97M
        uint32_t othercase;
1317
1318
2.97M
        if (Fop >= OP_STARI &&     /* Caseless */
1319
2.97M
            (othercase = UCD_OTHERCASE(fc)) != fc)
1320
0
          Loclength = PRIV(ord2utf)(othercase, Foccu);
1321
2.97M
        else Loclength = 0;
1322
1323
2.97M
        for (i = 1; i <= Lmin; i++)
1324
4.37k
          {
1325
4.37k
          if (Feptr <= mb->end_subject - Flength &&
1326
4.37k
            memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0) Feptr += Flength;
1327
4.37k
          else if (Loclength > 0 &&
1328
4.37k
                   Feptr <= mb->end_subject - Loclength &&
1329
4.37k
                   memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0)
1330
0
            Feptr += Loclength;
1331
4.37k
          else
1332
4.37k
            {
1333
4.37k
            CHECK_PARTIAL();
1334
4.37k
            RRETURN(MATCH_NOMATCH);
1335
4.37k
            }
1336
4.37k
          }
1337
1338
2.97M
        if (Lmin == Lmax) continue;
1339
1340
2.97M
        if (reptype == REPTYPE_MIN)
1341
0
          {
1342
0
          for (;;)
1343
0
            {
1344
0
            RMATCH(Fecode, RM202);
1345
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1346
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1347
0
            if (Feptr <= mb->end_subject - Flength &&
1348
0
              memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0) Feptr += Flength;
1349
0
            else if (Loclength > 0 &&
1350
0
                     Feptr <= mb->end_subject - Loclength &&
1351
0
                     memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0)
1352
0
              Feptr += Loclength;
1353
0
            else
1354
0
              {
1355
0
              CHECK_PARTIAL();
1356
0
              RRETURN(MATCH_NOMATCH);
1357
0
              }
1358
0
            }
1359
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
1360
0
          }
1361
1362
2.97M
        else  /* Maximize */
1363
2.97M
          {
1364
2.97M
          Lstart_eptr = Feptr;
1365
2.97M
          for (i = Lmin; i < Lmax; i++)
1366
2.97M
            {
1367
2.97M
            if (Feptr <= mb->end_subject - Flength &&
1368
2.97M
                memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0)
1369
0
              Feptr += Flength;
1370
2.97M
            else if (Loclength > 0 &&
1371
2.97M
                     Feptr <= mb->end_subject - Loclength &&
1372
2.97M
                     memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0)
1373
0
              Feptr += Loclength;
1374
2.97M
            else
1375
2.97M
              {
1376
2.97M
              CHECK_PARTIAL();
1377
2.97M
              break;
1378
2.97M
              }
1379
2.97M
            }
1380
1381
          /* After \C in UTF mode, Lstart_eptr might be in the middle of a
1382
          Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
1383
          go too far. */
1384
1385
2.97M
          if (reptype != REPTYPE_POS) for(;;)
1386
0
            {
1387
0
            if (Feptr <= Lstart_eptr) break;
1388
0
            RMATCH(Fecode, RM203);
1389
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1390
0
            Feptr--;
1391
0
            BACKCHAR(Feptr);
1392
0
            }
1393
2.97M
          }
1394
2.97M
        break;   /* End of repeated wide character handling */
1395
2.97M
        }
1396
1397
      /* Length of UTF character is 1. Put it into the preserved variable and
1398
      fall through to the non-UTF code. */
1399
1400
11.4M
      Lc = fc;
1401
11.4M
      }
1402
18.9M
    else
1403
18.9M
#endif  /* SUPPORT_UNICODE */
1404
1405
    /* When not in UTF mode, load a single-code-unit character. Then proceed as
1406
    above, using Unicode casing if either UTF or UCP is set. */
1407
1408
18.9M
    Lc = *Fecode++;
1409
1410
    /* Caseless comparison */
1411
1412
30.4M
    if (Fop >= OP_STARI)
1413
6.32M
      {
1414
6.32M
#if PCRE2_CODE_UNIT_WIDTH == 8
1415
6.32M
#ifdef SUPPORT_UNICODE
1416
6.32M
      if (ucp && !utf && Lc > 127) Loc = UCD_OTHERCASE(Lc);
1417
6.32M
      else
1418
6.32M
#endif  /* SUPPORT_UNICODE */
1419
      /* Lc will be < 128 in UTF-8 mode. */
1420
6.32M
      Loc = mb->fcc[Lc];
1421
#else /* 16-bit & 32-bit */
1422
#ifdef SUPPORT_UNICODE
1423
      if ((utf || ucp) && Lc > 127) Loc = UCD_OTHERCASE(Lc);
1424
      else
1425
#endif  /* SUPPORT_UNICODE */
1426
      Loc = TABLE_GET(Lc, mb->fcc, Lc);
1427
#endif  /* PCRE2_CODE_UNIT_WIDTH == 8 */
1428
1429
6.32M
      for (i = 1; i <= Lmin; i++)
1430
49.4k
        {
1431
49.4k
        uint32_t cc;                 /* Faster than PCRE2_UCHAR */
1432
49.4k
        if (Feptr >= mb->end_subject)
1433
871
          {
1434
871
          SCHECK_PARTIAL();
1435
871
          RRETURN(MATCH_NOMATCH);
1436
871
          }
1437
48.5k
        cc = UCHAR21TEST(Feptr);
1438
48.5k
        if (Lc != cc && Loc != cc) RRETURN(MATCH_NOMATCH);
1439
1.01k
        Feptr++;
1440
1.01k
        }
1441
6.27M
      if (Lmin == Lmax) continue;
1442
1443
6.27M
      if (reptype == REPTYPE_MIN)
1444
2.45M
        {
1445
2.45M
        for (;;)
1446
2.47M
          {
1447
2.47M
          uint32_t cc;               /* Faster than PCRE2_UCHAR */
1448
2.47M
          RMATCH(Fecode, RM25);
1449
2.47M
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1450
2.47M
          if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1451
2.45M
          if (Feptr >= mb->end_subject)
1452
1.18k
            {
1453
1.18k
            SCHECK_PARTIAL();
1454
1.18k
            RRETURN(MATCH_NOMATCH);
1455
1.18k
            }
1456
2.45M
          cc = UCHAR21TEST(Feptr);
1457
2.45M
          if (Lc != cc && Loc != cc) RRETURN(MATCH_NOMATCH);
1458
25.2k
          Feptr++;
1459
25.2k
          }
1460
0
        PCRE2_UNREACHABLE(); /* Control never reaches here */
1461
0
        }
1462
1463
3.82M
      else  /* Maximize */
1464
3.82M
        {
1465
3.82M
        Lstart_eptr = Feptr;
1466
3.84M
        for (i = Lmin; i < Lmax; i++)
1467
3.83M
          {
1468
3.83M
          uint32_t cc;               /* Faster than PCRE2_UCHAR */
1469
3.83M
          if (Feptr >= mb->end_subject)
1470
5.24k
            {
1471
5.24k
            SCHECK_PARTIAL();
1472
5.24k
            break;
1473
5.24k
            }
1474
3.82M
          cc = UCHAR21TEST(Feptr);
1475
3.82M
          if (Lc != cc && Loc != cc) break;
1476
24.0k
          Feptr++;
1477
24.0k
          }
1478
3.82M
        if (reptype != REPTYPE_POS) for (;;)
1479
1.81M
          {
1480
1.81M
          if (Feptr == Lstart_eptr) break;
1481
1.08k
          RMATCH(Fecode, RM26);
1482
1.08k
          Feptr--;
1483
1.08k
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1484
1.08k
          }
1485
3.82M
        }
1486
6.27M
      }
1487
1488
    /* Caseful comparisons (includes all multi-byte characters) */
1489
1490
24.0M
    else
1491
24.0M
      {
1492
24.0M
      for (i = 1; i <= Lmin; i++)
1493
96.4k
        {
1494
96.4k
        if (Feptr >= mb->end_subject)
1495
356
          {
1496
356
          SCHECK_PARTIAL();
1497
356
          RRETURN(MATCH_NOMATCH);
1498
356
          }
1499
96.1k
        if (Lc != UCHAR21INCTEST(Feptr)) RRETURN(MATCH_NOMATCH);
1500
96.1k
        }
1501
1502
23.9M
      if (Lmin == Lmax) continue;
1503
1504
23.9M
      if (reptype == REPTYPE_MIN)
1505
3.09M
        {
1506
3.09M
        for (;;)
1507
3.10M
          {
1508
3.10M
          RMATCH(Fecode, RM27);
1509
3.10M
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1510
3.10M
          if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1511
3.09M
          if (Feptr >= mb->end_subject)
1512
10.9k
            {
1513
10.9k
            SCHECK_PARTIAL();
1514
10.9k
            RRETURN(MATCH_NOMATCH);
1515
10.9k
            }
1516
3.08M
          if (Lc != UCHAR21INCTEST(Feptr)) RRETURN(MATCH_NOMATCH);
1517
3.08M
          }
1518
0
        PCRE2_UNREACHABLE(); /* Control never reaches here */
1519
0
        }
1520
20.8M
      else  /* Maximize */
1521
20.8M
        {
1522
20.8M
        Lstart_eptr = Feptr;
1523
21.0M
        for (i = Lmin; i < Lmax; i++)
1524
20.9M
          {
1525
20.9M
          if (Feptr >= mb->end_subject)
1526
98.9k
            {
1527
98.9k
            SCHECK_PARTIAL();
1528
98.9k
            break;
1529
98.9k
            }
1530
1531
20.8M
          if (Lc != UCHAR21TEST(Feptr)) break;
1532
115k
          Feptr++;
1533
115k
          }
1534
1535
20.8M
        if (reptype != REPTYPE_POS) for (;;)
1536
5.98M
          {
1537
5.98M
          if (Feptr <= Lstart_eptr) break;
1538
53.2k
          RMATCH(Fecode, RM28);
1539
53.2k
          Feptr--;
1540
53.2k
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1541
53.2k
          }
1542
20.8M
        }
1543
23.9M
      }
1544
24.7M
    break;
1545
1546
24.7M
#undef Loclength
1547
24.7M
#undef Lstart_eptr
1548
24.7M
#undef Lcharptr
1549
24.7M
#undef Lmin
1550
24.7M
#undef Lmax
1551
24.7M
#undef Lc
1552
24.7M
#undef Loc
1553
1554
1555
    /* ===================================================================== */
1556
    /* Match a negated single one-byte character repeatedly. This is almost a
1557
    repeat of the code for a repeated single character, but I haven't found a
1558
    nice way of commoning these up that doesn't require a test of the
1559
    positive/negative option for each character match. Maybe that wouldn't add
1560
    very much to the time taken, but character matching *is* what this is all
1561
    about... */
1562
1563
24.7M
#define Lstart_eptr  F->temp_sptr[0]
1564
24.7M
#define Lmin         F->temp_32[0]
1565
24.7M
#define Lmax         F->temp_32[1]
1566
24.7M
#define Lc           F->temp_32[2]
1567
24.7M
#define Loc          F->temp_32[3]
1568
1569
24.7M
    case OP_NOTEXACT:
1570
0
    case OP_NOTEXACTI:
1571
0
    Lmin = Lmax = GET2(Fecode, 1);
1572
0
    Fecode += 1 + IMM2_SIZE;
1573
0
    goto REPEATNOTCHAR;
1574
1575
0
    case OP_NOTUPTO:
1576
0
    case OP_NOTUPTOI:
1577
0
    Lmin = 0;
1578
0
    Lmax = GET2(Fecode, 1);
1579
0
    reptype = REPTYPE_MAX;
1580
0
    Fecode += 1 + IMM2_SIZE;
1581
0
    goto REPEATNOTCHAR;
1582
1583
0
    case OP_NOTMINUPTO:
1584
0
    case OP_NOTMINUPTOI:
1585
0
    Lmin = 0;
1586
0
    Lmax = GET2(Fecode, 1);
1587
0
    reptype = REPTYPE_MIN;
1588
0
    Fecode += 1 + IMM2_SIZE;
1589
0
    goto REPEATNOTCHAR;
1590
1591
0
    case OP_NOTPOSSTAR:
1592
0
    case OP_NOTPOSSTARI:
1593
0
    reptype = REPTYPE_POS;
1594
0
    Lmin = 0;
1595
0
    Lmax = UINT32_MAX;
1596
0
    Fecode++;
1597
0
    goto REPEATNOTCHAR;
1598
1599
345
    case OP_NOTPOSPLUS:
1600
1.23k
    case OP_NOTPOSPLUSI:
1601
1.23k
    reptype = REPTYPE_POS;
1602
1.23k
    Lmin = 1;
1603
1.23k
    Lmax = UINT32_MAX;
1604
1.23k
    Fecode++;
1605
1.23k
    goto REPEATNOTCHAR;
1606
1607
0
    case OP_NOTPOSQUERY:
1608
3
    case OP_NOTPOSQUERYI:
1609
3
    reptype = REPTYPE_POS;
1610
3
    Lmin = 0;
1611
3
    Lmax = 1;
1612
3
    Fecode++;
1613
3
    goto REPEATNOTCHAR;
1614
1615
0
    case OP_NOTPOSUPTO:
1616
0
    case OP_NOTPOSUPTOI:
1617
0
    reptype = REPTYPE_POS;
1618
0
    Lmin = 0;
1619
0
    Lmax = GET2(Fecode, 1);
1620
0
    Fecode += 1 + IMM2_SIZE;
1621
0
    goto REPEATNOTCHAR;
1622
1623
0
    case OP_NOTSTAR:
1624
0
    case OP_NOTSTARI:
1625
0
    case OP_NOTMINSTAR:
1626
0
    case OP_NOTMINSTARI:
1627
5.80k
    case OP_NOTPLUS:
1628
136k
    case OP_NOTPLUSI:
1629
144k
    case OP_NOTMINPLUS:
1630
149k
    case OP_NOTMINPLUSI:
1631
150k
    case OP_NOTQUERY:
1632
158k
    case OP_NOTQUERYI:
1633
159k
    case OP_NOTMINQUERY:
1634
275k
    case OP_NOTMINQUERYI:
1635
275k
    fc = *Fecode++ - ((Fop >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
1636
275k
    Lmin = rep_min[fc];
1637
275k
    Lmax = rep_max[fc];
1638
275k
    reptype = rep_typ[fc];
1639
1640
    /* Common code for all repeated single-character non-matches. */
1641
1642
276k
    REPEATNOTCHAR:
1643
276k
    GETCHARINCTEST(Lc, Fecode);
1644
1645
    /* The code is duplicated for the caseless and caseful cases, for speed,
1646
    since matching characters is likely to be quite common. First, ensure the
1647
    minimum number of matches are present. If Lmin = Lmax, we are done.
1648
    Otherwise, if minimizing, keep trying the rest of the expression and
1649
    advancing one matching character if failing, up to the maximum.
1650
    Alternatively, if maximizing, find the maximum number of characters and
1651
    work backwards. */
1652
1653
276k
    if (Fop >= OP_NOTSTARI)     /* Caseless */
1654
259k
      {
1655
259k
#ifdef SUPPORT_UNICODE
1656
259k
      if ((utf || ucp) && Lc > 127)
1657
0
        Loc = UCD_OTHERCASE(Lc);
1658
259k
      else
1659
259k
#endif /* SUPPORT_UNICODE */
1660
1661
259k
      Loc = TABLE_GET(Lc, mb->fcc, Lc);  /* Other case from table */
1662
1663
259k
#ifdef SUPPORT_UNICODE
1664
259k
      if (utf)
1665
117k
        {
1666
117k
        uint32_t d;
1667
118k
        for (i = 1; i <= Lmin; i++)
1668
654
          {
1669
654
          if (Feptr >= mb->end_subject)
1670
54
            {
1671
54
            SCHECK_PARTIAL();
1672
54
            RRETURN(MATCH_NOMATCH);
1673
54
            }
1674
600
          GETCHARINC(d, Feptr);
1675
600
          if (Lc == d || Loc == d) RRETURN(MATCH_NOMATCH);
1676
600
          }
1677
117k
        }
1678
141k
      else
1679
141k
#endif  /* SUPPORT_UNICODE */
1680
1681
      /* Not UTF mode */
1682
141k
        {
1683
274k
        for (i = 1; i <= Lmin; i++)
1684
135k
          {
1685
135k
          if (Feptr >= mb->end_subject)
1686
943
            {
1687
943
            SCHECK_PARTIAL();
1688
943
            RRETURN(MATCH_NOMATCH);
1689
943
            }
1690
134k
          if (Lc == *Feptr || Loc == *Feptr) RRETURN(MATCH_NOMATCH);
1691
133k
          Feptr++;
1692
133k
          }
1693
141k
        }
1694
1695
257k
      if (Lmin == Lmax) continue;  /* Finished for exact count */
1696
1697
257k
      if (reptype == REPTYPE_MIN)
1698
120k
        {
1699
120k
#ifdef SUPPORT_UNICODE
1700
120k
        if (utf)
1701
114k
          {
1702
114k
          uint32_t d;
1703
114k
          for (;;)
1704
222k
            {
1705
222k
            RMATCH(Fecode, RM204);
1706
222k
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1707
222k
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1708
114k
            if (Feptr >= mb->end_subject)
1709
1.45k
              {
1710
1.45k
              SCHECK_PARTIAL();
1711
1.45k
              RRETURN(MATCH_NOMATCH);
1712
1.45k
              }
1713
113k
            GETCHARINC(d, Feptr);
1714
113k
            if (Lc == d || Loc == d) RRETURN(MATCH_NOMATCH);
1715
113k
            }
1716
114k
          }
1717
5.85k
        else
1718
5.85k
#endif  /*SUPPORT_UNICODE */
1719
1720
        /* Not UTF mode */
1721
5.85k
          {
1722
5.85k
          for (;;)
1723
205k
            {
1724
205k
            RMATCH(Fecode, RM29);
1725
205k
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1726
205k
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1727
204k
            if (Feptr >= mb->end_subject)
1728
882
              {
1729
882
              SCHECK_PARTIAL();
1730
882
              RRETURN(MATCH_NOMATCH);
1731
882
              }
1732
203k
            if (Lc == *Feptr || Loc == *Feptr) RRETURN(MATCH_NOMATCH);
1733
199k
            Feptr++;
1734
199k
            }
1735
5.85k
          }
1736
0
        PCRE2_UNREACHABLE(); /* Control never reaches here */
1737
0
        }
1738
1739
      /* Maximize case */
1740
1741
136k
      else
1742
136k
        {
1743
136k
        Lstart_eptr = Feptr;
1744
1745
136k
#ifdef SUPPORT_UNICODE
1746
136k
        if (utf)
1747
3.16k
          {
1748
3.16k
          uint32_t d;
1749
7.81k
          for (i = Lmin; i < Lmax; i++)
1750
5.35k
            {
1751
5.35k
            int len = 1;
1752
5.35k
            if (Feptr >= mb->end_subject)
1753
400
              {
1754
400
              SCHECK_PARTIAL();
1755
400
              break;
1756
400
              }
1757
4.95k
            GETCHARLEN(d, Feptr, len);
1758
4.95k
            if (Lc == d || Loc == d) break;
1759
4.64k
            Feptr += len;
1760
4.64k
            }
1761
1762
          /* After \C in UTF mode, Lstart_eptr might be in the middle of a
1763
          Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
1764
          go too far. */
1765
1766
3.16k
          if (reptype != REPTYPE_POS) for(;;)
1767
7.81k
            {
1768
7.81k
            if (Feptr <= Lstart_eptr) break;
1769
4.64k
            RMATCH(Fecode, RM205);
1770
4.64k
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1771
4.64k
            Feptr--;
1772
4.64k
            BACKCHAR(Feptr);
1773
4.64k
            }
1774
3.16k
          }
1775
133k
        else
1776
133k
#endif  /* SUPPORT_UNICODE */
1777
1778
        /* Not UTF mode */
1779
133k
          {
1780
6.14M
          for (i = Lmin; i < Lmax; i++)
1781
6.13M
            {
1782
6.13M
            if (Feptr >= mb->end_subject)
1783
40.7k
              {
1784
40.7k
              SCHECK_PARTIAL();
1785
40.7k
              break;
1786
40.7k
              }
1787
6.09M
            if (Lc == *Feptr || Loc == *Feptr) break;
1788
6.00M
            Feptr++;
1789
6.00M
            }
1790
133k
          if (reptype != REPTYPE_POS) for (;;)
1791
6.13M
            {
1792
6.13M
            if (Feptr == Lstart_eptr) break;
1793
5.99M
            RMATCH(Fecode, RM30);
1794
5.99M
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1795
5.99M
            Feptr--;
1796
5.99M
            }
1797
133k
          }
1798
136k
        }
1799
257k
      }
1800
1801
    /* Caseful comparisons */
1802
1803
16.6k
    else
1804
16.6k
      {
1805
16.6k
#ifdef SUPPORT_UNICODE
1806
16.6k
      if (utf)
1807
1.26k
        {
1808
1.26k
        uint32_t d;
1809
1.26k
        for (i = 1; i <= Lmin; i++)
1810
0
          {
1811
0
          if (Feptr >= mb->end_subject)
1812
0
            {
1813
0
            SCHECK_PARTIAL();
1814
0
            RRETURN(MATCH_NOMATCH);
1815
0
            }
1816
0
          GETCHARINC(d, Feptr);
1817
0
          if (Lc == d) RRETURN(MATCH_NOMATCH);
1818
0
          }
1819
1.26k
        }
1820
15.4k
      else
1821
15.4k
#endif
1822
      /* Not UTF mode */
1823
15.4k
        {
1824
25.8k
        for (i = 1; i <= Lmin; i++)
1825
14.9k
          {
1826
14.9k
          if (Feptr >= mb->end_subject)
1827
4.42k
            {
1828
4.42k
            SCHECK_PARTIAL();
1829
4.42k
            RRETURN(MATCH_NOMATCH);
1830
4.42k
            }
1831
10.5k
          if (Lc == *Feptr++) RRETURN(MATCH_NOMATCH);
1832
10.5k
          }
1833
15.4k
        }
1834
1835
12.1k
      if (Lmin == Lmax) continue;
1836
1837
12.1k
      if (reptype == REPTYPE_MIN)
1838
5.74k
        {
1839
5.74k
#ifdef SUPPORT_UNICODE
1840
5.74k
        if (utf)
1841
1.26k
          {
1842
1.26k
          uint32_t d;
1843
1.26k
          for (;;)
1844
2.48k
            {
1845
2.48k
            RMATCH(Fecode, RM206);
1846
2.48k
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1847
2.48k
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1848
1.26k
            if (Feptr >= mb->end_subject)
1849
0
              {
1850
0
              SCHECK_PARTIAL();
1851
0
              RRETURN(MATCH_NOMATCH);
1852
0
              }
1853
1.26k
            GETCHARINC(d, Feptr);
1854
1.26k
            if (Lc == d) RRETURN(MATCH_NOMATCH);
1855
1.26k
            }
1856
1.26k
          }
1857
4.48k
        else
1858
4.48k
#endif
1859
        /* Not UTF mode */
1860
4.48k
          {
1861
4.48k
          for (;;)
1862
4.65M
            {
1863
4.65M
            RMATCH(Fecode, RM31);
1864
4.65M
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1865
4.65M
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1866
4.65M
            if (Feptr >= mb->end_subject)
1867
1.37k
              {
1868
1.37k
              SCHECK_PARTIAL();
1869
1.37k
              RRETURN(MATCH_NOMATCH);
1870
1.37k
              }
1871
4.65M
            if (Lc == *Feptr++) RRETURN(MATCH_NOMATCH);
1872
4.65M
            }
1873
4.48k
          }
1874
0
        PCRE2_UNREACHABLE(); /* Control never reaches here */
1875
0
        }
1876
1877
      /* Maximize case */
1878
1879
6.42k
      else
1880
6.42k
        {
1881
6.42k
        Lstart_eptr = Feptr;
1882
1883
6.42k
#ifdef SUPPORT_UNICODE
1884
6.42k
        if (utf)
1885
0
          {
1886
0
          uint32_t d;
1887
0
          for (i = Lmin; i < Lmax; i++)
1888
0
            {
1889
0
            int len = 1;
1890
0
            if (Feptr >= mb->end_subject)
1891
0
              {
1892
0
              SCHECK_PARTIAL();
1893
0
              break;
1894
0
              }
1895
0
            GETCHARLEN(d, Feptr, len);
1896
0
            if (Lc == d) break;
1897
0
            Feptr += len;
1898
0
            }
1899
1900
          /* After \C in UTF mode, Lstart_eptr might be in the middle of a
1901
          Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
1902
          go too far. */
1903
1904
0
          if (reptype != REPTYPE_POS) for(;;)
1905
0
            {
1906
0
            if (Feptr <= Lstart_eptr) break;
1907
0
            RMATCH(Fecode, RM207);
1908
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1909
0
            Feptr--;
1910
0
            BACKCHAR(Feptr);
1911
0
            }
1912
0
          }
1913
6.42k
        else
1914
6.42k
#endif
1915
        /* Not UTF mode */
1916
6.42k
          {
1917
9.93M
          for (i = Lmin; i < Lmax; i++)
1918
9.93M
            {
1919
9.93M
            if (Feptr >= mb->end_subject)
1920
4.62k
              {
1921
4.62k
              SCHECK_PARTIAL();
1922
4.62k
              break;
1923
4.62k
              }
1924
9.93M
            if (Lc == *Feptr) break;
1925
9.93M
            Feptr++;
1926
9.93M
            }
1927
6.42k
          if (reptype != REPTYPE_POS) for (;;)
1928
9.93M
            {
1929
9.93M
            if (Feptr == Lstart_eptr) break;
1930
9.92M
            RMATCH(Fecode, RM32);
1931
9.92M
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1932
9.92M
            Feptr--;
1933
9.92M
            }
1934
6.42k
          }
1935
6.42k
        }
1936
12.1k
      }
1937
143k
    break;
1938
1939
143k
#undef Lstart_eptr
1940
143k
#undef Lmin
1941
143k
#undef Lmax
1942
143k
#undef Lc
1943
143k
#undef Loc
1944
1945
1946
    /* ===================================================================== */
1947
    /* Match a bit-mapped character class, possibly repeatedly. These opcodes
1948
    are used when all the characters in the class have values in the range
1949
    0-255, and either the matching is caseful, or the characters are in the
1950
    range 0-127 when UTF processing is enabled. The only difference between
1951
    OP_CLASS and OP_NCLASS occurs when a data character outside the range is
1952
    encountered. */
1953
1954
15.5M
#define Lmin               F->temp_32[0]
1955
14.5M
#define Lmax               F->temp_32[1]
1956
11.6M
#define Lstart_eptr        F->temp_sptr[0]
1957
10.5M
#define Lbyte_map_address  F->temp_sptr[1]
1958
6.74M
#define Lbyte_map          ((const unsigned char *)Lbyte_map_address)
1959
1960
356k
    case OP_NCLASS:
1961
3.78M
    case OP_CLASS:
1962
3.78M
      {
1963
3.78M
      Lbyte_map_address = Fecode + 1;           /* Save for matching */
1964
3.78M
      Fecode += 1 + (32 / sizeof(PCRE2_UCHAR)); /* Advance past the item */
1965
1966
      /* Look past the end of the item to see if there is repeat information
1967
      following. Then obey similar code to character type repeats. */
1968
1969
3.78M
      switch (*Fecode)
1970
3.78M
        {
1971
2.33M
        case OP_CRSTAR:
1972
2.36M
        case OP_CRMINSTAR:
1973
2.40M
        case OP_CRPLUS:
1974
2.40M
        case OP_CRMINPLUS:
1975
3.08M
        case OP_CRQUERY:
1976
3.11M
        case OP_CRMINQUERY:
1977
3.70M
        case OP_CRPOSSTAR:
1978
3.71M
        case OP_CRPOSPLUS:
1979
3.72M
        case OP_CRPOSQUERY:
1980
3.72M
        fc = *Fecode++ - OP_CRSTAR;
1981
3.72M
        Lmin = rep_min[fc];
1982
3.72M
        Lmax = rep_max[fc];
1983
3.72M
        reptype = rep_typ[fc];
1984
3.72M
        break;
1985
1986
0
        case OP_CRRANGE:
1987
0
        case OP_CRMINRANGE:
1988
0
        case OP_CRPOSRANGE:
1989
0
        Lmin = GET2(Fecode, 1);
1990
0
        Lmax = GET2(Fecode, 1 + IMM2_SIZE);
1991
0
        if (Lmax == 0) Lmax = UINT32_MAX;       /* Max 0 => infinity */
1992
0
        reptype = rep_typ[*Fecode - OP_CRSTAR];
1993
0
        Fecode += 1 + 2 * IMM2_SIZE;
1994
0
        break;
1995
1996
67.2k
        default:               /* No repeat follows */
1997
67.2k
        Lmin = Lmax = 1;
1998
67.2k
        break;
1999
3.78M
        }
2000
2001
      /* First, ensure the minimum number of matches are present. */
2002
2003
3.78M
#ifdef SUPPORT_UNICODE
2004
3.78M
      if (utf)
2005
69.4k
        {
2006
104k
        for (i = 1; i <= Lmin; i++)
2007
51.3k
          {
2008
51.3k
          if (Feptr >= mb->end_subject)
2009
414
            {
2010
414
            SCHECK_PARTIAL();
2011
414
            RRETURN(MATCH_NOMATCH);
2012
414
            }
2013
50.9k
          GETCHARINC(fc, Feptr);
2014
50.9k
          if (fc > 255)
2015
1.13k
            {
2016
1.13k
            if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
2017
1.13k
            }
2018
49.8k
          else
2019
49.8k
            if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
2020
50.9k
          }
2021
69.4k
        }
2022
3.71M
      else
2023
3.71M
#endif
2024
      /* Not UTF mode */
2025
3.71M
        {
2026
3.73M
        for (i = 1; i <= Lmin; i++)
2027
62.1k
          {
2028
62.1k
          if (Feptr >= mb->end_subject)
2029
2.06k
            {
2030
2.06k
            SCHECK_PARTIAL();
2031
2.06k
            RRETURN(MATCH_NOMATCH);
2032
2.06k
            }
2033
60.0k
          fc = *Feptr++;
2034
#if PCRE2_CODE_UNIT_WIDTH != 8
2035
          if (fc > 255)
2036
            {
2037
            if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
2038
            }
2039
          else
2040
#endif
2041
60.0k
          if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
2042
60.0k
          }
2043
3.71M
        }
2044
2045
      /* If Lmax == Lmin we are done. Continue with main loop. */
2046
2047
3.73M
      if (Lmin == Lmax) continue;
2048
2049
      /* If minimizing, keep testing the rest of the expression and advancing
2050
      the pointer while it matches the class. */
2051
2052
3.68M
      if (reptype == REPTYPE_MIN)
2053
54.6k
        {
2054
54.6k
#ifdef SUPPORT_UNICODE
2055
54.6k
        if (utf)
2056
6.13k
          {
2057
6.13k
          for (;;)
2058
49.1k
            {
2059
49.1k
            RMATCH(Fecode, RM200);
2060
49.1k
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2061
49.1k
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
2062
46.3k
            if (Feptr >= mb->end_subject)
2063
228
              {
2064
228
              SCHECK_PARTIAL();
2065
228
              RRETURN(MATCH_NOMATCH);
2066
228
              }
2067
46.1k
            GETCHARINC(fc, Feptr);
2068
46.1k
            if (fc > 255)
2069
247
              {
2070
247
              if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
2071
247
              }
2072
45.8k
            else
2073
45.8k
              if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
2074
46.1k
            }
2075
6.13k
          }
2076
48.4k
        else
2077
48.4k
#endif
2078
        /* Not UTF mode */
2079
48.4k
          {
2080
48.4k
          for (;;)
2081
550k
            {
2082
550k
            RMATCH(Fecode, RM23);
2083
550k
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2084
550k
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
2085
532k
            if (Feptr >= mb->end_subject)
2086
3.77k
              {
2087
3.77k
              SCHECK_PARTIAL();
2088
3.77k
              RRETURN(MATCH_NOMATCH);
2089
3.77k
              }
2090
529k
            fc = *Feptr++;
2091
#if PCRE2_CODE_UNIT_WIDTH != 8
2092
            if (fc > 255)
2093
              {
2094
              if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
2095
              }
2096
            else
2097
#endif
2098
529k
            if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
2099
529k
            }
2100
48.4k
          }
2101
0
        PCRE2_UNREACHABLE(); /* Control never reaches here */
2102
0
        }
2103
2104
      /* If maximizing, find the longest possible run, then work backwards. */
2105
2106
3.63M
      else
2107
3.63M
        {
2108
3.63M
        Lstart_eptr = Feptr;
2109
2110
3.63M
#ifdef SUPPORT_UNICODE
2111
3.63M
        if (utf)
2112
16.4k
          {
2113
178k
          for (i = Lmin; i < Lmax; i++)
2114
175k
            {
2115
175k
            int len = 1;
2116
175k
            if (Feptr >= mb->end_subject)
2117
1.58k
              {
2118
1.58k
              SCHECK_PARTIAL();
2119
1.58k
              break;
2120
1.58k
              }
2121
174k
            GETCHARLEN(fc, Feptr, len);
2122
174k
            if (fc > 255)
2123
7.04k
              {
2124
7.04k
              if (Fop == OP_CLASS) break;
2125
7.04k
              }
2126
167k
            else
2127
167k
              if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) break;
2128
161k
            Feptr += len;
2129
161k
            }
2130
2131
16.4k
          if (reptype == REPTYPE_POS) continue;    /* No backtracking */
2132
2133
          /* After \C in UTF mode, Lstart_eptr might be in the middle of a
2134
          Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
2135
          go too far. */
2136
2137
15.5k
          for (;;)
2138
175k
            {
2139
175k
            RMATCH(Fecode, RM201);
2140
175k
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2141
175k
            if (Feptr-- <= Lstart_eptr) break;  /* Tried at original position */
2142
160k
            BACKCHAR(Feptr);
2143
160k
            }
2144
15.5k
          }
2145
3.61M
        else
2146
3.61M
#endif
2147
          /* Not UTF mode */
2148
3.61M
          {
2149
6.28M
          for (i = Lmin; i < Lmax; i++)
2150
5.90M
            {
2151
5.90M
            if (Feptr >= mb->end_subject)
2152
12.1k
              {
2153
12.1k
              SCHECK_PARTIAL();
2154
12.1k
              break;
2155
12.1k
              }
2156
5.89M
            fc = *Feptr;
2157
#if PCRE2_CODE_UNIT_WIDTH != 8
2158
            if (fc > 255)
2159
              {
2160
              if (Fop == OP_CLASS) break;
2161
              }
2162
            else
2163
#endif
2164
5.89M
            if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) break;
2165
2.66M
            Feptr++;
2166
2.66M
            }
2167
2168
3.61M
          if (reptype == REPTYPE_POS) continue;    /* No backtracking */
2169
2170
7.84M
          while (Feptr >= Lstart_eptr)
2171
4.82M
            {
2172
4.82M
            RMATCH(Fecode, RM24);
2173
4.82M
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2174
4.82M
            Feptr--;
2175
4.82M
            }
2176
3.02M
          }
2177
2178
3.03M
        RRETURN(MATCH_NOMATCH);
2179
3.03M
        }
2180
3.68M
      }
2181
2182
0
    PCRE2_UNREACHABLE(); /* Control never reaches here */
2183
2184
0
#undef Lbyte_map_address
2185
0
#undef Lbyte_map
2186
0
#undef Lstart_eptr
2187
0
#undef Lmin
2188
0
#undef Lmax
2189
2190
2191
    /* ===================================================================== */
2192
    /* Match an extended character class. In the 8-bit library, this opcode is
2193
    encountered only when UTF-8 mode is supported. In the 16-bit and
2194
    32-bit libraries, codepoints greater than 255 may be encountered even when
2195
    UTF is not supported. */
2196
2197
260k
#define Lstart_eptr  F->temp_sptr[0]
2198
505k
#define Lxclass_data F->temp_sptr[1]
2199
438k
#define Lmin         F->temp_32[0]
2200
568k
#define Lmax         F->temp_32[1]
2201
2202
0
#ifdef SUPPORT_WIDE_CHARS
2203
95.2k
    case OP_XCLASS:
2204
95.2k
      {
2205
95.2k
      Lxclass_data = Fecode + 1 + LINK_SIZE;  /* Save for matching */
2206
95.2k
      Fecode += GET(Fecode, 1);               /* Advance past the item */
2207
2208
95.2k
      switch (*Fecode)
2209
95.2k
        {
2210
6.33k
        case OP_CRSTAR:
2211
11.4k
        case OP_CRMINSTAR:
2212
19.6k
        case OP_CRPLUS:
2213
21.7k
        case OP_CRMINPLUS:
2214
23.6k
        case OP_CRQUERY:
2215
57.0k
        case OP_CRMINQUERY:
2216
59.9k
        case OP_CRPOSSTAR:
2217
62.0k
        case OP_CRPOSPLUS:
2218
66.1k
        case OP_CRPOSQUERY:
2219
66.1k
        fc = *Fecode++ - OP_CRSTAR;
2220
66.1k
        Lmin = rep_min[fc];
2221
66.1k
        Lmax = rep_max[fc];
2222
66.1k
        reptype = rep_typ[fc];
2223
66.1k
        break;
2224
2225
0
        case OP_CRRANGE:
2226
0
        case OP_CRMINRANGE:
2227
0
        case OP_CRPOSRANGE:
2228
0
        Lmin = GET2(Fecode, 1);
2229
0
        Lmax = GET2(Fecode, 1 + IMM2_SIZE);
2230
0
        if (Lmax == 0) Lmax = UINT32_MAX;  /* Max 0 => infinity */
2231
0
        reptype = rep_typ[*Fecode - OP_CRSTAR];
2232
0
        Fecode += 1 + 2 * IMM2_SIZE;
2233
0
        break;
2234
2235
29.1k
        default:               /* No repeat follows */
2236
29.1k
        Lmin = Lmax = 1;
2237
29.1k
        break;
2238
95.2k
        }
2239
2240
      /* First, ensure the minimum number of matches are present. */
2241
2242
113k
      for (i = 1; i <= Lmin; i++)
2243
41.5k
        {
2244
41.5k
        if (Feptr >= mb->end_subject)
2245
264
          {
2246
264
          SCHECK_PARTIAL();
2247
264
          RRETURN(MATCH_NOMATCH);
2248
264
          }
2249
41.2k
        GETCHARINCTEST(fc, Feptr);
2250
41.2k
        if (!PRIV(xclass)(fc, Lxclass_data,
2251
41.2k
            (const uint8_t*)mb->start_code, utf))
2252
22.8k
          RRETURN(MATCH_NOMATCH);
2253
41.2k
        }
2254
2255
      /* If Lmax == Lmin we can just continue with the main loop. */
2256
2257
72.1k
      if (Lmin == Lmax) continue;
2258
2259
      /* If minimizing, keep testing the rest of the expression and advancing
2260
      the pointer while it matches the class. */
2261
2262
61.5k
      if (reptype == REPTYPE_MIN)
2263
40.1k
        {
2264
40.1k
        for (;;)
2265
135k
          {
2266
135k
          RMATCH(Fecode, RM100);
2267
135k
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2268
135k
          if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
2269
108k
          if (Feptr >= mb->end_subject)
2270
565
            {
2271
565
            SCHECK_PARTIAL();
2272
565
            RRETURN(MATCH_NOMATCH);
2273
565
            }
2274
108k
          GETCHARINCTEST(fc, Feptr);
2275
108k
          if (!PRIV(xclass)(fc, Lxclass_data,
2276
108k
              (const uint8_t*)mb->start_code, utf))
2277
12.4k
            RRETURN(MATCH_NOMATCH);
2278
108k
          }
2279
0
        PCRE2_UNREACHABLE(); /* Control never reaches here */
2280
0
        }
2281
2282
      /* If maximizing, find the longest possible run, then work backwards. */
2283
2284
21.4k
      else
2285
21.4k
        {
2286
21.4k
        Lstart_eptr = Feptr;
2287
265k
        for (i = Lmin; i < Lmax; i++)
2288
263k
          {
2289
263k
          int len = 1;
2290
263k
          if (Feptr >= mb->end_subject)
2291
2.27k
            {
2292
2.27k
            SCHECK_PARTIAL();
2293
2.27k
            break;
2294
2.27k
            }
2295
260k
#ifdef SUPPORT_UNICODE
2296
260k
          GETCHARLENTEST(fc, Feptr, len);
2297
#else
2298
          fc = *Feptr;
2299
#endif
2300
260k
          if (!PRIV(xclass)(fc, Lxclass_data,
2301
260k
              (const uint8_t*)mb->start_code, utf)) break;
2302
243k
          Feptr += len;
2303
243k
          }
2304
2305
21.4k
        if (reptype == REPTYPE_POS) continue;    /* No backtracking */
2306
2307
        /* After \C in UTF mode, Lstart_eptr might be in the middle of a
2308
        Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
2309
        go too far. */
2310
2311
13.9k
        for(;;)
2312
238k
          {
2313
238k
          RMATCH(Fecode, RM101);
2314
238k
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2315
238k
          if (Feptr-- <= Lstart_eptr) break;  /* Tried at original position */
2316
225k
#ifdef SUPPORT_UNICODE
2317
225k
          if (utf) BACKCHAR(Feptr);
2318
225k
#endif
2319
225k
          }
2320
13.9k
        RRETURN(MATCH_NOMATCH);
2321
13.9k
        }
2322
2323
0
      PCRE2_UNREACHABLE(); /* Control never reaches here */
2324
0
      }
2325
0
#endif  /* SUPPORT_WIDE_CHARS: end of XCLASS */
2326
2327
0
#undef Lstart_eptr
2328
0
#undef Lxclass_data
2329
0
#undef Lmin
2330
0
#undef Lmax
2331
2332
2333
    /* ===================================================================== */
2334
    /* Match a complex, set-based character class. This opcode is used when
2335
    there is complex nesting or logical operations within the character
2336
    class. */
2337
2338
0
#define Lstart_eptr  F->temp_sptr[0]
2339
0
#define Leclass_data F->temp_sptr[1]
2340
0
#define Leclass_len  F->temp_size
2341
0
#define Lmin         F->temp_32[0]
2342
0
#define Lmax         F->temp_32[1]
2343
2344
0
#ifdef SUPPORT_WIDE_CHARS
2345
0
    case OP_ECLASS:
2346
0
      {
2347
0
      Leclass_data = Fecode + 1 + LINK_SIZE;  /* Save for matching */
2348
0
      Fecode += GET(Fecode, 1);               /* Advance past the item */
2349
0
      Leclass_len = (PCRE2_SIZE)(Fecode - Leclass_data);
2350
2351
0
      switch (*Fecode)
2352
0
        {
2353
0
        case OP_CRSTAR:
2354
0
        case OP_CRMINSTAR:
2355
0
        case OP_CRPLUS:
2356
0
        case OP_CRMINPLUS:
2357
0
        case OP_CRQUERY:
2358
0
        case OP_CRMINQUERY:
2359
0
        case OP_CRPOSSTAR:
2360
0
        case OP_CRPOSPLUS:
2361
0
        case OP_CRPOSQUERY:
2362
0
        fc = *Fecode++ - OP_CRSTAR;
2363
0
        Lmin = rep_min[fc];
2364
0
        Lmax = rep_max[fc];
2365
0
        reptype = rep_typ[fc];
2366
0
        break;
2367
2368
0
        case OP_CRRANGE:
2369
0
        case OP_CRMINRANGE:
2370
0
        case OP_CRPOSRANGE:
2371
0
        Lmin = GET2(Fecode, 1);
2372
0
        Lmax = GET2(Fecode, 1 + IMM2_SIZE);
2373
0
        if (Lmax == 0) Lmax = UINT32_MAX;  /* Max 0 => infinity */
2374
0
        reptype = rep_typ[*Fecode - OP_CRSTAR];
2375
0
        Fecode += 1 + 2 * IMM2_SIZE;
2376
0
        break;
2377
2378
0
        default:               /* No repeat follows */
2379
0
        Lmin = Lmax = 1;
2380
0
        break;
2381
0
        }
2382
2383
      /* First, ensure the minimum number of matches are present. */
2384
2385
0
      for (i = 1; i <= Lmin; i++)
2386
0
        {
2387
0
        if (Feptr >= mb->end_subject)
2388
0
          {
2389
0
          SCHECK_PARTIAL();
2390
0
          RRETURN(MATCH_NOMATCH);
2391
0
          }
2392
0
        GETCHARINCTEST(fc, Feptr);
2393
0
        if (!PRIV(eclass)(fc, Leclass_data, Leclass_data + Leclass_len,
2394
0
                          (const uint8_t*)mb->start_code, utf))
2395
0
          RRETURN(MATCH_NOMATCH);
2396
0
        }
2397
2398
      /* If Lmax == Lmin we can just continue with the main loop. */
2399
2400
0
      if (Lmin == Lmax) continue;
2401
2402
      /* If minimizing, keep testing the rest of the expression and advancing
2403
      the pointer while it matches the class. */
2404
2405
0
      if (reptype == REPTYPE_MIN)
2406
0
        {
2407
0
        for (;;)
2408
0
          {
2409
0
          RMATCH(Fecode, RM102);
2410
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2411
0
          if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
2412
0
          if (Feptr >= mb->end_subject)
2413
0
            {
2414
0
            SCHECK_PARTIAL();
2415
0
            RRETURN(MATCH_NOMATCH);
2416
0
            }
2417
0
          GETCHARINCTEST(fc, Feptr);
2418
0
          if (!PRIV(eclass)(fc, Leclass_data, Leclass_data + Leclass_len,
2419
0
                            (const uint8_t*)mb->start_code, utf))
2420
0
            RRETURN(MATCH_NOMATCH);
2421
0
          }
2422
0
        PCRE2_UNREACHABLE(); /* Control never reaches here */
2423
0
        }
2424
2425
      /* If maximizing, find the longest possible run, then work backwards. */
2426
2427
0
      else
2428
0
        {
2429
0
        Lstart_eptr = Feptr;
2430
0
        for (i = Lmin; i < Lmax; i++)
2431
0
          {
2432
0
          int len = 1;
2433
0
          if (Feptr >= mb->end_subject)
2434
0
            {
2435
0
            SCHECK_PARTIAL();
2436
0
            break;
2437
0
            }
2438
0
#ifdef SUPPORT_UNICODE
2439
0
          GETCHARLENTEST(fc, Feptr, len);
2440
#else
2441
          fc = *Feptr;
2442
#endif
2443
0
          if (!PRIV(eclass)(fc, Leclass_data, Leclass_data + Leclass_len,
2444
0
                            (const uint8_t*)mb->start_code, utf))
2445
0
            break;
2446
0
          Feptr += len;
2447
0
          }
2448
2449
0
        if (reptype == REPTYPE_POS) continue;    /* No backtracking */
2450
2451
        /* After \C in UTF mode, Lstart_eptr might be in the middle of a
2452
        Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
2453
        go too far. */
2454
2455
0
        for(;;)
2456
0
          {
2457
0
          RMATCH(Fecode, RM103);
2458
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2459
0
          if (Feptr-- <= Lstart_eptr) break;  /* Tried at original position */
2460
0
#ifdef SUPPORT_UNICODE
2461
0
          if (utf) BACKCHAR(Feptr);
2462
0
#endif
2463
0
          }
2464
0
        RRETURN(MATCH_NOMATCH);
2465
0
        }
2466
2467
0
      PCRE2_UNREACHABLE(); /* Control never reaches here */
2468
0
      }
2469
0
#endif  /* SUPPORT_WIDE_CHARS: end of ECLASS */
2470
2471
0
#undef Lstart_eptr
2472
0
#undef Leclass_data
2473
0
#undef Leclass_len
2474
0
#undef Lmin
2475
0
#undef Lmax
2476
2477
2478
    /* ===================================================================== */
2479
    /* Match various character types when PCRE2_UCP is not set. These opcodes
2480
    are not generated when PCRE2_UCP is set - instead appropriate property
2481
    tests are compiled. */
2482
2483
289k
    case OP_NOT_DIGIT:
2484
289k
    if (Feptr >= mb->end_subject)
2485
2.40k
      {
2486
2.40k
      SCHECK_PARTIAL();
2487
2.40k
      RRETURN(MATCH_NOMATCH);
2488
2.40k
      }
2489
287k
    GETCHARINCTEST(fc, Feptr);
2490
287k
    if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_digit) != 0)
2491
36.3k
      RRETURN(MATCH_NOMATCH);
2492
251k
    Fecode++;
2493
251k
    break;
2494
2495
1.48k
    case OP_DIGIT:
2496
1.48k
    if (Feptr >= mb->end_subject)
2497
7
      {
2498
7
      SCHECK_PARTIAL();
2499
7
      RRETURN(MATCH_NOMATCH);
2500
7
      }
2501
1.47k
    GETCHARINCTEST(fc, Feptr);
2502
1.47k
    if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_digit) == 0)
2503
999
      RRETURN(MATCH_NOMATCH);
2504
477
    Fecode++;
2505
477
    break;
2506
2507
379k
    case OP_NOT_WHITESPACE:
2508
379k
    if (Feptr >= mb->end_subject)
2509
1.31k
      {
2510
1.31k
      SCHECK_PARTIAL();
2511
1.31k
      RRETURN(MATCH_NOMATCH);
2512
1.31k
      }
2513
378k
    GETCHARINCTEST(fc, Feptr);
2514
378k
    if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_space) != 0)
2515
14.5k
      RRETURN(MATCH_NOMATCH);
2516
363k
    Fecode++;
2517
363k
    break;
2518
2519
22.2k
    case OP_WHITESPACE:
2520
22.2k
    if (Feptr >= mb->end_subject)
2521
0
      {
2522
0
      SCHECK_PARTIAL();
2523
0
      RRETURN(MATCH_NOMATCH);
2524
0
      }
2525
22.2k
    GETCHARINCTEST(fc, Feptr);
2526
22.2k
    if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_space) == 0)
2527
20.1k
      RRETURN(MATCH_NOMATCH);
2528
2.14k
    Fecode++;
2529
2.14k
    break;
2530
2531
5.16M
    case OP_NOT_WORDCHAR:
2532
5.16M
    if (Feptr >= mb->end_subject)
2533
91.8k
      {
2534
91.8k
      SCHECK_PARTIAL();
2535
91.8k
      RRETURN(MATCH_NOMATCH);
2536
91.8k
      }
2537
5.07M
    GETCHARINCTEST(fc, Feptr);
2538
5.07M
    if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0)
2539
318k
      RRETURN(MATCH_NOMATCH);
2540
4.75M
    Fecode++;
2541
4.75M
    break;
2542
2543
5.31M
    case OP_WORDCHAR:
2544
5.31M
    if (Feptr >= mb->end_subject)
2545
29.4k
      {
2546
29.4k
      SCHECK_PARTIAL();
2547
29.4k
      RRETURN(MATCH_NOMATCH);
2548
29.4k
      }
2549
5.28M
    GETCHARINCTEST(fc, Feptr);
2550
5.28M
    if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_word) == 0)
2551
4.54M
      RRETURN(MATCH_NOMATCH);
2552
739k
    Fecode++;
2553
739k
    break;
2554
2555
236M
    case OP_ANYNL:
2556
236M
    if (Feptr >= mb->end_subject)
2557
2.97M
      {
2558
2.97M
      SCHECK_PARTIAL();
2559
2.97M
      RRETURN(MATCH_NOMATCH);
2560
2.97M
      }
2561
233M
    GETCHARINCTEST(fc, Feptr);
2562
233M
    switch(fc)
2563
233M
      {
2564
220M
      default: RRETURN(MATCH_NOMATCH);
2565
2566
38.9k
      case CHAR_CR:
2567
38.9k
      if (Feptr >= mb->end_subject)
2568
690
        {
2569
690
        SCHECK_PARTIAL();
2570
690
        }
2571
38.2k
      else if (UCHAR21TEST(Feptr) == CHAR_LF) Feptr++;
2572
38.9k
      break;
2573
2574
8.16M
      case CHAR_LF:
2575
8.16M
      break;
2576
2577
3.49M
      case CHAR_VT:
2578
5.59M
      case CHAR_FF:
2579
5.61M
      case CHAR_NEL:
2580
5.61M
#ifndef EBCDIC
2581
5.61M
      case 0x2028:
2582
5.61M
      case 0x2029:
2583
5.61M
#endif  /* Not EBCDIC */
2584
5.61M
      if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
2585
5.61M
      break;
2586
233M
      }
2587
13.8M
    Fecode++;
2588
13.8M
    break;
2589
2590
4.63M
    case OP_NOT_HSPACE:
2591
4.63M
    if (Feptr >= mb->end_subject)
2592
62.5k
      {
2593
62.5k
      SCHECK_PARTIAL();
2594
62.5k
      RRETURN(MATCH_NOMATCH);
2595
62.5k
      }
2596
4.57M
    GETCHARINCTEST(fc, Feptr);
2597
4.57M
    switch(fc)
2598
4.57M
      {
2599
2.75M
      HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
2600
4.42M
      default: break;
2601
4.57M
      }
2602
4.42M
    Fecode++;
2603
4.42M
    break;
2604
2605
194k
    case OP_HSPACE:
2606
194k
    if (Feptr >= mb->end_subject)
2607
1.35k
      {
2608
1.35k
      SCHECK_PARTIAL();
2609
1.35k
      RRETURN(MATCH_NOMATCH);
2610
1.35k
      }
2611
192k
    GETCHARINCTEST(fc, Feptr);
2612
192k
    switch(fc)
2613
192k
      {
2614
5.85k
      HSPACE_CASES: break;  /* Byte and multibyte cases */
2615
187k
      default: RRETURN(MATCH_NOMATCH);
2616
192k
      }
2617
5.85k
    Fecode++;
2618
5.85k
    break;
2619
2620
4.06M
    case OP_NOT_VSPACE:
2621
4.06M
    if (Feptr >= mb->end_subject)
2622
27.8k
      {
2623
27.8k
      SCHECK_PARTIAL();
2624
27.8k
      RRETURN(MATCH_NOMATCH);
2625
27.8k
      }
2626
4.03M
    GETCHARINCTEST(fc, Feptr);
2627
4.03M
    switch(fc)
2628
4.03M
      {
2629
910k
      VSPACE_CASES: RRETURN(MATCH_NOMATCH);
2630
3.86M
      default: break;
2631
4.03M
      }
2632
3.86M
    Fecode++;
2633
3.86M
    break;
2634
2635
5.65M
    case OP_VSPACE:
2636
5.65M
    if (Feptr >= mb->end_subject)
2637
13.6k
      {
2638
13.6k
      SCHECK_PARTIAL();
2639
13.6k
      RRETURN(MATCH_NOMATCH);
2640
13.6k
      }
2641
5.64M
    GETCHARINCTEST(fc, Feptr);
2642
5.64M
    switch(fc)
2643
5.64M
      {
2644
251k
      VSPACE_CASES: break;
2645
5.39M
      default: RRETURN(MATCH_NOMATCH);
2646
5.64M
      }
2647
251k
    Fecode++;
2648
251k
    break;
2649
2650
2651
0
#ifdef SUPPORT_UNICODE
2652
2653
    /* ===================================================================== */
2654
    /* Check the next character by Unicode property. We will get here only
2655
    if the support is in the binary; otherwise a compile-time error occurs. */
2656
2657
125k
    case OP_PROP:
2658
156k
    case OP_NOTPROP:
2659
156k
    if (Feptr >= mb->end_subject)
2660
1.31k
      {
2661
1.31k
      SCHECK_PARTIAL();
2662
1.31k
      RRETURN(MATCH_NOMATCH);
2663
1.31k
      }
2664
155k
    GETCHARINCTEST(fc, Feptr);
2665
155k
      {
2666
155k
      const uint32_t *cp;
2667
155k
      uint32_t chartype;
2668
155k
      const ucd_record *prop = GET_UCD(fc);
2669
155k
      BOOL notmatch = Fop == OP_NOTPROP;
2670
2671
155k
      switch(Fecode[1])
2672
155k
        {
2673
0
        case PT_LAMP:
2674
0
        chartype = prop->chartype;
2675
0
        if ((chartype == ucp_Lu ||
2676
0
             chartype == ucp_Ll ||
2677
0
             chartype == ucp_Lt) == notmatch)
2678
0
          RRETURN(MATCH_NOMATCH);
2679
0
        break;
2680
2681
4.43k
        case PT_GC:
2682
4.43k
        if ((Fecode[2] == PRIV(ucp_gentype)[prop->chartype]) == notmatch)
2683
546
          RRETURN(MATCH_NOMATCH);
2684
3.88k
        break;
2685
2686
3.88k
        case PT_PC:
2687
786
        if ((Fecode[2] == prop->chartype) == notmatch)
2688
770
          RRETURN(MATCH_NOMATCH);
2689
16
        break;
2690
2691
16
        case PT_SC:
2692
0
        if ((Fecode[2] == prop->script) == notmatch)
2693
0
          RRETURN(MATCH_NOMATCH);
2694
0
        break;
2695
2696
0
        case PT_SCX:
2697
0
          {
2698
0
          BOOL ok = (Fecode[2] == prop->script ||
2699
0
                     MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Fecode[2]) != 0);
2700
0
          if (ok == notmatch) RRETURN(MATCH_NOMATCH);
2701
0
          }
2702
0
        break;
2703
2704
        /* These are specials */
2705
2706
0
        case PT_ALNUM:
2707
0
        chartype = prop->chartype;
2708
0
        if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
2709
0
             PRIV(ucp_gentype)[chartype] == ucp_N) == notmatch)
2710
0
          RRETURN(MATCH_NOMATCH);
2711
0
        break;
2712
2713
        /* Perl space used to exclude VT, but from Perl 5.18 it is included,
2714
        which means that Perl space and POSIX space are now identical. PCRE
2715
        was changed at release 8.34. */
2716
2717
25.4k
        case PT_SPACE:    /* Perl space */
2718
25.4k
        case PT_PXSPACE:  /* POSIX space */
2719
25.4k
        switch(fc)
2720
25.4k
          {
2721
55.9k
          HSPACE_CASES:
2722
55.9k
          VSPACE_CASES:
2723
31.1k
          if (notmatch) RRETURN(MATCH_NOMATCH);
2724
0
          break;
2725
2726
20.9k
          default:
2727
20.9k
          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == notmatch)
2728
464
            RRETURN(MATCH_NOMATCH);
2729
20.4k
          break;
2730
25.4k
          }
2731
20.4k
        break;
2732
2733
70.0k
        case PT_WORD:
2734
70.0k
        chartype = prop->chartype;
2735
70.0k
        if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
2736
70.0k
             PRIV(ucp_gentype)[chartype] == ucp_N ||
2737
70.0k
             chartype == ucp_Mn ||
2738
70.0k
             chartype == ucp_Pc) == notmatch)
2739
54.2k
          RRETURN(MATCH_NOMATCH);
2740
15.7k
        break;
2741
2742
54.5k
        case PT_CLIST:
2743
#if PCRE2_CODE_UNIT_WIDTH == 32
2744
            if (fc > MAX_UTF_CODE_POINT)
2745
              {
2746
              if (notmatch) break;;
2747
              RRETURN(MATCH_NOMATCH);
2748
              }
2749
#endif
2750
54.5k
        cp = PRIV(ucd_caseless_sets) + Fecode[2];
2751
54.5k
        for (;;)
2752
71.8k
          {
2753
71.8k
          if (fc < *cp)
2754
52.9k
            { if (notmatch) break; else { RRETURN(MATCH_NOMATCH); } }
2755
18.8k
          if (fc == *cp++)
2756
1.53k
            { if (notmatch) { RRETURN(MATCH_NOMATCH); } else break; }
2757
18.8k
          }
2758
2.69k
        break;
2759
2760
2.69k
        case PT_UCNC:
2761
0
        if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
2762
0
             fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
2763
0
             fc >= 0xe000) == notmatch)
2764
0
          RRETURN(MATCH_NOMATCH);
2765
0
        break;
2766
2767
0
        case PT_BIDICL:
2768
0
        if ((UCD_BIDICLASS_PROP(prop) == Fecode[2]) == notmatch)
2769
0
          RRETURN(MATCH_NOMATCH);
2770
0
        break;
2771
2772
0
        case PT_BOOL:
2773
0
          {
2774
0
          BOOL ok = MAPBIT(PRIV(ucd_boolprop_sets) +
2775
0
            UCD_BPROPS_PROP(prop), Fecode[2]) != 0;
2776
0
          if (ok == notmatch) RRETURN(MATCH_NOMATCH);
2777
0
          }
2778
0
        break;
2779
2780
        /* This should never occur */
2781
2782
0
        default:
2783
0
        PCRE2_DEBUG_UNREACHABLE();
2784
0
        return PCRE2_ERROR_INTERNAL;
2785
155k
        }
2786
2787
42.8k
      Fecode += 3;
2788
42.8k
      }
2789
0
    break;
2790
2791
2792
    /* ===================================================================== */
2793
    /* Match an extended Unicode sequence. We will get here only if the support
2794
    is in the binary; otherwise a compile-time error occurs. */
2795
2796
25.4k
    case OP_EXTUNI:
2797
25.4k
    if (Feptr >= mb->end_subject)
2798
414
      {
2799
414
      SCHECK_PARTIAL();
2800
414
      RRETURN(MATCH_NOMATCH);
2801
414
      }
2802
25.0k
    else
2803
25.0k
      {
2804
25.0k
      GETCHARINCTEST(fc, Feptr);
2805
25.0k
      Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject, utf,
2806
25.0k
        NULL);
2807
25.0k
      }
2808
25.0k
    CHECK_PARTIAL();
2809
25.0k
    Fecode++;
2810
25.0k
    break;
2811
2812
0
#endif  /* SUPPORT_UNICODE */
2813
2814
2815
    /* ===================================================================== */
2816
    /* Match a single character type repeatedly. Note that the property type
2817
    does not need to be in a stack frame as it is not used within an RMATCH()
2818
    loop. */
2819
2820
850M
#define Lstart_eptr  F->temp_sptr[0]
2821
326M
#define Lmin         F->temp_32[0]
2822
333M
#define Lmax         F->temp_32[1]
2823
780M
#define Lctype       F->temp_32[2]
2824
2.59M
#define Lpropvalue   F->temp_32[3]
2825
2826
0
    case OP_TYPEEXACT:
2827
0
    Lmin = Lmax = GET2(Fecode, 1);
2828
0
    Fecode += 1 + IMM2_SIZE;
2829
0
    goto REPEATTYPE;
2830
2831
0
    case OP_TYPEUPTO:
2832
0
    case OP_TYPEMINUPTO:
2833
0
    Lmin = 0;
2834
0
    Lmax = GET2(Fecode, 1);
2835
0
    reptype = (*Fecode == OP_TYPEMINUPTO)? REPTYPE_MIN : REPTYPE_MAX;
2836
0
    Fecode += 1 + IMM2_SIZE;
2837
0
    goto REPEATTYPE;
2838
2839
22.5k
    case OP_TYPEPOSSTAR:
2840
22.5k
    reptype = REPTYPE_POS;
2841
22.5k
    Lmin = 0;
2842
22.5k
    Lmax = UINT32_MAX;
2843
22.5k
    Fecode++;
2844
22.5k
    goto REPEATTYPE;
2845
2846
5.01M
    case OP_TYPEPOSPLUS:
2847
5.01M
    reptype = REPTYPE_POS;
2848
5.01M
    Lmin = 1;
2849
5.01M
    Lmax = UINT32_MAX;
2850
5.01M
    Fecode++;
2851
5.01M
    goto REPEATTYPE;
2852
2853
52.2M
    case OP_TYPEPOSQUERY:
2854
52.2M
    reptype = REPTYPE_POS;
2855
52.2M
    Lmin = 0;
2856
52.2M
    Lmax = 1;
2857
52.2M
    Fecode++;
2858
52.2M
    goto REPEATTYPE;
2859
2860
0
    case OP_TYPEPOSUPTO:
2861
0
    reptype = REPTYPE_POS;
2862
0
    Lmin = 0;
2863
0
    Lmax = GET2(Fecode, 1);
2864
0
    Fecode += 1 + IMM2_SIZE;
2865
0
    goto REPEATTYPE;
2866
2867
17.4k
    case OP_TYPESTAR:
2868
18.3k
    case OP_TYPEMINSTAR:
2869
8.16M
    case OP_TYPEPLUS:
2870
8.44M
    case OP_TYPEMINPLUS:
2871
17.1M
    case OP_TYPEQUERY:
2872
17.2M
    case OP_TYPEMINQUERY:
2873
17.2M
    fc = *Fecode++ - OP_TYPESTAR;
2874
17.2M
    Lmin = rep_min[fc];
2875
17.2M
    Lmax = rep_max[fc];
2876
17.2M
    reptype = rep_typ[fc];
2877
2878
    /* Common code for all repeated character type matches. */
2879
2880
74.4M
    REPEATTYPE:
2881
74.4M
    Lctype = *Fecode++;      /* Code for the character type */
2882
2883
74.4M
#ifdef SUPPORT_UNICODE
2884
74.4M
    if (Lctype == OP_PROP || Lctype == OP_NOTPROP)
2885
2.33M
      {
2886
2.33M
      proptype = *Fecode++;
2887
2.33M
      Lpropvalue = *Fecode++;
2888
2.33M
      }
2889
72.1M
    else proptype = -1;
2890
74.4M
#endif
2891
2892
    /* First, ensure the minimum number of matches are present. Use inline
2893
    code for maximizing the speed, and do the type test once at the start
2894
    (i.e. keep it out of the loops). As there are no calls to RMATCH in the
2895
    loops, we can use an ordinary variable for "notmatch". The code for UTF
2896
    mode is separated out for tidiness, except for Unicode property tests. */
2897
2898
74.4M
    if (Lmin > 0)
2899
13.4M
      {
2900
13.4M
#ifdef SUPPORT_UNICODE
2901
13.4M
      if (proptype >= 0)  /* Property tests in all modes */
2902
2.32M
        {
2903
2.32M
        BOOL notmatch = Lctype == OP_NOTPROP;
2904
2.32M
        switch(proptype)
2905
2.32M
          {
2906
0
          case PT_LAMP:
2907
0
          for (i = 1; i <= Lmin; i++)
2908
0
            {
2909
0
            int chartype;
2910
0
            if (Feptr >= mb->end_subject)
2911
0
              {
2912
0
              SCHECK_PARTIAL();
2913
0
              RRETURN(MATCH_NOMATCH);
2914
0
              }
2915
0
            GETCHARINCTEST(fc, Feptr);
2916
0
            chartype = UCD_CHARTYPE(fc);
2917
0
            if ((chartype == ucp_Lu ||
2918
0
                 chartype == ucp_Ll ||
2919
0
                 chartype == ucp_Lt) == notmatch)
2920
0
              RRETURN(MATCH_NOMATCH);
2921
0
            }
2922
0
          break;
2923
2924
9.12k
          case PT_GC:
2925
12.3k
          for (i = 1; i <= Lmin; i++)
2926
9.12k
            {
2927
9.12k
            if (Feptr >= mb->end_subject)
2928
0
              {
2929
0
              SCHECK_PARTIAL();
2930
0
              RRETURN(MATCH_NOMATCH);
2931
0
              }
2932
9.12k
            GETCHARINCTEST(fc, Feptr);
2933
9.12k
            if ((UCD_CATEGORY(fc) == Lpropvalue) == notmatch)
2934
5.90k
              RRETURN(MATCH_NOMATCH);
2935
9.12k
            }
2936
3.22k
          break;
2937
2938
147k
          case PT_PC:
2939
151k
          for (i = 1; i <= Lmin; i++)
2940
147k
            {
2941
147k
            if (Feptr >= mb->end_subject)
2942
0
              {
2943
0
              SCHECK_PARTIAL();
2944
0
              RRETURN(MATCH_NOMATCH);
2945
0
              }
2946
147k
            GETCHARINCTEST(fc, Feptr);
2947
147k
            if ((UCD_CHARTYPE(fc) == Lpropvalue) == notmatch)
2948
143k
              RRETURN(MATCH_NOMATCH);
2949
147k
            }
2950
3.95k
          break;
2951
2952
3.95k
          case PT_SC:
2953
0
          for (i = 1; i <= Lmin; i++)
2954
0
            {
2955
0
            if (Feptr >= mb->end_subject)
2956
0
              {
2957
0
              SCHECK_PARTIAL();
2958
0
              RRETURN(MATCH_NOMATCH);
2959
0
              }
2960
0
            GETCHARINCTEST(fc, Feptr);
2961
0
            if ((UCD_SCRIPT(fc) == Lpropvalue) == notmatch)
2962
0
              RRETURN(MATCH_NOMATCH);
2963
0
            }
2964
0
          break;
2965
2966
0
          case PT_SCX:
2967
0
          for (i = 1; i <= Lmin; i++)
2968
0
            {
2969
0
            BOOL ok;
2970
0
            const ucd_record *prop;
2971
0
            if (Feptr >= mb->end_subject)
2972
0
              {
2973
0
              SCHECK_PARTIAL();
2974
0
              RRETURN(MATCH_NOMATCH);
2975
0
              }
2976
0
            GETCHARINCTEST(fc, Feptr);
2977
0
            prop = GET_UCD(fc);
2978
0
            ok = (prop->script == Lpropvalue ||
2979
0
                  MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0);
2980
0
            if (ok == notmatch)
2981
0
              RRETURN(MATCH_NOMATCH);
2982
0
            }
2983
0
          break;
2984
2985
0
          case PT_ALNUM:
2986
0
          for (i = 1; i <= Lmin; i++)
2987
0
            {
2988
0
            int category;
2989
0
            if (Feptr >= mb->end_subject)
2990
0
              {
2991
0
              SCHECK_PARTIAL();
2992
0
              RRETURN(MATCH_NOMATCH);
2993
0
              }
2994
0
            GETCHARINCTEST(fc, Feptr);
2995
0
            category = UCD_CATEGORY(fc);
2996
0
            if ((category == ucp_L || category == ucp_N) == notmatch)
2997
0
              RRETURN(MATCH_NOMATCH);
2998
0
            }
2999
0
          break;
3000
3001
          /* Perl space used to exclude VT, but from Perl 5.18 it is included,
3002
          which means that Perl space and POSIX space are now identical. PCRE
3003
          was changed at release 8.34. */
3004
3005
2.09M
          case PT_SPACE:    /* Perl space */
3006
2.09M
          case PT_PXSPACE:  /* POSIX space */
3007
2.95M
          for (i = 1; i <= Lmin; i++)
3008
2.09M
            {
3009
2.09M
            if (Feptr >= mb->end_subject)
3010
12.5k
              {
3011
12.5k
              SCHECK_PARTIAL();
3012
12.5k
              RRETURN(MATCH_NOMATCH);
3013
12.5k
              }
3014
2.08M
            GETCHARINCTEST(fc, Feptr);
3015
2.08M
            switch(fc)
3016
2.08M
              {
3017
9.60M
              HSPACE_CASES:
3018
9.60M
              VSPACE_CASES:
3019
4.37M
              if (notmatch) RRETURN(MATCH_NOMATCH);
3020
579k
              break;
3021
3022
1.45M
              default:
3023
1.45M
              if ((UCD_CATEGORY(fc) == ucp_Z) == notmatch)
3024
1.17M
                RRETURN(MATCH_NOMATCH);
3025
280k
              break;
3026
2.08M
              }
3027
2.08M
            }
3028
860k
          break;
3029
3030
860k
          case PT_WORD:
3031
95.9k
          for (i = 1; i <= Lmin; i++)
3032
72.0k
            {
3033
72.0k
            int chartype, category;
3034
72.0k
            if (Feptr >= mb->end_subject)
3035
1.13k
              {
3036
1.13k
              SCHECK_PARTIAL();
3037
1.13k
              RRETURN(MATCH_NOMATCH);
3038
1.13k
              }
3039
70.9k
            GETCHARINCTEST(fc, Feptr);
3040
70.9k
            chartype = UCD_CHARTYPE(fc);
3041
70.9k
            category = PRIV(ucp_gentype)[chartype];
3042
70.9k
            if ((category == ucp_L || category == ucp_N ||
3043
70.9k
                 chartype == ucp_Mn || chartype == ucp_Pc) == notmatch)
3044
47.0k
              RRETURN(MATCH_NOMATCH);
3045
70.9k
            }
3046
23.8k
          break;
3047
3048
23.8k
          case PT_CLIST:
3049
0
          for (i = 1; i <= Lmin; i++)
3050
0
            {
3051
0
            const uint32_t *cp;
3052
0
            if (Feptr >= mb->end_subject)
3053
0
              {
3054
0
              SCHECK_PARTIAL();
3055
0
              RRETURN(MATCH_NOMATCH);
3056
0
              }
3057
0
            GETCHARINCTEST(fc, Feptr);
3058
#if PCRE2_CODE_UNIT_WIDTH == 32
3059
            if (fc > MAX_UTF_CODE_POINT)
3060
              {
3061
              if (notmatch) continue;
3062
              RRETURN(MATCH_NOMATCH);
3063
              }
3064
#endif
3065
0
            cp = PRIV(ucd_caseless_sets) + Lpropvalue;
3066
0
            for (;;)
3067
0
              {
3068
0
              if (fc < *cp)
3069
0
                {
3070
0
                if (notmatch) break;
3071
0
                RRETURN(MATCH_NOMATCH);
3072
0
                }
3073
0
              if (fc == *cp++)
3074
0
                {
3075
0
                if (notmatch) RRETURN(MATCH_NOMATCH);
3076
0
                break;
3077
0
                }
3078
0
              }
3079
0
            }
3080
0
          break;
3081
3082
0
          case PT_UCNC:
3083
0
          for (i = 1; i <= Lmin; i++)
3084
0
            {
3085
0
            if (Feptr >= mb->end_subject)
3086
0
              {
3087
0
              SCHECK_PARTIAL();
3088
0
              RRETURN(MATCH_NOMATCH);
3089
0
              }
3090
0
            GETCHARINCTEST(fc, Feptr);
3091
0
            if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
3092
0
                 fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
3093
0
                 fc >= 0xe000) == notmatch)
3094
0
              RRETURN(MATCH_NOMATCH);
3095
0
            }
3096
0
          break;
3097
3098
0
          case PT_BIDICL:
3099
0
          for (i = 1; i <= Lmin; i++)
3100
0
            {
3101
0
            if (Feptr >= mb->end_subject)
3102
0
              {
3103
0
              SCHECK_PARTIAL();
3104
0
              RRETURN(MATCH_NOMATCH);
3105
0
              }
3106
0
            GETCHARINCTEST(fc, Feptr);
3107
0
            if ((UCD_BIDICLASS(fc) == Lpropvalue) == notmatch)
3108
0
              RRETURN(MATCH_NOMATCH);
3109
0
            }
3110
0
          break;
3111
3112
0
          case PT_BOOL:
3113
0
          for (i = 1; i <= Lmin; i++)
3114
0
            {
3115
0
            BOOL ok;
3116
0
            const ucd_record *prop;
3117
0
            if (Feptr >= mb->end_subject)
3118
0
              {
3119
0
              SCHECK_PARTIAL();
3120
0
              RRETURN(MATCH_NOMATCH);
3121
0
              }
3122
0
            GETCHARINCTEST(fc, Feptr);
3123
0
            prop = GET_UCD(fc);
3124
0
            ok = MAPBIT(PRIV(ucd_boolprop_sets) +
3125
0
              UCD_BPROPS_PROP(prop), Lpropvalue) != 0;
3126
0
            if (ok == notmatch)
3127
0
              RRETURN(MATCH_NOMATCH);
3128
0
            }
3129
0
          break;
3130
3131
          /* This should not occur */
3132
3133
0
          default:
3134
0
          PCRE2_DEBUG_UNREACHABLE();
3135
0
          return PCRE2_ERROR_INTERNAL;
3136
2.32M
          }
3137
2.32M
        }
3138
3139
      /* Match extended Unicode sequences. We will get here only if the
3140
      support is in the binary; otherwise a compile-time error occurs. */
3141
3142
11.1M
      else if (Lctype == OP_EXTUNI)
3143
124k
        {
3144
249k
        for (i = 1; i <= Lmin; i++)
3145
124k
          {
3146
124k
          if (Feptr >= mb->end_subject)
3147
36
            {
3148
36
            SCHECK_PARTIAL();
3149
36
            RRETURN(MATCH_NOMATCH);
3150
36
            }
3151
124k
          else
3152
124k
            {
3153
124k
            GETCHARINCTEST(fc, Feptr);
3154
124k
            Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject,
3155
124k
              mb->end_subject, utf, NULL);
3156
124k
            }
3157
124k
          CHECK_PARTIAL();
3158
124k
          }
3159
124k
        }
3160
10.9M
      else
3161
10.9M
#endif     /* SUPPORT_UNICODE */
3162
3163
/* Handle all other cases in UTF mode */
3164
3165
10.9M
#ifdef SUPPORT_UNICODE
3166
10.9M
      if (utf) switch(Lctype)
3167
3.98M
        {
3168
1.45k
        case OP_ANY:
3169
2.90k
        for (i = 1; i <= Lmin; i++)
3170
1.45k
          {
3171
1.45k
          if (Feptr >= mb->end_subject)
3172
0
            {
3173
0
            SCHECK_PARTIAL();
3174
0
            RRETURN(MATCH_NOMATCH);
3175
0
            }
3176
1.45k
          if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
3177
1.45k
          if (mb->partial != 0 &&
3178
1.45k
              Feptr + 1 >= mb->end_subject &&
3179
1.45k
              NLBLOCK->nltype == NLTYPE_FIXED &&
3180
1.45k
              NLBLOCK->nllen == 2 &&
3181
1.45k
              UCHAR21(Feptr) == NLBLOCK->nl[0])
3182
0
            {
3183
0
            mb->hitend = TRUE;
3184
0
            if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
3185
0
            }
3186
1.45k
          Feptr++;
3187
1.45k
          ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
3188
1.45k
          }
3189
1.45k
        break;
3190
3191
3.94M
        case OP_ALLANY:
3192
7.87M
        for (i = 1; i <= Lmin; i++)
3193
3.94M
          {
3194
3.94M
          if (Feptr >= mb->end_subject)
3195
21.7k
            {
3196
21.7k
            SCHECK_PARTIAL();
3197
21.7k
            RRETURN(MATCH_NOMATCH);
3198
21.7k
            }
3199
3.92M
          Feptr++;
3200
3.92M
          ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
3201
3.92M
          }
3202
3.92M
        break;
3203
3204
3.92M
        case OP_ANYBYTE:
3205
19.0k
        if (Feptr > mb->end_subject - Lmin) RRETURN(MATCH_NOMATCH);
3206
19.0k
        Feptr += Lmin;
3207
19.0k
        break;
3208
3209
0
        case OP_ANYNL:
3210
0
        for (i = 1; i <= Lmin; i++)
3211
0
          {
3212
0
          if (Feptr >= mb->end_subject)
3213
0
            {
3214
0
            SCHECK_PARTIAL();
3215
0
            RRETURN(MATCH_NOMATCH);
3216
0
            }
3217
0
          GETCHARINC(fc, Feptr);
3218
0
          switch(fc)
3219
0
            {
3220
0
            default: RRETURN(MATCH_NOMATCH);
3221
3222
0
            case CHAR_CR:
3223
0
            if (Feptr < mb->end_subject && UCHAR21(Feptr) == CHAR_LF) Feptr++;
3224
0
            break;
3225
3226
0
            case CHAR_LF:
3227
0
            break;
3228
3229
0
            case CHAR_VT:
3230
0
            case CHAR_FF:
3231
0
            case CHAR_NEL:
3232
0
#ifndef EBCDIC
3233
0
            case 0x2028:
3234
0
            case 0x2029:
3235
0
#endif  /* Not EBCDIC */
3236
0
            if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
3237
0
            break;
3238
0
            }
3239
0
          }
3240
0
        break;
3241
3242
9.56k
        case OP_NOT_HSPACE:
3243
17.7k
        for (i = 1; i <= Lmin; i++)
3244
9.56k
          {
3245
9.56k
          if (Feptr >= mb->end_subject)
3246
4
            {
3247
4
            SCHECK_PARTIAL();
3248
4
            RRETURN(MATCH_NOMATCH);
3249
4
            }
3250
9.56k
          GETCHARINC(fc, Feptr);
3251
9.56k
          switch(fc)
3252
9.56k
            {
3253
23.8k
            HSPACE_CASES: RRETURN(MATCH_NOMATCH);
3254
8.22k
            default: break;
3255
9.56k
            }
3256
9.56k
          }
3257
8.22k
        break;
3258
3259
8.22k
        case OP_HSPACE:
3260
0
        for (i = 1; i <= Lmin; i++)
3261
0
          {
3262
0
          if (Feptr >= mb->end_subject)
3263
0
            {
3264
0
            SCHECK_PARTIAL();
3265
0
            RRETURN(MATCH_NOMATCH);
3266
0
            }
3267
0
          GETCHARINC(fc, Feptr);
3268
0
          switch(fc)
3269
0
            {
3270
0
            HSPACE_CASES: break;
3271
0
            default: RRETURN(MATCH_NOMATCH);
3272
0
            }
3273
0
          }
3274
0
        break;
3275
3276
5.74k
        case OP_NOT_VSPACE:
3277
11.1k
        for (i = 1; i <= Lmin; i++)
3278
5.74k
          {
3279
5.74k
          if (Feptr >= mb->end_subject)
3280
0
            {
3281
0
            SCHECK_PARTIAL();
3282
0
            RRETURN(MATCH_NOMATCH);
3283
0
            }
3284
5.74k
          GETCHARINC(fc, Feptr);
3285
5.74k
          switch(fc)
3286
5.74k
            {
3287
2.18k
            VSPACE_CASES: RRETURN(MATCH_NOMATCH);
3288
5.39k
            default: break;
3289
5.74k
            }
3290
5.74k
          }
3291
5.39k
        break;
3292
3293
5.39k
        case OP_VSPACE:
3294
1.88k
        for (i = 1; i <= Lmin; i++)
3295
1.69k
          {
3296
1.69k
          if (Feptr >= mb->end_subject)
3297
0
            {
3298
0
            SCHECK_PARTIAL();
3299
0
            RRETURN(MATCH_NOMATCH);
3300
0
            }
3301
1.69k
          GETCHARINC(fc, Feptr);
3302
1.69k
          switch(fc)
3303
1.69k
            {
3304
183
            VSPACE_CASES: break;
3305
1.51k
            default: RRETURN(MATCH_NOMATCH);
3306
1.69k
            }
3307
1.69k
          }
3308
183
        break;
3309
3310
183
        case OP_NOT_DIGIT:
3311
0
        for (i = 1; i <= Lmin; i++)
3312
0
          {
3313
0
          if (Feptr >= mb->end_subject)
3314
0
            {
3315
0
            SCHECK_PARTIAL();
3316
0
            RRETURN(MATCH_NOMATCH);
3317
0
            }
3318
0
          GETCHARINC(fc, Feptr);
3319
0
          if (fc < 128 && (mb->ctypes[fc] & ctype_digit) != 0)
3320
0
            RRETURN(MATCH_NOMATCH);
3321
0
          }
3322
0
        break;
3323
3324
0
        case OP_DIGIT:
3325
0
        for (i = 1; i <= Lmin; i++)
3326
0
          {
3327
0
          uint32_t cc;
3328
0
          if (Feptr >= mb->end_subject)
3329
0
            {
3330
0
            SCHECK_PARTIAL();
3331
0
            RRETURN(MATCH_NOMATCH);
3332
0
            }
3333
0
          cc = UCHAR21(Feptr);
3334
0
          if (cc >= 128 || (mb->ctypes[cc] & ctype_digit) == 0)
3335
0
            RRETURN(MATCH_NOMATCH);
3336
0
          Feptr++;
3337
          /* No need to skip more code units - we know it has only one. */
3338
0
          }
3339
0
        break;
3340
3341
0
        case OP_NOT_WHITESPACE:
3342
0
        for (i = 1; i <= Lmin; i++)
3343
0
          {
3344
0
          uint32_t cc;
3345
0
          if (Feptr >= mb->end_subject)
3346
0
            {
3347
0
            SCHECK_PARTIAL();
3348
0
            RRETURN(MATCH_NOMATCH);
3349
0
            }
3350
0
          cc = UCHAR21(Feptr);
3351
0
          if (cc < 128 && (mb->ctypes[cc] & ctype_space) != 0)
3352
0
            RRETURN(MATCH_NOMATCH);
3353
0
          Feptr++;
3354
0
          ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
3355
0
          }
3356
0
        break;
3357
3358
0
        case OP_WHITESPACE:
3359
0
        for (i = 1; i <= Lmin; i++)
3360
0
          {
3361
0
          uint32_t cc;
3362
0
          if (Feptr >= mb->end_subject)
3363
0
            {
3364
0
            SCHECK_PARTIAL();
3365
0
            RRETURN(MATCH_NOMATCH);
3366
0
            }
3367
0
          cc = UCHAR21(Feptr);
3368
0
          if (cc >= 128 || (mb->ctypes[cc] & ctype_space) == 0)
3369
0
            RRETURN(MATCH_NOMATCH);
3370
0
          Feptr++;
3371
          /* No need to skip more code units - we know it has only one. */
3372
0
          }
3373
0
        break;
3374
3375
0
        case OP_NOT_WORDCHAR:
3376
0
        for (i = 1; i <= Lmin; i++)
3377
0
          {
3378
0
          uint32_t cc;
3379
0
          if (Feptr >= mb->end_subject)
3380
0
            {
3381
0
            SCHECK_PARTIAL();
3382
0
            RRETURN(MATCH_NOMATCH);
3383
0
            }
3384
0
          cc = UCHAR21(Feptr);
3385
0
          if (cc < 128 && (mb->ctypes[cc] & ctype_word) != 0)
3386
0
            RRETURN(MATCH_NOMATCH);
3387
0
          Feptr++;
3388
0
          ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
3389
0
          }
3390
0
        break;
3391
3392
0
        case OP_WORDCHAR:
3393
0
        for (i = 1; i <= Lmin; i++)
3394
0
          {
3395
0
          uint32_t cc;
3396
0
          if (Feptr >= mb->end_subject)
3397
0
            {
3398
0
            SCHECK_PARTIAL();
3399
0
            RRETURN(MATCH_NOMATCH);
3400
0
            }
3401
0
          cc = UCHAR21(Feptr);
3402
0
          if (cc >= 128 || (mb->ctypes[cc] & ctype_word) == 0)
3403
0
            RRETURN(MATCH_NOMATCH);
3404
0
          Feptr++;
3405
          /* No need to skip more code units - we know it has only one. */
3406
0
          }
3407
0
        break;
3408
3409
0
        default:
3410
0
        PCRE2_DEBUG_UNREACHABLE();
3411
0
        return PCRE2_ERROR_INTERNAL;
3412
3.98M
        }  /* End switch(Lctype) */
3413
3414
7.01M
      else
3415
7.01M
#endif     /* SUPPORT_UNICODE */
3416
3417
      /* Code for the non-UTF case for minimum matching of operators other
3418
      than OP_PROP and OP_NOTPROP. */
3419
3420
7.01M
      switch(Lctype)
3421
7.01M
        {
3422
109k
        case OP_ANY:
3423
216k
        for (i = 1; i <= Lmin; i++)
3424
109k
          {
3425
109k
          if (Feptr >= mb->end_subject)
3426
7
            {
3427
7
            SCHECK_PARTIAL();
3428
7
            RRETURN(MATCH_NOMATCH);
3429
7
            }
3430
109k
          if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
3431
106k
          if (mb->partial != 0 &&
3432
106k
              Feptr + 1 >= mb->end_subject &&
3433
106k
              NLBLOCK->nltype == NLTYPE_FIXED &&
3434
106k
              NLBLOCK->nllen == 2 &&
3435
106k
              *Feptr == NLBLOCK->nl[0])
3436
0
            {
3437
0
            mb->hitend = TRUE;
3438
0
            if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
3439
0
            }
3440
106k
          Feptr++;
3441
106k
          }
3442
106k
        break;
3443
3444
106k
        case OP_ALLANY:
3445
19.9k
        if (Feptr > mb->end_subject - Lmin)
3446
336
          {
3447
336
          SCHECK_PARTIAL();
3448
336
          RRETURN(MATCH_NOMATCH);
3449
336
          }
3450
19.5k
        Feptr += Lmin;
3451
19.5k
        break;
3452
3453
        /* This OP_ANYBYTE case will never be reached because \C gets turned
3454
        into OP_ALLANY in non-UTF mode. Cut out the code so that coverage
3455
        reports don't complain about its never being used. */
3456
3457
/*        case OP_ANYBYTE:
3458
*        if (Feptr > mb->end_subject - Lmin)
3459
*          {
3460
*          SCHECK_PARTIAL();
3461
*          RRETURN(MATCH_NOMATCH);
3462
*          }
3463
*        Feptr += Lmin;
3464
*        break;
3465
*/
3466
4.57M
        case OP_ANYNL:
3467
4.63M
        for (i = 1; i <= Lmin; i++)
3468
4.57M
          {
3469
4.57M
          if (Feptr >= mb->end_subject)
3470
6.23k
            {
3471
6.23k
            SCHECK_PARTIAL();
3472
6.23k
            RRETURN(MATCH_NOMATCH);
3473
6.23k
            }
3474
4.56M
          switch(*Feptr++)
3475
4.56M
            {
3476
4.51M
            default: RRETURN(MATCH_NOMATCH);
3477
3478
18.5k
            case CHAR_CR:
3479
18.5k
            if (Feptr < mb->end_subject && *Feptr == CHAR_LF) Feptr++;
3480
18.5k
            break;
3481
3482
26.8k
            case CHAR_LF:
3483
26.8k
            break;
3484
3485
5.55k
            case CHAR_VT:
3486
13.5k
            case CHAR_FF:
3487
14.3k
            case CHAR_NEL:
3488
#if PCRE2_CODE_UNIT_WIDTH != 8
3489
            case 0x2028:
3490
            case 0x2029:
3491
#endif
3492
14.3k
            if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
3493
14.3k
            break;
3494
4.56M
            }
3495
4.56M
          }
3496
59.8k
        break;
3497
3498
59.8k
        case OP_NOT_HSPACE:
3499
47.8k
        for (i = 1; i <= Lmin; i++)
3500
24.3k
          {
3501
24.3k
          if (Feptr >= mb->end_subject)
3502
0
            {
3503
0
            SCHECK_PARTIAL();
3504
0
            RRETURN(MATCH_NOMATCH);
3505
0
            }
3506
24.3k
          switch(*Feptr++)
3507
24.3k
            {
3508
23.5k
            default: break;
3509
23.5k
            HSPACE_BYTE_CASES:
3510
#if PCRE2_CODE_UNIT_WIDTH != 8
3511
            HSPACE_MULTIBYTE_CASES:
3512
#endif
3513
1.76k
            RRETURN(MATCH_NOMATCH);
3514
24.3k
            }
3515
24.3k
          }
3516
23.5k
        break;
3517
3518
23.5k
        case OP_HSPACE:
3519
4.44k
        for (i = 1; i <= Lmin; i++)
3520
3.14k
          {
3521
3.14k
          if (Feptr >= mb->end_subject)
3522
0
            {
3523
0
            SCHECK_PARTIAL();
3524
0
            RRETURN(MATCH_NOMATCH);
3525
0
            }
3526
3.14k
          switch(*Feptr++)
3527
3.14k
            {
3528
1.84k
            default: RRETURN(MATCH_NOMATCH);
3529
3.77k
            HSPACE_BYTE_CASES:
3530
#if PCRE2_CODE_UNIT_WIDTH != 8
3531
            HSPACE_MULTIBYTE_CASES:
3532
#endif
3533
3.77k
            break;
3534
3.14k
            }
3535
3.14k
          }
3536
1.29k
        break;
3537
3538
1.24M
        case OP_NOT_VSPACE:
3539
2.45M
        for (i = 1; i <= Lmin; i++)
3540
1.24M
          {
3541
1.24M
          if (Feptr >= mb->end_subject)
3542
4.74k
            {
3543
4.74k
            SCHECK_PARTIAL();
3544
4.74k
            RRETURN(MATCH_NOMATCH);
3545
4.74k
            }
3546
1.24M
          switch(*Feptr++)
3547
1.24M
            {
3548
107k
            VSPACE_BYTE_CASES:
3549
#if PCRE2_CODE_UNIT_WIDTH != 8
3550
            VSPACE_MULTIBYTE_CASES:
3551
#endif
3552
107k
            RRETURN(MATCH_NOMATCH);
3553
1.21M
            default: break;
3554
1.24M
            }
3555
1.24M
          }
3556
1.21M
        break;
3557
3558
1.21M
        case OP_VSPACE:
3559
372k
        for (i = 1; i <= Lmin; i++)
3560
366k
          {
3561
366k
          if (Feptr >= mb->end_subject)
3562
28
            {
3563
28
            SCHECK_PARTIAL();
3564
28
            RRETURN(MATCH_NOMATCH);
3565
28
            }
3566
366k
          switch(*Feptr++)
3567
366k
            {
3568
360k
            default: RRETURN(MATCH_NOMATCH);
3569
22.8k
            VSPACE_BYTE_CASES:
3570
#if PCRE2_CODE_UNIT_WIDTH != 8
3571
            VSPACE_MULTIBYTE_CASES:
3572
#endif
3573
22.8k
            break;
3574
366k
            }
3575
366k
          }
3576
6.27k
        break;
3577
3578
6.27k
        case OP_NOT_DIGIT:
3579
12
        for (i = 1; i <= Lmin; i++)
3580
6
          {
3581
6
          if (Feptr >= mb->end_subject)
3582
0
            {
3583
0
            SCHECK_PARTIAL();
3584
0
            RRETURN(MATCH_NOMATCH);
3585
0
            }
3586
6
          if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_digit) != 0)
3587
0
            RRETURN(MATCH_NOMATCH);
3588
6
          Feptr++;
3589
6
          }
3590
6
        break;
3591
3592
222k
        case OP_DIGIT:
3593
229k
        for (i = 1; i <= Lmin; i++)
3594
222k
          {
3595
222k
          if (Feptr >= mb->end_subject)
3596
2.06k
            {
3597
2.06k
            SCHECK_PARTIAL();
3598
2.06k
            RRETURN(MATCH_NOMATCH);
3599
2.06k
            }
3600
220k
          if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_digit) == 0)
3601
213k
            RRETURN(MATCH_NOMATCH);
3602
7.04k
          Feptr++;
3603
7.04k
          }
3604
7.04k
        break;
3605
3606
406k
        case OP_NOT_WHITESPACE:
3607
802k
        for (i = 1; i <= Lmin; i++)
3608
406k
          {
3609
406k
          if (Feptr >= mb->end_subject)
3610
3.20k
            {
3611
3.20k
            SCHECK_PARTIAL();
3612
3.20k
            RRETURN(MATCH_NOMATCH);
3613
3.20k
            }
3614
403k
          if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_space) != 0)
3615
7.63k
            RRETURN(MATCH_NOMATCH);
3616
395k
          Feptr++;
3617
395k
          }
3618
395k
        break;
3619
3620
395k
        case OP_WHITESPACE:
3621
204
        for (i = 1; i <= Lmin; i++)
3622
180
          {
3623
180
          if (Feptr >= mb->end_subject)
3624
0
            {
3625
0
            SCHECK_PARTIAL();
3626
0
            RRETURN(MATCH_NOMATCH);
3627
0
            }
3628
180
          if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_space) == 0)
3629
156
            RRETURN(MATCH_NOMATCH);
3630
24
          Feptr++;
3631
24
          }
3632
24
        break;
3633
3634
27.5k
        case OP_NOT_WORDCHAR:
3635
50.7k
        for (i = 1; i <= Lmin; i++)
3636
27.5k
          {
3637
27.5k
          if (Feptr >= mb->end_subject)
3638
237
            {
3639
237
            SCHECK_PARTIAL();
3640
237
            RRETURN(MATCH_NOMATCH);
3641
237
            }
3642
27.2k
          if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_word) != 0)
3643
4.06k
            RRETURN(MATCH_NOMATCH);
3644
23.2k
          Feptr++;
3645
23.2k
          }
3646
23.2k
        break;
3647
3648
23.2k
        case OP_WORDCHAR:
3649
18.4k
        for (i = 1; i <= Lmin; i++)
3650
12.7k
          {
3651
12.7k
          if (Feptr >= mb->end_subject)
3652
38
            {
3653
38
            SCHECK_PARTIAL();
3654
38
            RRETURN(MATCH_NOMATCH);
3655
38
            }
3656
12.7k
          if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_word) == 0)
3657
7.06k
            RRETURN(MATCH_NOMATCH);
3658
5.67k
          Feptr++;
3659
5.67k
          }
3660
5.67k
        break;
3661
3662
5.67k
        default:
3663
0
        PCRE2_DEBUG_UNREACHABLE();
3664
0
        return PCRE2_ERROR_INTERNAL;
3665
7.01M
        }
3666
13.4M
      }
3667
3668
    /* If Lmin = Lmax we are done. Continue with the main loop. */
3669
3670
67.8M
    if (Lmin == Lmax) continue;
3671
3672
    /* If minimizing, we have to test the rest of the pattern before each
3673
    subsequent match. This means we cannot use a local "notmatch" variable as
3674
    in the other cases. As all 4 temporary 32-bit values in the frame are
3675
    already in use, just test the type each time. */
3676
3677
67.8M
    if (reptype == REPTYPE_MIN)
3678
356k
      {
3679
356k
#ifdef SUPPORT_UNICODE
3680
356k
      if (proptype >= 0)
3681
23.6k
        {
3682
23.6k
        switch(proptype)
3683
23.6k
          {
3684
0
          case PT_LAMP:
3685
0
          for (;;)
3686
0
            {
3687
0
            int chartype;
3688
0
            RMATCH(Fecode, RM208);
3689
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3690
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3691
0
            if (Feptr >= mb->end_subject)
3692
0
              {
3693
0
              SCHECK_PARTIAL();
3694
0
              RRETURN(MATCH_NOMATCH);
3695
0
              }
3696
0
            GETCHARINCTEST(fc, Feptr);
3697
0
            chartype = UCD_CHARTYPE(fc);
3698
0
            if ((chartype == ucp_Lu ||
3699
0
                 chartype == ucp_Ll ||
3700
0
                 chartype == ucp_Lt) == (Lctype == OP_NOTPROP))
3701
0
              RRETURN(MATCH_NOMATCH);
3702
0
            }
3703
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
3704
3705
516
          case PT_GC:
3706
516
          for (;;)
3707
20.4k
            {
3708
20.4k
            RMATCH(Fecode, RM209);
3709
20.4k
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3710
20.4k
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3711
20.4k
            if (Feptr >= mb->end_subject)
3712
12
              {
3713
12
              SCHECK_PARTIAL();
3714
12
              RRETURN(MATCH_NOMATCH);
3715
12
              }
3716
20.3k
            GETCHARINCTEST(fc, Feptr);
3717
20.3k
            if ((UCD_CATEGORY(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3718
504
              RRETURN(MATCH_NOMATCH);
3719
20.3k
            }
3720
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
3721
3722
0
          case PT_PC:
3723
0
          for (;;)
3724
0
            {
3725
0
            RMATCH(Fecode, RM210);
3726
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3727
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3728
0
            if (Feptr >= mb->end_subject)
3729
0
              {
3730
0
              SCHECK_PARTIAL();
3731
0
              RRETURN(MATCH_NOMATCH);
3732
0
              }
3733
0
            GETCHARINCTEST(fc, Feptr);
3734
0
            if ((UCD_CHARTYPE(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3735
0
              RRETURN(MATCH_NOMATCH);
3736
0
            }
3737
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
3738
3739
0
          case PT_SC:
3740
0
          for (;;)
3741
0
            {
3742
0
            RMATCH(Fecode, RM211);
3743
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3744
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3745
0
            if (Feptr >= mb->end_subject)
3746
0
              {
3747
0
              SCHECK_PARTIAL();
3748
0
              RRETURN(MATCH_NOMATCH);
3749
0
              }
3750
0
            GETCHARINCTEST(fc, Feptr);
3751
0
            if ((UCD_SCRIPT(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3752
0
              RRETURN(MATCH_NOMATCH);
3753
0
            }
3754
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
3755
3756
0
          case PT_SCX:
3757
0
          for (;;)
3758
0
            {
3759
0
            BOOL ok;
3760
0
            const ucd_record *prop;
3761
0
            RMATCH(Fecode, RM224);
3762
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3763
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3764
0
            if (Feptr >= mb->end_subject)
3765
0
              {
3766
0
              SCHECK_PARTIAL();
3767
0
              RRETURN(MATCH_NOMATCH);
3768
0
              }
3769
0
            GETCHARINCTEST(fc, Feptr);
3770
0
            prop = GET_UCD(fc);
3771
0
            ok = (prop->script == Lpropvalue
3772
0
                  || MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0);
3773
0
            if (ok == (Lctype == OP_NOTPROP))
3774
0
              RRETURN(MATCH_NOMATCH);
3775
0
            }
3776
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
3777
3778
0
          case PT_ALNUM:
3779
0
          for (;;)
3780
0
            {
3781
0
            int category;
3782
0
            RMATCH(Fecode, RM212);
3783
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3784
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3785
0
            if (Feptr >= mb->end_subject)
3786
0
              {
3787
0
              SCHECK_PARTIAL();
3788
0
              RRETURN(MATCH_NOMATCH);
3789
0
              }
3790
0
            GETCHARINCTEST(fc, Feptr);
3791
0
            category = UCD_CATEGORY(fc);
3792
0
            if ((category == ucp_L || category == ucp_N) == (Lctype == OP_NOTPROP))
3793
0
              RRETURN(MATCH_NOMATCH);
3794
0
            }
3795
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
3796
3797
          /* Perl space used to exclude VT, but from Perl 5.18 it is included,
3798
          which means that Perl space and POSIX space are now identical. PCRE
3799
          was changed at release 8.34. */
3800
3801
22.1k
          case PT_SPACE:    /* Perl space */
3802
22.1k
          case PT_PXSPACE:  /* POSIX space */
3803
22.1k
          for (;;)
3804
2.99M
            {
3805
2.99M
            RMATCH(Fecode, RM213);
3806
2.99M
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3807
2.99M
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3808
2.98M
            if (Feptr >= mb->end_subject)
3809
180
              {
3810
180
              SCHECK_PARTIAL();
3811
180
              RRETURN(MATCH_NOMATCH);
3812
180
              }
3813
2.98M
            GETCHARINCTEST(fc, Feptr);
3814
2.98M
            switch(fc)
3815
2.98M
              {
3816
102k
              HSPACE_CASES:
3817
102k
              VSPACE_CASES:
3818
78.0k
              if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
3819
0
              break;
3820
3821
2.96M
              default:
3822
2.96M
              if ((UCD_CATEGORY(fc) == ucp_Z) == (Lctype == OP_NOTPROP))
3823
0
                RRETURN(MATCH_NOMATCH);
3824
2.96M
              break;
3825
2.98M
              }
3826
2.98M
            }
3827
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
3828
3829
0
          case PT_WORD:
3830
0
          for (;;)
3831
0
            {
3832
0
            int chartype, category;
3833
0
            RMATCH(Fecode, RM214);
3834
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3835
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3836
0
            if (Feptr >= mb->end_subject)
3837
0
              {
3838
0
              SCHECK_PARTIAL();
3839
0
              RRETURN(MATCH_NOMATCH);
3840
0
              }
3841
0
            GETCHARINCTEST(fc, Feptr);
3842
0
            chartype = UCD_CHARTYPE(fc);
3843
0
            category = PRIV(ucp_gentype)[chartype];
3844
0
            if ((category == ucp_L ||
3845
0
                 category == ucp_N ||
3846
0
                 chartype == ucp_Mn ||
3847
0
                 chartype == ucp_Pc) == (Lctype == OP_NOTPROP))
3848
0
              RRETURN(MATCH_NOMATCH);
3849
0
            }
3850
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
3851
3852
954
          case PT_CLIST:
3853
954
          for (;;)
3854
954
            {
3855
954
            const uint32_t *cp;
3856
954
            RMATCH(Fecode, RM215);
3857
954
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3858
954
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3859
954
            if (Feptr >= mb->end_subject)
3860
0
              {
3861
0
              SCHECK_PARTIAL();
3862
0
              RRETURN(MATCH_NOMATCH);
3863
0
              }
3864
954
            GETCHARINCTEST(fc, Feptr);
3865
#if PCRE2_CODE_UNIT_WIDTH == 32
3866
            if (fc > MAX_UTF_CODE_POINT)
3867
              {
3868
              if (Lctype == OP_NOTPROP) continue;
3869
              RRETURN(MATCH_NOMATCH);
3870
              }
3871
#endif
3872
954
            cp = PRIV(ucd_caseless_sets) + Lpropvalue;
3873
954
            for (;;)
3874
1.07k
              {
3875
1.07k
              if (fc < *cp)
3876
954
                {
3877
954
                if (Lctype == OP_NOTPROP) break;
3878
954
                RRETURN(MATCH_NOMATCH);
3879
954
                }
3880
124
              if (fc == *cp++)
3881
0
                {
3882
0
                if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
3883
0
                break;
3884
0
                }
3885
124
              }
3886
954
            }
3887
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
3888
3889
0
          case PT_UCNC:
3890
0
          for (;;)
3891
0
            {
3892
0
            RMATCH(Fecode, RM216);
3893
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3894
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3895
0
            if (Feptr >= mb->end_subject)
3896
0
              {
3897
0
              SCHECK_PARTIAL();
3898
0
              RRETURN(MATCH_NOMATCH);
3899
0
              }
3900
0
            GETCHARINCTEST(fc, Feptr);
3901
0
            if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
3902
0
                 fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
3903
0
                 fc >= 0xe000) == (Lctype == OP_NOTPROP))
3904
0
              RRETURN(MATCH_NOMATCH);
3905
0
            }
3906
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
3907
3908
0
          case PT_BIDICL:
3909
0
          for (;;)
3910
0
            {
3911
0
            RMATCH(Fecode, RM223);
3912
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3913
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3914
0
            if (Feptr >= mb->end_subject)
3915
0
              {
3916
0
              SCHECK_PARTIAL();
3917
0
              RRETURN(MATCH_NOMATCH);
3918
0
              }
3919
0
            GETCHARINCTEST(fc, Feptr);
3920
0
            if ((UCD_BIDICLASS(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3921
0
              RRETURN(MATCH_NOMATCH);
3922
0
            }
3923
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
3924
3925
0
          case PT_BOOL:
3926
0
          for (;;)
3927
0
            {
3928
0
            BOOL ok;
3929
0
            const ucd_record *prop;
3930
0
            RMATCH(Fecode, RM222);
3931
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3932
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3933
0
            if (Feptr >= mb->end_subject)
3934
0
              {
3935
0
              SCHECK_PARTIAL();
3936
0
              RRETURN(MATCH_NOMATCH);
3937
0
              }
3938
0
            GETCHARINCTEST(fc, Feptr);
3939
0
            prop = GET_UCD(fc);
3940
0
            ok = MAPBIT(PRIV(ucd_boolprop_sets) +
3941
0
              UCD_BPROPS_PROP(prop), Lpropvalue) != 0;
3942
0
            if (ok == (Lctype == OP_NOTPROP))
3943
0
              RRETURN(MATCH_NOMATCH);
3944
0
            }
3945
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
3946
3947
          /* This should never occur */
3948
0
          default:
3949
0
          PCRE2_DEBUG_UNREACHABLE();
3950
0
          return PCRE2_ERROR_INTERNAL;
3951
23.6k
          }
3952
23.6k
        }
3953
3954
      /* Match extended Unicode sequences. We will get here only if the
3955
      support is in the binary; otherwise a compile-time error occurs. */
3956
3957
333k
      else if (Lctype == OP_EXTUNI)
3958
17.0k
        {
3959
17.0k
        for (;;)
3960
6.11M
          {
3961
6.11M
          RMATCH(Fecode, RM217);
3962
6.11M
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3963
6.11M
          if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3964
6.11M
          if (Feptr >= mb->end_subject)
3965
17.0k
            {
3966
17.0k
            SCHECK_PARTIAL();
3967
17.0k
            RRETURN(MATCH_NOMATCH);
3968
17.0k
            }
3969
6.10M
          else
3970
6.10M
            {
3971
6.10M
            GETCHARINCTEST(fc, Feptr);
3972
6.10M
            Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject,
3973
6.10M
              utf, NULL);
3974
6.10M
            }
3975
6.10M
          CHECK_PARTIAL();
3976
6.10M
          }
3977
17.0k
        }
3978
316k
      else
3979
316k
#endif     /* SUPPORT_UNICODE */
3980
3981
      /* UTF mode for non-property testing character types. */
3982
3983
316k
#ifdef SUPPORT_UNICODE
3984
316k
      if (utf)
3985
46.5k
        {
3986
46.5k
        for (;;)
3987
5.44M
          {
3988
5.44M
          RMATCH(Fecode, RM218);
3989
5.44M
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3990
5.44M
          if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3991
5.43M
          if (Feptr >= mb->end_subject)
3992
33.4k
            {
3993
33.4k
            SCHECK_PARTIAL();
3994
33.4k
            RRETURN(MATCH_NOMATCH);
3995
33.4k
            }
3996
5.40M
          if (Lctype == OP_ANY && IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
3997
5.40M
          GETCHARINC(fc, Feptr);
3998
5.40M
          switch(Lctype)
3999
5.40M
            {
4000
87.4k
            case OP_ANY:               /* This is the non-NL case */
4001
87.4k
            if (mb->partial != 0 &&    /* Take care with CRLF partial */
4002
87.4k
                Feptr >= mb->end_subject &&
4003
87.4k
                NLBLOCK->nltype == NLTYPE_FIXED &&
4004
87.4k
                NLBLOCK->nllen == 2 &&
4005
87.4k
                fc == NLBLOCK->nl[0])
4006
0
              {
4007
0
              mb->hitend = TRUE;
4008
0
              if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
4009
0
              }
4010
87.4k
            break;
4011
4012
2.07M
            case OP_ALLANY:
4013
5.20M
            case OP_ANYBYTE:
4014
5.20M
            break;
4015
4016
0
            case OP_ANYNL:
4017
0
            switch(fc)
4018
0
              {
4019
0
              default: RRETURN(MATCH_NOMATCH);
4020
4021
0
              case CHAR_CR:
4022
0
              if (Feptr < mb->end_subject && UCHAR21(Feptr) == CHAR_LF) Feptr++;
4023
0
              break;
4024
4025
0
              case CHAR_LF:
4026
0
              break;
4027
4028
0
              case CHAR_VT:
4029
0
              case CHAR_FF:
4030
0
              case CHAR_NEL:
4031
0
#ifndef EBCDIC
4032
0
              case 0x2028:
4033
0
              case 0x2029:
4034
0
#endif  /* Not EBCDIC */
4035
0
              if (mb->bsr_convention == PCRE2_BSR_ANYCRLF)
4036
0
                RRETURN(MATCH_NOMATCH);
4037
0
              break;
4038
0
              }
4039
0
            break;
4040
4041
113k
            case OP_NOT_HSPACE:
4042
113k
            switch(fc)
4043
113k
              {
4044
119k
              HSPACE_CASES: RRETURN(MATCH_NOMATCH);
4045
107k
              default: break;
4046
113k
              }
4047
107k
            break;
4048
4049
107k
            case OP_HSPACE:
4050
0
            switch(fc)
4051
0
              {
4052
0
              HSPACE_CASES: break;
4053
0
              default: RRETURN(MATCH_NOMATCH);
4054
0
              }
4055
0
            break;
4056
4057
0
            case OP_NOT_VSPACE:
4058
0
            switch(fc)
4059
0
              {
4060
0
              VSPACE_CASES: RRETURN(MATCH_NOMATCH);
4061
0
              default: break;
4062
0
              }
4063
0
            break;
4064
4065
24
            case OP_VSPACE:
4066
24
            switch(fc)
4067
24
              {
4068
6
              VSPACE_CASES: break;
4069
18
              default: RRETURN(MATCH_NOMATCH);
4070
24
              }
4071
6
            break;
4072
4073
6
            case OP_NOT_DIGIT:
4074
0
            if (fc < 256 && (mb->ctypes[fc] & ctype_digit) != 0)
4075
0
              RRETURN(MATCH_NOMATCH);
4076
0
            break;
4077
4078
0
            case OP_DIGIT:
4079
0
            if (fc >= 256 || (mb->ctypes[fc] & ctype_digit) == 0)
4080
0
              RRETURN(MATCH_NOMATCH);
4081
0
            break;
4082
4083
0
            case OP_NOT_WHITESPACE:
4084
0
            if (fc < 256 && (mb->ctypes[fc] & ctype_space) != 0)
4085
0
              RRETURN(MATCH_NOMATCH);
4086
0
            break;
4087
4088
0
            case OP_WHITESPACE:
4089
0
            if (fc >= 256 || (mb->ctypes[fc] & ctype_space) == 0)
4090
0
              RRETURN(MATCH_NOMATCH);
4091
0
            break;
4092
4093
0
            case OP_NOT_WORDCHAR:
4094
0
            if (fc < 256 && (mb->ctypes[fc] & ctype_word) != 0)
4095
0
              RRETURN(MATCH_NOMATCH);
4096
0
            break;
4097
4098
0
            case OP_WORDCHAR:
4099
0
            if (fc >= 256 || (mb->ctypes[fc] & ctype_word) == 0)
4100
0
              RRETURN(MATCH_NOMATCH);
4101
0
            break;
4102
4103
0
            default:
4104
0
            PCRE2_DEBUG_UNREACHABLE();
4105
0
            return PCRE2_ERROR_INTERNAL;
4106
5.40M
            }
4107
5.40M
          }
4108
46.5k
        }
4109
269k
      else
4110
269k
#endif  /* SUPPORT_UNICODE */
4111
4112
      /* Not UTF mode */
4113
269k
        {
4114
269k
        for (;;)
4115
11.3M
          {
4116
11.3M
          RMATCH(Fecode, RM33);
4117
11.3M
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4118
11.3M
          if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
4119
11.3M
          if (Feptr >= mb->end_subject)
4120
117k
            {
4121
117k
            SCHECK_PARTIAL();
4122
117k
            RRETURN(MATCH_NOMATCH);
4123
117k
            }
4124
11.1M
          if (Lctype == OP_ANY && IS_NEWLINE(Feptr))
4125
650
            RRETURN(MATCH_NOMATCH);
4126
11.1M
          fc = *Feptr++;
4127
11.1M
          switch(Lctype)
4128
11.1M
            {
4129
224k
            case OP_ANY:               /* This is the non-NL case */
4130
224k
            if (mb->partial != 0 &&    /* Take care with CRLF partial */
4131
224k
                Feptr >= mb->end_subject &&
4132
224k
                NLBLOCK->nltype == NLTYPE_FIXED &&
4133
224k
                NLBLOCK->nllen == 2 &&
4134
224k
                fc == NLBLOCK->nl[0])
4135
0
              {
4136
0
              mb->hitend = TRUE;
4137
0
              if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
4138
0
              }
4139
224k
            break;
4140
4141
3.48M
            case OP_ALLANY:
4142
3.48M
            case OP_ANYBYTE:
4143
3.48M
            break;
4144
4145
3.11k
            case OP_ANYNL:
4146
3.11k
            switch(fc)
4147
3.11k
              {
4148
2.90k
              default: RRETURN(MATCH_NOMATCH);
4149
4150
110
              case CHAR_CR:
4151
110
              if (Feptr < mb->end_subject && *Feptr == CHAR_LF) Feptr++;
4152
110
              break;
4153
4154
49
              case CHAR_LF:
4155
49
              break;
4156
4157
32
              case CHAR_VT:
4158
38
              case CHAR_FF:
4159
53
              case CHAR_NEL:
4160
#if PCRE2_CODE_UNIT_WIDTH != 8
4161
              case 0x2028:
4162
              case 0x2029:
4163
#endif
4164
53
              if (mb->bsr_convention == PCRE2_BSR_ANYCRLF)
4165
0
                RRETURN(MATCH_NOMATCH);
4166
53
              break;
4167
3.11k
              }
4168
212
            break;
4169
4170
910k
            case OP_NOT_HSPACE:
4171
910k
            switch(fc)
4172
910k
              {
4173
898k
              default: break;
4174
898k
              HSPACE_BYTE_CASES:
4175
#if PCRE2_CODE_UNIT_WIDTH != 8
4176
              HSPACE_MULTIBYTE_CASES:
4177
#endif
4178
29.0k
              RRETURN(MATCH_NOMATCH);
4179
910k
              }
4180
898k
            break;
4181
4182
898k
            case OP_HSPACE:
4183
0
            switch(fc)
4184
0
              {
4185
0
              default: RRETURN(MATCH_NOMATCH);
4186
0
              HSPACE_BYTE_CASES:
4187
#if PCRE2_CODE_UNIT_WIDTH != 8
4188
              HSPACE_MULTIBYTE_CASES:
4189
#endif
4190
0
              break;
4191
0
              }
4192
0
            break;
4193
4194
1.20M
            case OP_NOT_VSPACE:
4195
1.20M
            switch(fc)
4196
1.20M
              {
4197
1.18M
              default: break;
4198
1.18M
              VSPACE_BYTE_CASES:
4199
#if PCRE2_CODE_UNIT_WIDTH != 8
4200
              VSPACE_MULTIBYTE_CASES:
4201
#endif
4202
108k
              RRETURN(MATCH_NOMATCH);
4203
1.20M
              }
4204
1.18M
            break;
4205
4206
1.18M
            case OP_VSPACE:
4207
0
            switch(fc)
4208
0
              {
4209
0
              default: RRETURN(MATCH_NOMATCH);
4210
0
              VSPACE_BYTE_CASES:
4211
#if PCRE2_CODE_UNIT_WIDTH != 8
4212
              VSPACE_MULTIBYTE_CASES:
4213
#endif
4214
0
              break;
4215
0
              }
4216
0
            break;
4217
4218
0
            case OP_NOT_DIGIT:
4219
0
            if (MAX_255(fc) && (mb->ctypes[fc] & ctype_digit) != 0)
4220
0
              RRETURN(MATCH_NOMATCH);
4221
0
            break;
4222
4223
116
            case OP_DIGIT:
4224
116
            if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_digit) == 0)
4225
60
              RRETURN(MATCH_NOMATCH);
4226
56
            break;
4227
4228
5.17M
            case OP_NOT_WHITESPACE:
4229
5.17M
            if (MAX_255(fc) && (mb->ctypes[fc] & ctype_space) != 0)
4230
28.9k
              RRETURN(MATCH_NOMATCH);
4231
5.14M
            break;
4232
4233
5.14M
            case OP_WHITESPACE:
4234
0
            if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_space) == 0)
4235
0
              RRETURN(MATCH_NOMATCH);
4236
0
            break;
4237
4238
163k
            case OP_NOT_WORDCHAR:
4239
163k
            if (MAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0)
4240
16.1k
              RRETURN(MATCH_NOMATCH);
4241
147k
            break;
4242
4243
147k
            case OP_WORDCHAR:
4244
14.9k
            if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_word) == 0)
4245
1.38k
              RRETURN(MATCH_NOMATCH);
4246
13.5k
            break;
4247
4248
13.5k
            default:
4249
0
            PCRE2_DEBUG_UNREACHABLE();
4250
0
            return PCRE2_ERROR_INTERNAL;
4251
11.1M
            }
4252
11.1M
          }
4253
269k
        }
4254
4255
0
      PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */
4256
0
      }
4257
4258
    /* If maximizing, it is worth using inline code for speed, doing the type
4259
    test once at the start (i.e. keep it out of the loops). Once again,
4260
    "notmatch" can be an ordinary local variable because the loops do not call
4261
    RMATCH. */
4262
4263
67.5M
    else
4264
67.5M
      {
4265
67.5M
      Lstart_eptr = Feptr;  /* Remember where we started */
4266
4267
67.5M
#ifdef SUPPORT_UNICODE
4268
67.5M
      if (proptype >= 0)
4269
878k
        {
4270
878k
        BOOL notmatch = Lctype == OP_NOTPROP;
4271
878k
        switch(proptype)
4272
878k
          {
4273
0
          case PT_LAMP:
4274
0
          for (i = Lmin; i < Lmax; i++)
4275
0
            {
4276
0
            int chartype;
4277
0
            int len = 1;
4278
0
            if (Feptr >= mb->end_subject)
4279
0
              {
4280
0
              SCHECK_PARTIAL();
4281
0
              break;
4282
0
              }
4283
0
            GETCHARLENTEST(fc, Feptr, len);
4284
0
            chartype = UCD_CHARTYPE(fc);
4285
0
            if ((chartype == ucp_Lu ||
4286
0
                 chartype == ucp_Ll ||
4287
0
                 chartype == ucp_Lt) == notmatch)
4288
0
              break;
4289
0
            Feptr+= len;
4290
0
            }
4291
0
          break;
4292
4293
2.97k
          case PT_GC:
4294
64.7k
          for (i = Lmin; i < Lmax; i++)
4295
64.7k
            {
4296
64.7k
            int len = 1;
4297
64.7k
            if (Feptr >= mb->end_subject)
4298
246
              {
4299
246
              SCHECK_PARTIAL();
4300
246
              break;
4301
246
              }
4302
64.5k
            GETCHARLENTEST(fc, Feptr, len);
4303
64.5k
            if ((UCD_CATEGORY(fc) == Lpropvalue) == notmatch) break;
4304
61.7k
            Feptr+= len;
4305
61.7k
            }
4306
2.97k
          break;
4307
4308
3.95k
          case PT_PC:
4309
21.0k
          for (i = Lmin; i < Lmax; i++)
4310
21.0k
            {
4311
21.0k
            int len = 1;
4312
21.0k
            if (Feptr >= mb->end_subject)
4313
0
              {
4314
0
              SCHECK_PARTIAL();
4315
0
              break;
4316
0
              }
4317
21.0k
            GETCHARLENTEST(fc, Feptr, len);
4318
21.0k
            if ((UCD_CHARTYPE(fc) == Lpropvalue) == notmatch) break;
4319
17.0k
            Feptr+= len;
4320
17.0k
            }
4321
3.95k
          break;
4322
4323
3.95k
          case PT_SC:
4324
0
          for (i = Lmin; i < Lmax; i++)
4325
0
            {
4326
0
            int len = 1;
4327
0
            if (Feptr >= mb->end_subject)
4328
0
              {
4329
0
              SCHECK_PARTIAL();
4330
0
              break;
4331
0
              }
4332
0
            GETCHARLENTEST(fc, Feptr, len);
4333
0
            if ((UCD_SCRIPT(fc) == Lpropvalue) == notmatch) break;
4334
0
            Feptr+= len;
4335
0
            }
4336
0
          break;
4337
4338
0
          case PT_SCX:
4339
0
          for (i = Lmin; i < Lmax; i++)
4340
0
            {
4341
0
            BOOL ok;
4342
0
            const ucd_record *prop;
4343
0
            int len = 1;
4344
0
            if (Feptr >= mb->end_subject)
4345
0
              {
4346
0
              SCHECK_PARTIAL();
4347
0
              break;
4348
0
              }
4349
0
            GETCHARLENTEST(fc, Feptr, len);
4350
0
            prop = GET_UCD(fc);
4351
0
            ok = (prop->script == Lpropvalue ||
4352
0
                  MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0);
4353
0
            if (ok == notmatch) break;
4354
0
            Feptr+= len;
4355
0
            }
4356
0
          break;
4357
4358
0
          case PT_ALNUM:
4359
0
          for (i = Lmin; i < Lmax; i++)
4360
0
            {
4361
0
            int category;
4362
0
            int len = 1;
4363
0
            if (Feptr >= mb->end_subject)
4364
0
              {
4365
0
              SCHECK_PARTIAL();
4366
0
              break;
4367
0
              }
4368
0
            GETCHARLENTEST(fc, Feptr, len);
4369
0
            category = UCD_CATEGORY(fc);
4370
0
            if ((category == ucp_L || category == ucp_N) == notmatch)
4371
0
              break;
4372
0
            Feptr+= len;
4373
0
            }
4374
0
          break;
4375
4376
          /* Perl space used to exclude VT, but from Perl 5.18 it is included,
4377
          which means that Perl space and POSIX space are now identical. PCRE
4378
          was changed at release 8.34. */
4379
4380
847k
          case PT_SPACE:    /* Perl space */
4381
847k
          case PT_PXSPACE:  /* POSIX space */
4382
6.06M
          for (i = Lmin; i < Lmax; i++)
4383
6.06M
            {
4384
6.06M
            int len = 1;
4385
6.06M
            if (Feptr >= mb->end_subject)
4386
21.7k
              {
4387
21.7k
              SCHECK_PARTIAL();
4388
21.7k
              break;
4389
21.7k
              }
4390
6.03M
            GETCHARLENTEST(fc, Feptr, len);
4391
6.03M
            switch(fc)
4392
6.03M
              {
4393
28.7M
              HSPACE_CASES:
4394
28.7M
              VSPACE_CASES:
4395
12.0M
              if (notmatch) goto ENDLOOP99;  /* Break the loop */
4396
1.49M
              break;
4397
4398
4.29M
              default:
4399
4.29M
              if ((UCD_CATEGORY(fc) == ucp_Z) == notmatch)
4400
579k
                goto ENDLOOP99;   /* Break the loop */
4401
3.71M
              break;
4402
6.03M
              }
4403
5.21M
            Feptr+= len;
4404
5.21M
            }
4405
847k
          ENDLOOP99:
4406
847k
          break;
4407
4408
23.8k
          case PT_WORD:
4409
109k
          for (i = Lmin; i < Lmax; i++)
4410
109k
            {
4411
109k
            int chartype, category;
4412
109k
            int len = 1;
4413
109k
            if (Feptr >= mb->end_subject)
4414
612
              {
4415
612
              SCHECK_PARTIAL();
4416
612
              break;
4417
612
              }
4418
109k
            GETCHARLENTEST(fc, Feptr, len);
4419
109k
            chartype = UCD_CHARTYPE(fc);
4420
109k
            category = PRIV(ucp_gentype)[chartype];
4421
109k
            if ((category == ucp_L ||
4422
109k
                 category == ucp_N ||
4423
109k
                 chartype == ucp_Mn ||
4424
109k
                 chartype == ucp_Pc) == notmatch)
4425
23.2k
              break;
4426
86.0k
            Feptr+= len;
4427
86.0k
            }
4428
23.8k
          break;
4429
4430
23.8k
          case PT_CLIST:
4431
6
          for (i = Lmin; i < Lmax; i++)
4432
6
            {
4433
6
            const uint32_t *cp;
4434
6
            int len = 1;
4435
6
            if (Feptr >= mb->end_subject)
4436
0
              {
4437
0
              SCHECK_PARTIAL();
4438
0
              break;
4439
0
              }
4440
6
            GETCHARLENTEST(fc, Feptr, len);
4441
#if PCRE2_CODE_UNIT_WIDTH == 32
4442
            if (fc > MAX_UTF_CODE_POINT)
4443
              {
4444
              if (!notmatch) goto GOT_MAX;
4445
              }
4446
            else
4447
#endif
4448
6
              {
4449
6
              cp = PRIV(ucd_caseless_sets) + Lpropvalue;
4450
6
              for (;;)
4451
10
                {
4452
10
                if (fc < *cp)
4453
6
                  { if (notmatch) break; else goto GOT_MAX; }
4454
4
                if (fc == *cp++)
4455
0
                  { if (notmatch) goto GOT_MAX; else break; }
4456
4
                }
4457
6
              }
4458
4459
0
            Feptr += len;
4460
0
            }
4461
6
          GOT_MAX:
4462
6
          break;
4463
4464
0
          case PT_UCNC:
4465
0
          for (i = Lmin; i < Lmax; i++)
4466
0
            {
4467
0
            int len = 1;
4468
0
            if (Feptr >= mb->end_subject)
4469
0
              {
4470
0
              SCHECK_PARTIAL();
4471
0
              break;
4472
0
              }
4473
0
            GETCHARLENTEST(fc, Feptr, len);
4474
0
            if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
4475
0
                 fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
4476
0
                 fc >= 0xe000) == notmatch)
4477
0
              break;
4478
0
            Feptr += len;
4479
0
            }
4480
0
          break;
4481
4482
0
          case PT_BIDICL:
4483
0
          for (i = Lmin; i < Lmax; i++)
4484
0
            {
4485
0
            int len = 1;
4486
0
            if (Feptr >= mb->end_subject)
4487
0
              {
4488
0
              SCHECK_PARTIAL();
4489
0
              break;
4490
0
              }
4491
0
            GETCHARLENTEST(fc, Feptr, len);
4492
0
            if ((UCD_BIDICLASS(fc) == Lpropvalue) == notmatch) break;
4493
0
            Feptr+= len;
4494
0
            }
4495
0
          break;
4496
4497
0
          case PT_BOOL:
4498
0
          for (i = Lmin; i < Lmax; i++)
4499
0
            {
4500
0
            BOOL ok;
4501
0
            const ucd_record *prop;
4502
0
            int len = 1;
4503
0
            if (Feptr >= mb->end_subject)
4504
0
              {
4505
0
              SCHECK_PARTIAL();
4506
0
              break;
4507
0
              }
4508
0
            GETCHARLENTEST(fc, Feptr, len);
4509
0
            prop = GET_UCD(fc);
4510
0
            ok = MAPBIT(PRIV(ucd_boolprop_sets) +
4511
0
              UCD_BPROPS_PROP(prop), Lpropvalue) != 0;
4512
0
            if (ok == notmatch) break;
4513
0
            Feptr+= len;
4514
0
            }
4515
0
          break;
4516
4517
0
          default:
4518
0
          PCRE2_DEBUG_UNREACHABLE();
4519
0
          return PCRE2_ERROR_INTERNAL;
4520
878k
          }
4521
4522
        /* Feptr is now past the end of the maximum run */
4523
4524
878k
        if (reptype == REPTYPE_POS) continue;    /* No backtracking */
4525
4526
        /* After \C in UTF mode, Lstart_eptr might be in the middle of a
4527
        Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
4528
        go too far. */
4529
4530
850k
        for(;;)
4531
6.13M
          {
4532
6.13M
          if (Feptr <= Lstart_eptr) break;
4533
5.28M
          RMATCH(Fecode, RM221);
4534
5.28M
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4535
5.28M
          Feptr--;
4536
5.28M
          if (utf) BACKCHAR(Feptr);
4537
5.28M
          }
4538
850k
        }
4539
4540
      /* Match extended Unicode grapheme clusters. We will get here only if the
4541
      support is in the binary; otherwise a compile-time error occurs. */
4542
4543
66.6M
      else if (Lctype == OP_EXTUNI)
4544
119k
        {
4545
18.0M
        for (i = Lmin; i < Lmax; i++)
4546
18.0M
          {
4547
18.0M
          if (Feptr >= mb->end_subject)
4548
119k
            {
4549
119k
            SCHECK_PARTIAL();
4550
119k
            break;
4551
119k
            }
4552
17.9M
          else
4553
17.9M
            {
4554
17.9M
            GETCHARINCTEST(fc, Feptr);
4555
17.9M
            Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject,
4556
17.9M
              utf, NULL);
4557
17.9M
            }
4558
17.9M
          CHECK_PARTIAL();
4559
17.9M
          }
4560
4561
        /* Feptr is now past the end of the maximum run */
4562
4563
119k
        if (reptype == REPTYPE_POS) continue;    /* No backtracking */
4564
4565
        /* We use <= Lstart_eptr rather than == Lstart_eptr to detect the start
4566
        of the run while backtracking because the use of \C in UTF mode can
4567
        cause BACKCHAR to move back past Lstart_eptr. This is just palliative;
4568
        the use of \C in UTF mode is fraught with danger. */
4569
4570
119k
        for(;;)
4571
18.0M
          {
4572
18.0M
          int lgb, rgb;
4573
18.0M
          PCRE2_SPTR fptr;
4574
4575
18.0M
          if (Feptr <= Lstart_eptr) break;   /* At start of char run */
4576
17.9M
          RMATCH(Fecode, RM219);
4577
17.9M
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4578
4579
          /* Backtracking over an extended grapheme cluster involves inspecting
4580
          the previous two characters (if present) to see if a break is
4581
          permitted between them. */
4582
4583
17.9M
          Feptr--;
4584
17.9M
          if (!utf) fc = *Feptr; else
4585
4.02M
            {
4586
4.02M
            BACKCHAR(Feptr);
4587
4.02M
            GETCHAR(fc, Feptr);
4588
4.02M
            }
4589
17.9M
          rgb = UCD_GRAPHBREAK(fc);
4590
4591
17.9M
          for (;;)
4592
17.9M
            {
4593
17.9M
            if (Feptr <= Lstart_eptr) break;   /* At start of char run */
4594
17.8M
            fptr = Feptr - 1;
4595
17.8M
            if (!utf) fc = *fptr; else
4596
4.02M
              {
4597
4.02M
              BACKCHAR(fptr);
4598
4.02M
              GETCHAR(fc, fptr);
4599
4.02M
              }
4600
17.8M
            lgb = UCD_GRAPHBREAK(fc);
4601
17.8M
            if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
4602
10.2k
            Feptr = fptr;
4603
10.2k
            rgb = lgb;
4604
10.2k
            }
4605
17.9M
          }
4606
119k
        }
4607
4608
66.5M
      else
4609
66.5M
#endif   /* SUPPORT_UNICODE */
4610
4611
66.5M
#ifdef SUPPORT_UNICODE
4612
66.5M
      if (utf)
4613
56.0M
        {
4614
56.0M
        switch(Lctype)
4615
56.0M
          {
4616
1.23k
          case OP_ANY:
4617
2.47k
          for (i = Lmin; i < Lmax; i++)
4618
1.23k
            {
4619
1.23k
            if (Feptr >= mb->end_subject)
4620
0
              {
4621
0
              SCHECK_PARTIAL();
4622
0
              break;
4623
0
              }
4624
1.23k
            if (IS_NEWLINE(Feptr)) break;
4625
1.23k
            if (mb->partial != 0 &&    /* Take care with CRLF partial */
4626
1.23k
                Feptr + 1 >= mb->end_subject &&
4627
1.23k
                NLBLOCK->nltype == NLTYPE_FIXED &&
4628
1.23k
                NLBLOCK->nllen == 2 &&
4629
1.23k
                UCHAR21(Feptr) == NLBLOCK->nl[0])
4630
0
              {
4631
0
              mb->hitend = TRUE;
4632
0
              if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
4633
0
              }
4634
1.23k
            Feptr++;
4635
1.23k
            ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
4636
1.23k
            }
4637
1.23k
          break;
4638
4639
3.91M
          case OP_ALLANY:
4640
3.91M
          if (Lmax < UINT32_MAX)
4641
2.38k
            {
4642
4.71k
            for (i = Lmin; i < Lmax; i++)
4643
2.38k
              {
4644
2.38k
              if (Feptr >= mb->end_subject)
4645
58
                {
4646
58
                SCHECK_PARTIAL();
4647
58
                break;
4648
58
                }
4649
2.32k
              Feptr++;
4650
2.32k
              ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
4651
2.32k
              }
4652
2.38k
            }
4653
3.91M
          else
4654
3.91M
            {
4655
3.91M
            Feptr = mb->end_subject;   /* Unlimited UTF-8 repeat */
4656
3.91M
            SCHECK_PARTIAL();
4657
3.91M
            }
4658
3.91M
          break;
4659
4660
          /* The "byte" (i.e. "code unit") case is the same as non-UTF */
4661
4662
3.91M
          case OP_ANYBYTE:
4663
152
          fc = Lmax - Lmin;
4664
152
          if (fc > (uint32_t)(mb->end_subject - Feptr))
4665
152
            {
4666
152
            Feptr = mb->end_subject;
4667
152
            SCHECK_PARTIAL();
4668
152
            }
4669
0
          else Feptr += fc;
4670
152
          break;
4671
4672
52.0M
          case OP_ANYNL:
4673
58.3M
          for (i = Lmin; i < Lmax; i++)
4674
52.0M
            {
4675
52.0M
            int len = 1;
4676
52.0M
            if (Feptr >= mb->end_subject)
4677
771k
              {
4678
771k
              SCHECK_PARTIAL();
4679
771k
              break;
4680
771k
              }
4681
51.3M
            GETCHARLEN(fc, Feptr, len);
4682
51.3M
            if (fc == CHAR_CR)
4683
0
              {
4684
0
              if (++Feptr >= mb->end_subject) break;
4685
0
              if (UCHAR21(Feptr) == CHAR_LF) Feptr++;
4686
0
              }
4687
51.3M
            else
4688
51.3M
              {
4689
51.3M
              if (fc != CHAR_LF &&
4690
51.3M
                  (mb->bsr_convention == PCRE2_BSR_ANYCRLF ||
4691
48.2M
                   (fc != CHAR_VT && fc != CHAR_FF && fc != CHAR_NEL
4692
48.2M
#ifndef EBCDIC
4693
48.2M
                    && fc != 0x2028 && fc != 0x2029
4694
48.2M
#endif  /* Not EBCDIC */
4695
48.2M
                    )))
4696
45.0M
                break;
4697
6.27M
              Feptr += len;
4698
6.27M
              }
4699
51.3M
            }
4700
52.0M
          break;
4701
4702
52.0M
          case OP_NOT_HSPACE:
4703
3.10k
          case OP_HSPACE:
4704
96.0k
          for (i = Lmin; i < Lmax; i++)
4705
94.9k
            {
4706
94.9k
            BOOL gotspace;
4707
94.9k
            int len = 1;
4708
94.9k
            if (Feptr >= mb->end_subject)
4709
342
              {
4710
342
              SCHECK_PARTIAL();
4711
342
              break;
4712
342
              }
4713
94.6k
            GETCHARLEN(fc, Feptr, len);
4714
94.6k
            switch(fc)
4715
94.6k
              {
4716
1.70k
              HSPACE_CASES: gotspace = TRUE; break;
4717
92.9k
              default: gotspace = FALSE; break;
4718
94.6k
              }
4719
94.6k
            if (gotspace == (Lctype == OP_NOT_HSPACE)) break;
4720
92.9k
            Feptr += len;
4721
92.9k
            }
4722
3.10k
          break;
4723
4724
5.39k
          case OP_NOT_VSPACE:
4725
7.63k
          case OP_VSPACE:
4726
95.9k
          for (i = Lmin; i < Lmax; i++)
4727
95.7k
            {
4728
95.7k
            BOOL gotspace;
4729
95.7k
            int len = 1;
4730
95.7k
            if (Feptr >= mb->end_subject)
4731
20
              {
4732
20
              SCHECK_PARTIAL();
4733
20
              break;
4734
20
              }
4735
95.7k
            GETCHARLEN(fc, Feptr, len);
4736
95.7k
            switch(fc)
4737
95.7k
              {
4738
5.63k
              VSPACE_CASES: gotspace = TRUE; break;
4739
90.1k
              default: gotspace = FALSE; break;
4740
95.7k
              }
4741
95.7k
            if (gotspace == (Lctype == OP_NOT_VSPACE)) break;
4742
88.3k
            Feptr += len;
4743
88.3k
            }
4744
7.63k
          break;
4745
4746
7.63k
          case OP_NOT_DIGIT:
4747
0
          for (i = Lmin; i < Lmax; i++)
4748
0
            {
4749
0
            int len = 1;
4750
0
            if (Feptr >= mb->end_subject)
4751
0
              {
4752
0
              SCHECK_PARTIAL();
4753
0
              break;
4754
0
              }
4755
0
            GETCHARLEN(fc, Feptr, len);
4756
0
            if (fc < 256 && (mb->ctypes[fc] & ctype_digit) != 0) break;
4757
0
            Feptr+= len;
4758
0
            }
4759
0
          break;
4760
4761
0
          case OP_DIGIT:
4762
0
          for (i = Lmin; i < Lmax; i++)
4763
0
            {
4764
0
            int len = 1;
4765
0
            if (Feptr >= mb->end_subject)
4766
0
              {
4767
0
              SCHECK_PARTIAL();
4768
0
              break;
4769
0
              }
4770
0
            GETCHARLEN(fc, Feptr, len);
4771
0
            if (fc >= 256 ||(mb->ctypes[fc] & ctype_digit) == 0) break;
4772
0
            Feptr+= len;
4773
0
            }
4774
0
          break;
4775
4776
0
          case OP_NOT_WHITESPACE:
4777
0
          for (i = Lmin; i < Lmax; i++)
4778
0
            {
4779
0
            int len = 1;
4780
0
            if (Feptr >= mb->end_subject)
4781
0
              {
4782
0
              SCHECK_PARTIAL();
4783
0
              break;
4784
0
              }
4785
0
            GETCHARLEN(fc, Feptr, len);
4786
0
            if (fc < 256 && (mb->ctypes[fc] & ctype_space) != 0) break;
4787
0
            Feptr+= len;
4788
0
            }
4789
0
          break;
4790
4791
0
          case OP_WHITESPACE:
4792
0
          for (i = Lmin; i < Lmax; i++)
4793
0
            {
4794
0
            int len = 1;
4795
0
            if (Feptr >= mb->end_subject)
4796
0
              {
4797
0
              SCHECK_PARTIAL();
4798
0
              break;
4799
0
              }
4800
0
            GETCHARLEN(fc, Feptr, len);
4801
0
            if (fc >= 256 ||(mb->ctypes[fc] & ctype_space) == 0) break;
4802
0
            Feptr+= len;
4803
0
            }
4804
0
          break;
4805
4806
0
          case OP_NOT_WORDCHAR:
4807
0
          for (i = Lmin; i < Lmax; i++)
4808
0
            {
4809
0
            int len = 1;
4810
0
            if (Feptr >= mb->end_subject)
4811
0
              {
4812
0
              SCHECK_PARTIAL();
4813
0
              break;
4814
0
              }
4815
0
            GETCHARLEN(fc, Feptr, len);
4816
0
            if (fc < 256 && (mb->ctypes[fc] & ctype_word) != 0) break;
4817
0
            Feptr+= len;
4818
0
            }
4819
0
          break;
4820
4821
0
          case OP_WORDCHAR:
4822
0
          for (i = Lmin; i < Lmax; i++)
4823
0
            {
4824
0
            int len = 1;
4825
0
            if (Feptr >= mb->end_subject)
4826
0
              {
4827
0
              SCHECK_PARTIAL();
4828
0
              break;
4829
0
              }
4830
0
            GETCHARLEN(fc, Feptr, len);
4831
0
            if (fc >= 256 || (mb->ctypes[fc] & ctype_word) == 0) break;
4832
0
            Feptr+= len;
4833
0
            }
4834
0
          break;
4835
4836
0
          default:
4837
0
          PCRE2_DEBUG_UNREACHABLE();
4838
0
          return PCRE2_ERROR_INTERNAL;
4839
56.0M
          }
4840
4841
56.0M
        if (reptype == REPTYPE_POS) continue;    /* No backtracking */
4842
4843
        /* After \C in UTF mode, Lstart_eptr might be in the middle of a
4844
        Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't go
4845
        too far. */
4846
4847
3.92M
        for(;;)
4848
294M
          {
4849
294M
          if (Feptr <= Lstart_eptr) break;
4850
290M
          RMATCH(Fecode, RM220);
4851
290M
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4852
290M
          Feptr--;
4853
290M
          BACKCHAR(Feptr);
4854
290M
          if (Lctype == OP_ANYNL && Feptr > Lstart_eptr &&
4855
290M
              UCHAR21(Feptr) == CHAR_NL && UCHAR21(Feptr - 1) == CHAR_CR)
4856
0
            Feptr--;
4857
290M
          }
4858
3.92M
        }
4859
10.5M
      else
4860
10.5M
#endif  /* SUPPORT_UNICODE */
4861
4862
      /* Not UTF mode */
4863
10.5M
        {
4864
10.5M
        switch(Lctype)
4865
10.5M
          {
4866
2.53M
          case OP_ANY:
4867
13.4M
          for (i = Lmin; i < Lmax; i++)
4868
11.0M
            {
4869
11.0M
            if (Feptr >= mb->end_subject)
4870
40.8k
              {
4871
40.8k
              SCHECK_PARTIAL();
4872
40.8k
              break;
4873
40.8k
              }
4874
11.0M
            if (IS_NEWLINE(Feptr)) break;
4875
10.9M
            if (mb->partial != 0 &&    /* Take care with CRLF partial */
4876
10.9M
                Feptr + 1 >= mb->end_subject &&
4877
10.9M
                NLBLOCK->nltype == NLTYPE_FIXED &&
4878
10.9M
                NLBLOCK->nllen == 2 &&
4879
10.9M
                *Feptr == NLBLOCK->nl[0])
4880
0
              {
4881
0
              mb->hitend = TRUE;
4882
0
              if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
4883
0
              }
4884
10.9M
            Feptr++;
4885
10.9M
            }
4886
2.53M
          break;
4887
4888
5.83M
          case OP_ALLANY:
4889
5.83M
          case OP_ANYBYTE:
4890
5.83M
          fc = Lmax - Lmin;
4891
5.83M
          if (fc > (uint32_t)(mb->end_subject - Feptr))
4892
47.7k
            {
4893
47.7k
            Feptr = mb->end_subject;
4894
47.7k
            SCHECK_PARTIAL();
4895
47.7k
            }
4896
5.79M
          else Feptr += fc;
4897
5.83M
          break;
4898
4899
5.83M
          case OP_ANYNL:
4900
122k
          for (i = Lmin; i < Lmax; i++)
4901
120k
            {
4902
120k
            if (Feptr >= mb->end_subject)
4903
5.88k
              {
4904
5.88k
              SCHECK_PARTIAL();
4905
5.88k
              break;
4906
5.88k
              }
4907
114k
            fc = *Feptr;
4908
114k
            if (fc == CHAR_CR)
4909
8.19k
              {
4910
8.19k
              if (++Feptr >= mb->end_subject) break;
4911
2.55k
              if (*Feptr == CHAR_LF) Feptr++;
4912
2.55k
              }
4913
106k
            else
4914
106k
              {
4915
106k
              if (fc != CHAR_LF && (mb->bsr_convention == PCRE2_BSR_ANYCRLF ||
4916
99.6k
                 (fc != CHAR_VT && fc != CHAR_FF && fc != CHAR_NEL
4917
#if PCRE2_CODE_UNIT_WIDTH != 8
4918
                 && fc != 0x2028 && fc != 0x2029
4919
#endif
4920
99.6k
                 ))) break;
4921
23.5k
              Feptr++;
4922
23.5k
              }
4923
114k
            }
4924
95.9k
          break;
4925
4926
95.9k
          case OP_NOT_HSPACE:
4927
596k
          for (i = Lmin; i < Lmax; i++)
4928
595k
            {
4929
595k
            if (Feptr >= mb->end_subject)
4930
1.77k
              {
4931
1.77k
              SCHECK_PARTIAL();
4932
1.77k
              break;
4933
1.77k
              }
4934
594k
            switch(*Feptr)
4935
594k
              {
4936
586k
              default: Feptr++; break;
4937
16.0k
              HSPACE_BYTE_CASES:
4938
#if PCRE2_CODE_UNIT_WIDTH != 8
4939
              HSPACE_MULTIBYTE_CASES:
4940
#endif
4941
16.0k
              goto ENDLOOP00;
4942
594k
              }
4943
594k
            }
4944
9.41k
          ENDLOOP00:
4945
9.41k
          break;
4946
4947
381k
          case OP_HSPACE:
4948
400k
          for (i = Lmin; i < Lmax; i++)
4949
388k
            {
4950
388k
            if (Feptr >= mb->end_subject)
4951
1.94k
              {
4952
1.94k
              SCHECK_PARTIAL();
4953
1.94k
              break;
4954
1.94k
              }
4955
386k
            switch(*Feptr)
4956
386k
              {
4957
367k
              default: goto ENDLOOP01;
4958
367k
              HSPACE_BYTE_CASES:
4959
#if PCRE2_CODE_UNIT_WIDTH != 8
4960
              HSPACE_MULTIBYTE_CASES:
4961
#endif
4962
43.0k
              Feptr++; break;
4963
386k
              }
4964
386k
            }
4965
381k
          ENDLOOP01:
4966
381k
          break;
4967
4968
1.18M
          case OP_NOT_VSPACE:
4969
50.2M
          for (i = Lmin; i < Lmax; i++)
4970
50.2M
            {
4971
50.2M
            if (Feptr >= mb->end_subject)
4972
9.70k
              {
4973
9.70k
              SCHECK_PARTIAL();
4974
9.70k
              break;
4975
9.70k
              }
4976
50.2M
            switch(*Feptr)
4977
50.2M
              {
4978
49.1M
              default: Feptr++; break;
4979
4.26M
              VSPACE_BYTE_CASES:
4980
#if PCRE2_CODE_UNIT_WIDTH != 8
4981
              VSPACE_MULTIBYTE_CASES:
4982
#endif
4983
4.26M
              goto ENDLOOP02;
4984
50.2M
              }
4985
50.2M
            }
4986
1.18M
          ENDLOOP02:
4987
1.18M
          break;
4988
4989
21.3k
          case OP_VSPACE:
4990
30.8k
          for (i = Lmin; i < Lmax; i++)
4991
30.8k
            {
4992
30.8k
            if (Feptr >= mb->end_subject)
4993
7
              {
4994
7
              SCHECK_PARTIAL();
4995
7
              break;
4996
7
              }
4997
30.8k
            switch(*Feptr)
4998
30.8k
              {
4999
21.3k
              default: goto ENDLOOP03;
5000
27.1k
              VSPACE_BYTE_CASES:
5001
#if PCRE2_CODE_UNIT_WIDTH != 8
5002
              VSPACE_MULTIBYTE_CASES:
5003
#endif
5004
27.1k
              Feptr++; break;
5005
30.8k
              }
5006
30.8k
            }
5007
21.3k
          ENDLOOP03:
5008
21.3k
          break;
5009
5010
193
          case OP_NOT_DIGIT:
5011
280
          for (i = Lmin; i < Lmax; i++)
5012
223
            {
5013
223
            if (Feptr >= mb->end_subject)
5014
0
              {
5015
0
              SCHECK_PARTIAL();
5016
0
              break;
5017
0
              }
5018
223
            if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_digit) != 0)
5019
136
              break;
5020
87
            Feptr++;
5021
87
            }
5022
193
          break;
5023
5024
53.3k
          case OP_DIGIT:
5025
77.4k
          for (i = Lmin; i < Lmax; i++)
5026
76.6k
            {
5027
76.6k
            if (Feptr >= mb->end_subject)
5028
2.45k
              {
5029
2.45k
              SCHECK_PARTIAL();
5030
2.45k
              break;
5031
2.45k
              }
5032
74.1k
            if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_digit) == 0)
5033
50.0k
              break;
5034
24.1k
            Feptr++;
5035
24.1k
            }
5036
53.3k
          break;
5037
5038
273k
          case OP_NOT_WHITESPACE:
5039
6.91M
          for (i = Lmin; i < Lmax; i++)
5040
6.91M
            {
5041
6.91M
            if (Feptr >= mb->end_subject)
5042
58.0k
              {
5043
58.0k
              SCHECK_PARTIAL();
5044
58.0k
              break;
5045
58.0k
              }
5046
6.85M
            if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_space) != 0)
5047
215k
              break;
5048
6.64M
            Feptr++;
5049
6.64M
            }
5050
273k
          break;
5051
5052
273k
          case OP_WHITESPACE:
5053
16.6k
          for (i = Lmin; i < Lmax; i++)
5054
14.3k
            {
5055
14.3k
            if (Feptr >= mb->end_subject)
5056
28
              {
5057
28
              SCHECK_PARTIAL();
5058
28
              break;
5059
28
              }
5060
14.3k
            if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_space) == 0)
5061
12.0k
              break;
5062
2.29k
            Feptr++;
5063
2.29k
            }
5064
14.3k
          break;
5065
5066
14.3k
          case OP_NOT_WORDCHAR:
5067
162k
          for (i = Lmin; i < Lmax; i++)
5068
161k
            {
5069
161k
            if (Feptr >= mb->end_subject)
5070
1.27k
              {
5071
1.27k
              SCHECK_PARTIAL();
5072
1.27k
              break;
5073
1.27k
              }
5074
160k
            if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_word) != 0)
5075
5.46k
              break;
5076
155k
            Feptr++;
5077
155k
            }
5078
6.78k
          break;
5079
5080
99.3k
          case OP_WORDCHAR:
5081
204k
          for (i = Lmin; i < Lmax; i++)
5082
165k
            {
5083
165k
            if (Feptr >= mb->end_subject)
5084
64
              {
5085
64
              SCHECK_PARTIAL();
5086
64
              break;
5087
64
              }
5088
165k
            if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_word) == 0)
5089
60.3k
              break;
5090
105k
            Feptr++;
5091
105k
            }
5092
99.3k
          break;
5093
5094
99.3k
          default:
5095
0
          PCRE2_DEBUG_UNREACHABLE();
5096
0
          return PCRE2_ERROR_INTERNAL;
5097
10.5M
          }
5098
5099
10.5M
        if (reptype == REPTYPE_POS) continue;    /* No backtracking */
5100
5101
10.2M
        for (;;)
5102
83.4M
          {
5103
83.4M
          if (Feptr == Lstart_eptr) break;
5104
73.1M
          RMATCH(Fecode, RM34);
5105
73.1M
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5106
73.1M
          Feptr--;
5107
73.1M
          if (Lctype == OP_ANYNL && Feptr > Lstart_eptr && *Feptr == CHAR_LF &&
5108
73.1M
              Feptr[-1] == CHAR_CR) Feptr--;
5109
73.1M
          }
5110
10.2M
        }
5111
67.5M
      }
5112
15.1M
    break;  /* End of repeat character type processing */
5113
5114
15.1M
#undef Lstart_eptr
5115
15.1M
#undef Lmin
5116
15.1M
#undef Lmax
5117
15.1M
#undef Lctype
5118
15.1M
#undef Lpropvalue
5119
5120
5121
    /* ===================================================================== */
5122
    /* Match a back reference, possibly repeatedly. Look past the end of the
5123
    item to see if there is repeat information following. The OP_REF and
5124
    OP_REFI opcodes are used for a reference to a numbered group or to a
5125
    non-duplicated named group. For a duplicated named group, OP_DNREF and
5126
    OP_DNREFI are used. In this case we must scan the list of groups to which
5127
    the name refers, and use the first one that is set. */
5128
5129
15.1M
#define Lmin      F->temp_32[0]
5130
15.1M
#define Lmax      F->temp_32[1]
5131
15.1M
#define Lcaseless F->temp_32[2]
5132
15.1M
#define Lcaseopts F->temp_32[3]
5133
15.1M
#define Lstart    F->temp_sptr[0]
5134
15.1M
#define Loffset   F->temp_size
5135
5136
15.1M
    case OP_DNREF:
5137
0
    case OP_DNREFI:
5138
0
    Lcaseless = (Fop == OP_DNREFI);
5139
0
    Lcaseopts = (Fop == OP_DNREFI)? Fecode[1 + 2*IMM2_SIZE] : 0;
5140
0
      {
5141
0
      int count = GET2(Fecode, 1+IMM2_SIZE);
5142
0
      PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size;
5143
0
      Fecode += 1 + 2*IMM2_SIZE + (Fop == OP_DNREFI? 1 : 0);
5144
5145
0
      while (count-- > 0)
5146
0
        {
5147
0
        Loffset = (GET2(slot, 0) << 1) - 2;
5148
0
        if (Loffset < Foffset_top && Fovector[Loffset] != PCRE2_UNSET) break;
5149
0
        slot += mb->name_entry_size;
5150
0
        }
5151
0
      }
5152
0
    goto REF_REPEAT;
5153
5154
0
    case OP_REF:
5155
0
    case OP_REFI:
5156
0
    Lcaseless = (Fop == OP_REFI);
5157
0
    Lcaseopts = (Fop == OP_REFI)? Fecode[1 + IMM2_SIZE] : 0;
5158
0
    Loffset = (GET2(Fecode, 1) << 1) - 2;
5159
0
    Fecode += 1 + IMM2_SIZE + (Fop == OP_REFI? 1 : 0);
5160
5161
    /* Set up for repetition, or handle the non-repeated case. The maximum and
5162
    minimum must be in the heap frame, but as they are short-term values, we
5163
    use temporary fields. */
5164
5165
0
    REF_REPEAT:
5166
0
    switch (*Fecode)
5167
0
      {
5168
0
      case OP_CRSTAR:
5169
0
      case OP_CRMINSTAR:
5170
0
      case OP_CRPLUS:
5171
0
      case OP_CRMINPLUS:
5172
0
      case OP_CRQUERY:
5173
0
      case OP_CRMINQUERY:
5174
0
      fc = *Fecode++ - OP_CRSTAR;
5175
0
      Lmin = rep_min[fc];
5176
0
      Lmax = rep_max[fc];
5177
0
      reptype = rep_typ[fc];
5178
0
      break;
5179
5180
0
      case OP_CRRANGE:
5181
0
      case OP_CRMINRANGE:
5182
0
      Lmin = GET2(Fecode, 1);
5183
0
      Lmax = GET2(Fecode, 1 + IMM2_SIZE);
5184
0
      reptype = rep_typ[*Fecode - OP_CRSTAR];
5185
0
      if (Lmax == 0) Lmax = UINT32_MAX;  /* Max 0 => infinity */
5186
0
      Fecode += 1 + 2 * IMM2_SIZE;
5187
0
      break;
5188
5189
0
      default:                  /* No repeat follows */
5190
0
        {
5191
0
        rrc = match_ref(Loffset, Lcaseless, Lcaseopts, F, mb, &length);
5192
0
        if (rrc != 0)
5193
0
          {
5194
0
          if (rrc > 0) Feptr = mb->end_subject;   /* Partial match */
5195
0
          CHECK_PARTIAL();
5196
0
          RRETURN(MATCH_NOMATCH);
5197
0
          }
5198
0
        }
5199
0
      Feptr += length;
5200
0
      continue;              /* With the main loop */
5201
0
      }
5202
5203
    /* Handle repeated back references. If a set group has length zero, just
5204
    continue with the main loop, because it matches however many times. For an
5205
    unset reference, if the minimum is zero, we can also just continue. We can
5206
    also continue if PCRE2_MATCH_UNSET_BACKREF is set, because this makes an unset
5207
    group behave as a zero-length group. For any other unset cases, carrying
5208
    on will result in NOMATCH. */
5209
5210
0
    if (Loffset < Foffset_top && Fovector[Loffset] != PCRE2_UNSET)
5211
0
      {
5212
0
      if (Fovector[Loffset] == Fovector[Loffset + 1]) continue;
5213
0
      }
5214
0
    else  /* Group is not set */
5215
0
      {
5216
0
      if (Lmin == 0 || (mb->poptions & PCRE2_MATCH_UNSET_BACKREF) != 0)
5217
0
        continue;
5218
0
      }
5219
5220
    /* First, ensure the minimum number of matches are present. */
5221
5222
0
    for (i = 1; i <= Lmin; i++)
5223
0
      {
5224
0
      PCRE2_SIZE slength;
5225
0
      rrc = match_ref(Loffset, Lcaseless, Lcaseopts, F, mb, &slength);
5226
0
      if (rrc != 0)
5227
0
        {
5228
0
        if (rrc > 0) Feptr = mb->end_subject;   /* Partial match */
5229
0
        CHECK_PARTIAL();
5230
0
        RRETURN(MATCH_NOMATCH);
5231
0
        }
5232
0
      Feptr += slength;
5233
0
      }
5234
5235
    /* If min = max, we are done. They are not both allowed to be zero. */
5236
5237
0
    if (Lmin == Lmax) continue;
5238
5239
    /* If minimizing, keep trying and advancing the pointer. */
5240
5241
0
    if (reptype == REPTYPE_MIN)
5242
0
      {
5243
0
      for (;;)
5244
0
        {
5245
0
        PCRE2_SIZE slength;
5246
0
        RMATCH(Fecode, RM20);
5247
0
        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5248
0
        if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
5249
0
        rrc = match_ref(Loffset, Lcaseless, Lcaseopts, F, mb, &slength);
5250
0
        if (rrc != 0)
5251
0
          {
5252
0
          if (rrc > 0) Feptr = mb->end_subject;   /* Partial match */
5253
0
          CHECK_PARTIAL();
5254
0
          RRETURN(MATCH_NOMATCH);
5255
0
          }
5256
0
        Feptr += slength;
5257
0
        }
5258
5259
0
      PCRE2_UNREACHABLE(); /* Control never reaches here */
5260
0
      }
5261
5262
    /* If maximizing, find the longest string and work backwards, as long as
5263
    the matched lengths for each iteration are the same. */
5264
5265
0
    else
5266
0
      {
5267
0
      BOOL samelengths = TRUE;
5268
0
      Lstart = Feptr;     /* Starting position */
5269
0
      Flength = Fovector[Loffset+1] - Fovector[Loffset];
5270
5271
0
      for (i = Lmin; i < Lmax; i++)
5272
0
        {
5273
0
        PCRE2_SIZE slength;
5274
0
        rrc = match_ref(Loffset, Lcaseless, Lcaseopts, F, mb, &slength);
5275
0
        if (rrc != 0)
5276
0
          {
5277
          /* Can't use CHECK_PARTIAL because we don't want to update Feptr in
5278
          the soft partial matching case. */
5279
5280
0
          if (rrc > 0 && mb->partial != 0 &&
5281
0
              mb->end_subject > mb->start_used_ptr)
5282
0
            {
5283
0
            mb->hitend = TRUE;
5284
0
            if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
5285
0
            }
5286
0
          break;
5287
0
          }
5288
5289
0
        if (slength != Flength) samelengths = FALSE;
5290
0
        Feptr += slength;
5291
0
        }
5292
5293
      /* If the length matched for each repetition is the same as the length of
5294
      the captured group, we can easily work backwards. This is the normal
5295
      case. In caseless UTF-8 mode, though, there are pairs of case-equivalent
5296
      characters whose lengths (in terms of code units) differ. However, this
5297
      is very rare, so we handle it by re-matching fewer and fewer times. */
5298
5299
0
      if (samelengths)
5300
0
        {
5301
0
        while (Feptr >= Lstart)
5302
0
          {
5303
0
          RMATCH(Fecode, RM21);
5304
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5305
0
          Feptr -= Flength;
5306
0
          }
5307
0
        }
5308
5309
      /* The rare case of non-matching lengths. Re-scan the repetition for each
5310
      iteration. We know that match_ref() will succeed every time. */
5311
5312
0
      else
5313
0
        {
5314
0
        Lmax = i;
5315
0
        for (;;)
5316
0
          {
5317
0
          RMATCH(Fecode, RM22);
5318
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5319
0
          if (Feptr == Lstart) break; /* Failed after minimal repetition */
5320
0
          Feptr = Lstart;
5321
0
          Lmax--;
5322
0
          for (i = Lmin; i < Lmax; i++)
5323
0
            {
5324
0
            PCRE2_SIZE slength;
5325
0
            (void)match_ref(Loffset, Lcaseless, Lcaseopts, F, mb, &slength);
5326
0
            Feptr += slength;
5327
0
            }
5328
0
          }
5329
0
        }
5330
5331
0
      RRETURN(MATCH_NOMATCH);
5332
0
      }
5333
5334
0
    PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */
5335
5336
0
#undef Lcaseless
5337
0
#undef Lmin
5338
0
#undef Lmax
5339
0
#undef Lstart
5340
0
#undef Loffset
5341
5342
5343
5344
/* ========================================================================= */
5345
/*           Opcodes for the start of various parenthesized items            */
5346
/* ========================================================================= */
5347
5348
    /* In all cases, if the result of RMATCH() is MATCH_THEN, check whether the
5349
    (*THEN) is within the current branch by comparing the address of OP_THEN
5350
    that is passed back with the end of the branch. If (*THEN) is within the
5351
    current branch, and the branch is one of two or more alternatives (it
5352
    either starts or ends with OP_ALT), we have reached the limit of THEN's
5353
    action, so convert the return code to NOMATCH, which will cause normal
5354
    backtracking to happen from now on. Otherwise, THEN is passed back to an
5355
    outer alternative. This implements Perl's treatment of parenthesized
5356
    groups, where a group not containing | does not affect the current
5357
    alternative, that is, (X) is NOT the same as (X|(*F)). */
5358
5359
5360
    /* ===================================================================== */
5361
    /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a non-possessive
5362
    bracket group, indicating that it may occur zero times. It may repeat
5363
    infinitely, or not at all - i.e. it could be ()* or ()? or even (){0} in
5364
    the pattern. Brackets with fixed upper repeat limits are compiled as a
5365
    number of copies, with the optional ones preceded by BRAZERO or BRAMINZERO.
5366
    Possessive groups with possible zero repeats are preceded by BRAPOSZERO. */
5367
5368
1.81k
#define Lnext_ecode F->temp_sptr[0]
5369
5370
112
    case OP_BRAZERO:
5371
112
    Lnext_ecode = Fecode + 1;
5372
112
    RMATCH(Lnext_ecode, RM9);
5373
104
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5374
800
    do Lnext_ecode += GET(Lnext_ecode, 1); while (*Lnext_ecode == OP_ALT);
5375
104
    Fecode = Lnext_ecode + 1 + LINK_SIZE;
5376
104
    break;
5377
5378
0
    case OP_BRAMINZERO:
5379
0
    Lnext_ecode = Fecode + 1;
5380
0
    do Lnext_ecode += GET(Lnext_ecode, 1); while (*Lnext_ecode == OP_ALT);
5381
0
    RMATCH(Lnext_ecode + 1 + LINK_SIZE, RM10);
5382
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5383
0
    Fecode++;
5384
0
    break;
5385
5386
0
#undef Lnext_ecode
5387
5388
0
    case OP_SKIPZERO:
5389
0
    Fecode++;
5390
0
    do Fecode += GET(Fecode,1); while (*Fecode == OP_ALT);
5391
0
    Fecode += 1 + LINK_SIZE;
5392
0
    break;
5393
5394
5395
    /* ===================================================================== */
5396
    /* Handle possessive brackets with an unlimited repeat. The end of these
5397
    brackets will always be OP_KETRPOS, which returns MATCH_KETRPOS without
5398
    going further in the pattern. */
5399
5400
3.10k
#define Lframe_type    F->temp_32[0]
5401
5.41k
#define Lmatched_once  F->temp_32[1]
5402
1.68k
#define Lzero_allowed  F->temp_32[2]
5403
2.84k
#define Lstart_eptr    F->temp_sptr[0]
5404
1.49k
#define Lstart_group   F->temp_sptr[1]
5405
5406
0
    case OP_BRAPOSZERO:
5407
0
    Lzero_allowed = TRUE;                /* Zero repeat is allowed */
5408
0
    Fecode += 1;
5409
0
    if (*Fecode == OP_CBRAPOS || *Fecode == OP_SCBRAPOS)
5410
0
      goto POSSESSIVE_CAPTURE;
5411
0
    goto POSSESSIVE_NON_CAPTURE;
5412
5413
0
    case OP_BRAPOS:
5414
0
    case OP_SBRAPOS:
5415
0
    Lzero_allowed = FALSE;               /* Zero repeat not allowed */
5416
5417
0
    POSSESSIVE_NON_CAPTURE:
5418
0
    Lframe_type = GF_NOCAPTURE;          /* Remembered frame type */
5419
0
    goto POSSESSIVE_GROUP;
5420
5421
340
    case OP_CBRAPOS:
5422
1.41k
    case OP_SCBRAPOS:
5423
1.41k
    Lzero_allowed = FALSE;               /* Zero repeat not allowed */
5424
5425
1.41k
    POSSESSIVE_CAPTURE:
5426
1.41k
    number = GET2(Fecode, 1+LINK_SIZE);
5427
1.41k
    Lframe_type = GF_CAPTURE | number;   /* Remembered frame type */
5428
5429
1.41k
    POSSESSIVE_GROUP:
5430
1.41k
    Lmatched_once = FALSE;               /* Never matched */
5431
1.41k
    Lstart_group = Fecode;               /* Start of this group */
5432
5433
1.41k
    for (;;)
5434
1.68k
      {
5435
1.68k
      Lstart_eptr = Feptr;               /* Position at group start */
5436
1.68k
      group_frame_type = Lframe_type;
5437
1.68k
      RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM8);
5438
1.68k
      if (rrc == MATCH_KETRPOS)
5439
1.15k
        {
5440
1.15k
        Lmatched_once = TRUE;            /* Matched at least once */
5441
1.15k
        if (Feptr == Lstart_eptr)        /* Empty match; skip to end */
5442
1.07k
          {
5443
1.07k
          do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
5444
1.07k
          break;
5445
1.07k
          }
5446
5447
76
        Fecode = Lstart_group;
5448
76
        continue;
5449
1.15k
        }
5450
5451
      /* See comment above about handling THEN. */
5452
5453
530
      if (rrc == MATCH_THEN)
5454
0
        {
5455
0
        PCRE2_SPTR next_ecode = Fecode + GET(Fecode,1);
5456
0
        if (mb->verb_ecode_ptr < next_ecode &&
5457
0
            (*Fecode == OP_ALT || *next_ecode == OP_ALT))
5458
0
          rrc = MATCH_NOMATCH;
5459
0
        }
5460
5461
530
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5462
530
      Fecode += GET(Fecode, 1);
5463
530
      if (*Fecode != OP_ALT) break;
5464
530
      }
5465
5466
    /* Success if matched something or zero repeat allowed */
5467
5468
1.41k
    if (Lmatched_once || Lzero_allowed)
5469
1.15k
      {
5470
1.15k
      Fecode += 1 + LINK_SIZE;
5471
1.15k
      break;
5472
1.15k
      }
5473
5474
264
    RRETURN(MATCH_NOMATCH);
5475
5476
0
#undef Lmatched_once
5477
0
#undef Lzero_allowed
5478
0
#undef Lframe_type
5479
0
#undef Lstart_eptr
5480
0
#undef Lstart_group
5481
5482
5483
    /* ===================================================================== */
5484
    /* Handle non-capturing brackets that cannot match an empty string. When we
5485
    get to the final alternative within the brackets, as long as there are no
5486
    THEN's in the pattern, we can optimize by not recording a new backtracking
5487
    point. (Ideally we should test for a THEN within this group, but we don't
5488
    have that information.) Don't do this if we are at the very top level,
5489
    however, because that would make handling assertions and once-only brackets
5490
    messier when there is nothing to go back to. */
5491
5492
3.31M
#define Lframe_type F->temp_32[0]     /* Set for all that use GROUPLOOP */
5493
8.13k
#define Lnext_branch F->temp_sptr[0]  /* Used only in OP_BRA handling */
5494
5495
464k
    case OP_BRA:
5496
464k
    if (mb->hasthen || Frdepth == 0)
5497
462k
      {
5498
462k
      Lframe_type = 0;
5499
462k
      goto GROUPLOOP;
5500
462k
      }
5501
5502
1.95k
    for (;;)
5503
3.36k
      {
5504
3.36k
      Lnext_branch = Fecode + GET(Fecode, 1);
5505
3.36k
      if (*Lnext_branch != OP_ALT) break;
5506
5507
      /* This is never the final branch. We do not need to test for MATCH_THEN
5508
      here because this code is not used when there is a THEN in the pattern. */
5509
5510
1.41k
      RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM1);
5511
1.40k
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5512
1.40k
      Fecode = Lnext_branch;
5513
1.40k
      }
5514
5515
    /* Hit the start of the final branch. Continue at this level. */
5516
5517
1.95k
    Fecode += PRIV(OP_lengths)[*Fecode];
5518
1.95k
    break;
5519
5520
0
#undef Lnext_branch
5521
5522
5523
    /* ===================================================================== */
5524
    /* Handle a capturing bracket, other than those that are possessive with an
5525
    unlimited repeat. */
5526
5527
305k
    case OP_CBRA:
5528
306k
    case OP_SCBRA:
5529
306k
    Lframe_type = GF_CAPTURE | GET2(Fecode, 1+LINK_SIZE);
5530
306k
    goto GROUPLOOP;
5531
5532
5533
    /* ===================================================================== */
5534
    /* Atomic groups and non-capturing brackets that can match an empty string
5535
    must record a backtracking point and also set up a chained frame. */
5536
5537
0
    case OP_ONCE:
5538
0
    case OP_SCRIPT_RUN:
5539
40
    case OP_SBRA:
5540
40
    Lframe_type = GF_NOCAPTURE | Fop;
5541
5542
768k
    GROUPLOOP:
5543
768k
    for (;;)
5544
2.54M
      {
5545
2.54M
      group_frame_type = Lframe_type;
5546
2.54M
      RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM2);
5547
2.46M
      if (rrc == MATCH_THEN)
5548
0
        {
5549
0
        PCRE2_SPTR next_ecode = Fecode + GET(Fecode,1);
5550
0
        if (mb->verb_ecode_ptr < next_ecode &&
5551
0
            (*Fecode == OP_ALT || *next_ecode == OP_ALT))
5552
0
          rrc = MATCH_NOMATCH;
5553
0
        }
5554
2.46M
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5555
2.46M
      Fecode += GET(Fecode, 1);
5556
2.46M
      if (*Fecode != OP_ALT) RRETURN(MATCH_NOMATCH);
5557
2.46M
      }
5558
0
    PCRE2_UNREACHABLE(); /* Control never reaches here */
5559
5560
0
#undef Lframe_type
5561
5562
5563
    /* ===================================================================== */
5564
    /* Pattern recursion either matches the current regex, or some
5565
    subexpression. The offset data is the offset to the starting bracket from
5566
    the start of the whole pattern. This is so that it works from duplicated
5567
    subpatterns. For a whole-pattern recursion, we have to infer the number
5568
    zero. */
5569
5570
847
#define Lframe_type F->temp_32[0]
5571
1.85k
#define Lstart_branch F->temp_sptr[0]
5572
5573
342
    case OP_RECURSE:
5574
342
    bracode = mb->start_code + GET(Fecode, 1);
5575
342
    number = (bracode == mb->start_code)? 0 : GET2(bracode, 1 + LINK_SIZE);
5576
5577
    /* If we are already in a pattern recursion, check for repeating the same
5578
    one without changing the subject pointer or the last referenced character
5579
    in the subject. This should catch convoluted mutual recursions; some
5580
    simple cases are caught at compile time. However, there are rare cases when
5581
    this check needs to be turned off. In this case, actual recursion loops
5582
    will be caught by the match or heap limits. */
5583
5584
342
    if (Fcurrent_recurse != RECURSE_UNSET)
5585
19
      {
5586
19
      offset = Flast_group_offset;
5587
54
      while (offset != PCRE2_UNSET)
5588
37
        {
5589
37
        N = (heapframe *)((char *)match_data->heapframes + offset);
5590
37
        P = (heapframe *)((char *)N - frame_size);
5591
37
        if (N->group_frame_type == (GF_RECURSE | number))
5592
2
          {
5593
2
          if (Feptr == P->eptr && mb->last_used_ptr == P->recurse_last_used &&
5594
2
               (mb->moptions & PCRE2_DISABLE_RECURSELOOP_CHECK) == 0)
5595
2
            return PCRE2_ERROR_RECURSELOOP;
5596
0
          break;
5597
2
          }
5598
35
        offset = P->last_group_offset;
5599
35
        }
5600
19
      }
5601
5602
    /* Remember the current last referenced character and then run the
5603
    recursion branch by branch. */
5604
5605
340
    F->recurse_last_used = mb->last_used_ptr;
5606
340
    Lstart_branch = bracode;
5607
340
    Lframe_type = GF_RECURSE | number;
5608
5609
340
    for (;;)
5610
507
      {
5611
507
      PCRE2_SPTR next_ecode;
5612
5613
507
      group_frame_type = Lframe_type;
5614
507
      RMATCH(Lstart_branch + PRIV(OP_lengths)[*Lstart_branch], RM11);
5615
505
      next_ecode = Lstart_branch + GET(Lstart_branch,1);
5616
5617
      /* Handle backtracking verbs, which are defined in a range that can
5618
      easily be tested for. PCRE does not allow THEN, SKIP, PRUNE or COMMIT to
5619
      escape beyond a recursion; they cause a NOMATCH for the entire recursion.
5620
5621
      When one of these verbs triggers, the current recursion group number is
5622
      recorded. If it matches the recursion we are processing, the verb
5623
      happened within the recursion and we must deal with it. Otherwise it must
5624
      have happened after the recursion completed, and so has to be passed
5625
      back. See comment above about handling THEN. */
5626
5627
505
      if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX &&
5628
505
          mb->verb_current_recurse == (Lframe_type ^ GF_RECURSE))
5629
0
        {
5630
0
        if (rrc == MATCH_THEN && mb->verb_ecode_ptr < next_ecode &&
5631
0
            (*Lstart_branch == OP_ALT || *next_ecode == OP_ALT))
5632
0
          rrc = MATCH_NOMATCH;
5633
0
        else RRETURN(MATCH_NOMATCH);
5634
0
        }
5635
5636
      /* Note that carrying on after (*ACCEPT) in a recursion is handled in the
5637
      OP_ACCEPT code. Nothing needs to be done here. */
5638
5639
505
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5640
505
      Lstart_branch = next_ecode;
5641
505
      if (*Lstart_branch != OP_ALT) RRETURN(MATCH_NOMATCH);
5642
505
      }
5643
0
    PCRE2_UNREACHABLE(); /* Control never reaches here */
5644
5645
0
#undef Lframe_type
5646
0
#undef Lstart_branch
5647
5648
5649
    /* ===================================================================== */
5650
    /* Positive assertions are like other groups except that PCRE doesn't allow
5651
    the effect of (*THEN) to escape beyond an assertion; it is therefore
5652
    treated as NOMATCH. (*ACCEPT) is treated as successful assertion, with its
5653
    captures and mark retained. Any other return is an error. */
5654
5655
3.49k
#define Lframe_type  F->temp_32[0]
5656
5657
0
    case OP_ASSERT:
5658
658
    case OP_ASSERTBACK:
5659
676
    case OP_ASSERT_NA:
5660
676
    case OP_ASSERTBACK_NA:
5661
676
    Lframe_type = GF_NOCAPTURE | Fop;
5662
676
    for (;;)
5663
2.81k
      {
5664
2.81k
      group_frame_type = Lframe_type;
5665
2.81k
      RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM3);
5666
2.80k
      if (rrc == MATCH_ACCEPT)
5667
0
        {
5668
0
        memcpy(Fovector,
5669
0
              (char *)assert_accept_frame + offsetof(heapframe, ovector),
5670
0
              assert_accept_frame->offset_top * sizeof(PCRE2_SIZE));
5671
0
        Foffset_top = assert_accept_frame->offset_top;
5672
0
        Fmark = assert_accept_frame->mark;
5673
0
        break;
5674
0
        }
5675
2.80k
      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
5676
2.80k
      Fecode += GET(Fecode, 1);
5677
2.80k
      if (*Fecode != OP_ALT) RRETURN(MATCH_NOMATCH);
5678
2.80k
      }
5679
5680
0
    do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
5681
0
    Fecode += 1 + LINK_SIZE;
5682
0
    break;
5683
5684
0
#undef Lframe_type
5685
5686
5687
    /* ===================================================================== */
5688
    /* Handle negative assertions. Loop for each non-matching branch as for
5689
    positive assertions. */
5690
5691
64
#define Lframe_type  F->temp_32[0]
5692
5693
16
    case OP_ASSERT_NOT:
5694
16
    case OP_ASSERTBACK_NOT:
5695
16
    Lframe_type  = GF_NOCAPTURE | Fop;
5696
5697
16
    for (;;)
5698
48
      {
5699
48
      group_frame_type = Lframe_type;
5700
48
      RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM4);
5701
48
      switch(rrc)
5702
48
        {
5703
0
        case MATCH_ACCEPT:   /* Assertion matched, therefore it fails. */
5704
16
        case MATCH_MATCH:
5705
16
        RRETURN (MATCH_NOMATCH);
5706
5707
32
        case MATCH_NOMATCH:  /* Branch failed, try next if present. */
5708
32
        case MATCH_THEN:
5709
32
        Fecode += GET(Fecode, 1);
5710
32
        if (*Fecode != OP_ALT) goto ASSERT_NOT_FAILED;
5711
32
        break;
5712
5713
32
        case MATCH_COMMIT:   /* Assertion forced to fail, therefore continue. */
5714
0
        case MATCH_SKIP:
5715
0
        case MATCH_PRUNE:
5716
0
        do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
5717
0
        goto ASSERT_NOT_FAILED;
5718
5719
0
        default:             /* Pass back any other return */
5720
0
        RRETURN(rrc);
5721
48
        }
5722
48
      }
5723
5724
    /* None of the branches have matched or there was a backtrack to (*COMMIT),
5725
    (*SKIP), (*PRUNE), or (*THEN) in the last branch. This is success for a
5726
    negative assertion, so carry on. */
5727
5728
0
    ASSERT_NOT_FAILED:
5729
0
    Fecode += 1 + LINK_SIZE;
5730
0
    break;
5731
5732
0
#undef Lframe_type
5733
5734
    /* ===================================================================== */
5735
    /* Handle scan substring operation. */
5736
5737
0
#define Lframe_type          F->temp_32[0]
5738
0
#define Lextra_size          F->temp_32[1]
5739
0
#define Lsaved_moptions      F->temp_32[2]
5740
0
#define Lsaved_end_subject   F->temp_sptr[0]
5741
0
#define Lsaved_eptr          F->temp_sptr[1]
5742
0
#define Ltrue_end_extra      F->temp_size
5743
5744
0
    case OP_ASSERT_SCS:
5745
0
      {
5746
0
      PCRE2_SPTR ecode = Fecode + 1 + LINK_SIZE;
5747
0
      uint32_t extra_size = 0;
5748
0
      int count;
5749
0
      PCRE2_SPTR slot;
5750
5751
      /* Pre-set offset so the compiler does not warn (presumably about a possibly uninitialized use). */
5752
0
      offset = 0;
5753
0
      (void)offset;
5754
5755
0
      for (;;)
5756
0
        {
5757
0
        if (*ecode == OP_CREF)
5758
0
          {
5759
0
          extra_size += 1+IMM2_SIZE;
5760
0
          offset = (GET2(ecode, 1) << 1) - 2;
5761
0
          ecode += 1+IMM2_SIZE;
5762
0
          if (offset < Foffset_top && Fovector[offset] != PCRE2_UNSET)
5763
0
            goto SCS_OFFSET_FOUND;
5764
0
          continue;
5765
0
          }
5766
5767
0
        if (*ecode != OP_DNCREF) RRETURN(MATCH_NOMATCH);
5768
5769
0
        count = GET2(ecode, 1 + IMM2_SIZE);
5770
0
        slot = mb->name_table + GET2(ecode, 1) * mb->name_entry_size;
5771
0
        extra_size += 1+2*IMM2_SIZE;
5772
0
        ecode += 1+2*IMM2_SIZE;
5773
5774
0
        while (count > 0)
5775
0
          {
5776
0
          offset = (GET2(slot, 0) << 1) - 2;
5777
0
          if (offset < Foffset_top && Fovector[offset] != PCRE2_UNSET)
5778
0
            goto SCS_OFFSET_FOUND;
5779
0
          slot += mb->name_entry_size;
5780
0
          count--;
5781
0
          }
5782
0
        }
5783
5784
0
      SCS_OFFSET_FOUND:
5785
5786
      /* Skip any remaining OP_CREF/OP_DNCREF items, still counting their size. */
5787
0
      for (;;)
5788
0
        {
5789
0
        if (*ecode == OP_CREF)
5790
0
          {
5791
0
          extra_size += 1+IMM2_SIZE;
5792
0
          ecode += 1+IMM2_SIZE;
5793
0
          }
5794
0
        else if (*ecode == OP_DNCREF)
5795
0
          {
5796
0
          extra_size += 1+2*IMM2_SIZE;
5797
0
          ecode += 1+2*IMM2_SIZE;
5798
0
          }
5799
0
        else break;
5800
0
        }
5801
5802
0
      Lextra_size = extra_size;
5803
0
      }
5804
5805
0
    Lsaved_end_subject = mb->end_subject;
5806
0
    Ltrue_end_extra = mb->true_end_subject - mb->end_subject;
5807
0
    Lsaved_eptr = Feptr;
5808
0
    Lsaved_moptions = mb->moptions;
5809
5810
0
    Feptr = mb->start_subject + Fovector[offset];
5811
0
    mb->true_end_subject = mb->end_subject =
5812
0
      mb->start_subject + Fovector[offset + 1];
5813
0
    mb->moptions &= ~PCRE2_NOTEOL;
5814
5815
0
    Lframe_type = GF_NOCAPTURE | Fop;
5816
0
    for (;;)
5817
0
      {
5818
0
      group_frame_type = Lframe_type;
5819
0
      RMATCH(Fecode + 1 + LINK_SIZE + Lextra_size, RM38);
5820
0
      if (rrc == MATCH_ACCEPT)
5821
0
        {
5822
0
        memcpy(Fovector,
5823
0
              (char *)assert_accept_frame + offsetof(heapframe, ovector),
5824
0
              assert_accept_frame->offset_top * sizeof(PCRE2_SIZE));
5825
0
        Foffset_top = assert_accept_frame->offset_top;
5826
0
        Fmark = assert_accept_frame->mark;
5827
0
        break;
5828
0
        }
5829
5830
0
      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
5831
0
        {
5832
0
        mb->end_subject = Lsaved_end_subject;
5833
0
        mb->true_end_subject = mb->end_subject + Ltrue_end_extra;
5834
0
        mb->moptions = Lsaved_moptions;
5835
0
        RRETURN(rrc);
5836
0
        }
5837
5838
0
      Fecode += GET(Fecode, 1);
5839
0
      if (*Fecode != OP_ALT)
5840
0
        {
5841
0
        mb->end_subject = Lsaved_end_subject;
5842
0
        mb->true_end_subject = mb->end_subject + Ltrue_end_extra;
5843
0
        mb->moptions = Lsaved_moptions;
5844
0
        RRETURN(MATCH_NOMATCH);
5845
0
        }
5846
0
      Lextra_size = 0;
5847
0
      }
5848
5849
0
    do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
5850
0
    Fecode += 1 + LINK_SIZE;
5851
0
    Feptr = Lsaved_eptr;
5852
0
    break;
5853
5854
0
#undef Lframe_type
5855
0
#undef Lextra_size
5856
0
#undef Lsaved_end_subject
5857
0
#undef Lsaved_eptr
5858
0
#undef Ltrue_end_extra
5859
0
#undef Lsaved_moptions  /* Name must match the #define used by OP_ASSERT_SCS */
5860
5861
    /* ===================================================================== */
5862
    /* The callout item calls an external function, if one is provided, passing
5863
    details of the match so far. This is mainly for debugging, though the
5864
    function is able to force a failure. */
5865
5866
0
    case OP_CALLOUT:
5867
0
    case OP_CALLOUT_STR:
5868
0
    rrc = do_callout(F, mb, &length);
5869
0
    if (rrc > 0) RRETURN(MATCH_NOMATCH);
5870
0
    if (rrc < 0) RRETURN(rrc);
5871
0
    Fecode += length;
5872
0
    break;
5873
5874
5875
    /* ===================================================================== */
5876
    /* Conditional group: compilation checked that there are no more than two
5877
    branches. If the condition is false, skipping the first branch takes us
5878
    past the end of the item if there is only one branch, but that's exactly
5879
    what we want. */
5880
5881
0
    case OP_COND:
5882
0
    case OP_SCOND:
5883
5884
    /* The variable Flength will be added to Fecode when the condition is
5885
    false, to get to the second branch. Setting it to the offset to the ALT or
5886
    KET, then incrementing Fecode achieves this effect. However, if the second
5887
    branch is non-existent, we must point to the KET so that the end of the
5888
    group is correctly processed. We now have Fecode pointing to the condition
5889
    or callout. */
5890
5891
0
    Flength = GET(Fecode, 1);    /* Offset to the second branch */
5892
0
    if (Fecode[Flength] != OP_ALT) Flength -= 1 + LINK_SIZE;
5893
0
    Fecode += 1 + LINK_SIZE;     /* From this opcode */
5894
5895
    /* Because of the way auto-callout works during compile, a callout item is
5896
    inserted between OP_COND and an assertion condition. Such a callout can
5897
    also be inserted manually. */
5898
5899
0
    if (*Fecode == OP_CALLOUT || *Fecode == OP_CALLOUT_STR)
5900
0
      {
5901
0
      rrc = do_callout(F, mb, &length);
5902
0
      if (rrc > 0) RRETURN(MATCH_NOMATCH);
5903
0
      if (rrc < 0) RRETURN(rrc);
5904
5905
      /* Advance Fecode past the callout, so it now points to the condition. We
5906
      must adjust Flength so that the value of Fecode+Flength is unchanged. */
5907
5908
0
      Fecode += length;
5909
0
      Flength -= length;
5910
0
      }
5911
5912
    /* Test the various possible conditions */
5913
5914
0
    condition = FALSE;
5915
0
    switch(*Fecode)
5916
0
      {
5917
0
      case OP_RREF:                  /* Group recursion test */
5918
0
      if (Fcurrent_recurse != RECURSE_UNSET)
5919
0
        {
5920
0
        number = GET2(Fecode, 1);
5921
0
        condition = (number == RREF_ANY || number == Fcurrent_recurse);
5922
0
        }
5923
0
      break;
5924
5925
0
      case OP_DNRREF:       /* Duplicate named group recursion test */
5926
0
      if (Fcurrent_recurse != RECURSE_UNSET)
5927
0
        {
5928
0
        int count = GET2(Fecode, 1 + IMM2_SIZE);
5929
0
        PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size;
5930
0
        while (count-- > 0)
5931
0
          {
5932
0
          number = GET2(slot, 0);
5933
0
          condition = number == Fcurrent_recurse;
5934
0
          if (condition) break;
5935
0
          slot += mb->name_entry_size;
5936
0
          }
5937
0
        }
5938
0
      break;
5939
5940
0
      case OP_CREF:                         /* Numbered group used test */
5941
0
      offset = (GET2(Fecode, 1) << 1) - 2;  /* Doubled ref number */
5942
0
      condition = offset < Foffset_top && Fovector[offset] != PCRE2_UNSET;
5943
0
      break;
5944
5945
0
      case OP_DNCREF:      /* Duplicate named group used test */
5946
0
        {
5947
0
        int count = GET2(Fecode, 1 + IMM2_SIZE);
5948
0
        PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size;
5949
0
        while (count-- > 0)
5950
0
          {
5951
0
          offset = (GET2(slot, 0) << 1) - 2;
5952
0
          condition = offset < Foffset_top && Fovector[offset] != PCRE2_UNSET;
5953
0
          if (condition) break;
5954
0
          slot += mb->name_entry_size;
5955
0
          }
5956
0
        }
5957
0
      break;
5958
5959
0
      case OP_FALSE:
5960
0
      case OP_FAIL:   /* The assertion (?!) becomes OP_FAIL */
5961
0
      break;
5962
5963
0
      case OP_TRUE:
5964
0
      condition = TRUE;
5965
0
      break;
5966
5967
      /* The condition is an assertion. Run code similar to the assertion code
5968
      above. */
5969
5970
0
#define Lpositive      F->temp_32[0]
5971
0
#define Lstart_branch  F->temp_sptr[0]
5972
5973
0
      default:
5974
0
      Lpositive = (*Fecode == OP_ASSERT || *Fecode == OP_ASSERTBACK);
5975
0
      Lstart_branch = Fecode;
5976
5977
0
      for (;;)
5978
0
        {
5979
0
        group_frame_type = GF_CONDASSERT | *Fecode;
5980
0
        RMATCH(Lstart_branch + PRIV(OP_lengths)[*Lstart_branch], RM5);
5981
5982
0
        switch(rrc)
5983
0
          {
5984
0
          case MATCH_ACCEPT:  /* Save captures */
5985
0
          memcpy(Fovector,
5986
0
                (char *)assert_accept_frame + offsetof(heapframe, ovector),
5987
0
                assert_accept_frame->offset_top * sizeof(PCRE2_SIZE));
5988
0
          Foffset_top = assert_accept_frame->offset_top;
5989
5990
          /* Fall through */
5991
          /* In the case of a match, the captures have already been put into
5992
          the current frame. */
5993
5994
0
          case MATCH_MATCH:
5995
0
          condition = Lpositive;   /* TRUE for positive assertion */
5996
0
          break;
5997
5998
          /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
5999
          assertion; it is therefore always treated as NOMATCH. */
6000
6001
0
          case MATCH_NOMATCH:
6002
0
          case MATCH_THEN:
6003
0
          Lstart_branch += GET(Lstart_branch, 1);
6004
0
          if (*Lstart_branch == OP_ALT) continue;  /* Try next branch */
6005
0
          condition = !Lpositive;  /* TRUE for negative assertion */
6006
0
          break;
6007
6008
          /* These force no match without checking other branches. */
6009
6010
0
          case MATCH_COMMIT:
6011
0
          case MATCH_SKIP:
6012
0
          case MATCH_PRUNE:
6013
0
          condition = !Lpositive;
6014
0
          break;
6015
6016
0
          default:
6017
0
          RRETURN(rrc);
6018
0
          }
6019
0
        break;  /* Out of the branch loop */
6020
0
        }
6021
6022
      /* If the condition is true, find the end of the assertion so that
6023
      advancing past it gets us to the start of the first branch. */
6024
6025
0
      if (condition)
6026
0
        {
6027
0
        do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
6028
0
        }
6029
0
      break;  /* End of assertion condition */
6030
0
      }
6031
6032
0
#undef Lpositive
6033
0
#undef Lstart_branch
6034
6035
    /* Choose branch according to the condition. */
6036
6037
0
    Fecode += condition? PRIV(OP_lengths)[*Fecode] : Flength;
6038
6039
    /* If the opcode is OP_SCOND it means we are at a repeated conditional
6040
    group that might match an empty string. We must therefore descend a level
6041
    so that the start is remembered for checking. For OP_COND we can just
6042
    continue at this level. */
6043
6044
0
    if (Fop == OP_SCOND)
6045
0
      {
6046
0
      group_frame_type  = GF_NOCAPTURE | Fop;
6047
0
      RMATCH(Fecode, RM35);
6048
0
      RRETURN(rrc);
6049
0
      }
6050
0
    break;
6051
6052
6053
6054
/* ========================================================================= */
6055
/*                  End of start of parenthesis opcodes                      */
6056
/* ========================================================================= */
6057
6058
6059
    /* ===================================================================== */
6060
    /* Move the subject pointer back by one fixed amount. This occurs at the
6061
    start of each branch that has a fixed length in a lookbehind assertion. If
6062
    we are too close to the start to move back, fail. When working with UTF-8
6063
    we move back a number of characters, not bytes. */
6064
6065
803
    case OP_REVERSE:
6066
803
    number = GET2(Fecode, 1);
6067
803
#ifdef SUPPORT_UNICODE
6068
803
    if (utf)
6069
0
      {
6070
      /* We used to do a simpler `while (number-- > 0)` but that triggers
6071
      clang's unsigned integer overflow sanitizer. */
6072
0
      while (number > 0)
6073
0
        {
6074
0
        --number;
6075
0
        if (Feptr <= mb->check_subject) RRETURN(MATCH_NOMATCH);
6076
0
        Feptr--;
6077
0
        BACKCHAR(Feptr);
6078
0
        }
6079
0
      }
6080
803
    else
6081
803
#endif
6082
6083
    /* No UTF support, or not in UTF mode: count is code unit count */
6084
6085
803
      {
6086
803
      if ((ptrdiff_t)number > Feptr - mb->start_subject) RRETURN(MATCH_NOMATCH);
6087
601
      Feptr -= number;
6088
601
      }
6089
6090
    /* Save the earliest consulted character, then skip to next opcode */
6091
6092
601
    if (Feptr < mb->start_used_ptr) mb->start_used_ptr = Feptr;
6093
601
    Fecode += 1 + IMM2_SIZE;
6094
601
    break;
6095
6096
6097
    /* ===================================================================== */
6098
    /* Move the subject pointer back by a variable amount. This occurs at the
6099
    start of each branch of a lookbehind assertion when the branch has a
6100
    variable, but limited, length. A loop is needed to try matching the branch
6101
    after moving back different numbers of characters. If we are too close to
6102
    the start to move back even the minimum amount, fail. When working with
6103
    UTF-8 we move back a number of characters, not bytes. */
6104
6105
9.09k
#define Lmin F->temp_32[0]
6106
11.8k
#define Lmax F->temp_32[1]
6107
1.74k
#define Leptr F->temp_sptr[0]
6108
6109
1.74k
    case OP_VREVERSE:
6110
1.74k
    Lmin = GET2(Fecode, 1);
6111
1.74k
    Lmax = GET2(Fecode, 1 + IMM2_SIZE);
6112
1.74k
    Leptr = Feptr;
6113
6114
    /* Move back by the maximum branch length and then work forwards. This
6115
    ensures that items such as \d{3,5} get the maximum length, which is
6116
    relevant for captures, and makes for Perl compatibility. */
6117
6118
1.74k
#ifdef SUPPORT_UNICODE
6119
1.74k
    if (utf)
6120
0
      {
6121
0
      for (i = 0; i < Lmax; i++)
6122
0
        {
6123
0
        if (Feptr == mb->start_subject)
6124
0
          {
6125
0
          if (i < Lmin) RRETURN(MATCH_NOMATCH);
6126
0
          Lmax = i;
6127
0
          break;
6128
0
          }
6129
0
        Feptr--;
6130
0
        BACKCHAR(Feptr);
6131
0
        }
6132
0
      }
6133
1.74k
    else
6134
1.74k
#endif
6135
6136
    /* No UTF support or not in UTF mode */
6137
6138
1.74k
      {
6139
1.74k
      ptrdiff_t diff = Feptr - mb->start_subject;
6140
1.74k
      uint32_t available = (diff > 65535)? 65535 : ((diff > 0)? (int)diff : 0);
6141
1.74k
      if (Lmin > available) RRETURN(MATCH_NOMATCH);
6142
1.56k
      if (Lmax > available) Lmax = available;
6143
1.56k
      Feptr -= Lmax;
6144
1.56k
      }
6145
6146
    /* Now try matching, moving forward one character on failure, until we
6147
    reach the minimum back length. */
6148
6149
1.56k
    for (;;)
6150
5.59k
      {
6151
5.59k
      RMATCH(Fecode + 1 + 2 * IMM2_SIZE, RM37);
6152
5.59k
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6153
5.59k
      if (Lmax-- <= Lmin) RRETURN(MATCH_NOMATCH);
6154
4.02k
      Feptr++;
6155
4.02k
#ifdef SUPPORT_UNICODE
6156
4.02k
      if (utf) { FORWARDCHARTEST(Feptr, mb->end_subject); }
6157
4.02k
#endif
6158
4.02k
      }
6159
0
    PCRE2_UNREACHABLE(); /* Control never reaches here */
6160
6161
0
#undef Lmin
6162
0
#undef Lmax
6163
0
#undef Leptr
6164
6165
    /* ===================================================================== */
6166
    /* An alternation is the end of a branch; scan along to find the end of the
6167
    bracketed group. */
6168
6169
482k
    case OP_ALT:
6170
482k
    branch_end = Fecode;
6171
1.88M
    do Fecode += GET(Fecode,1); while (*Fecode == OP_ALT);
6172
482k
    break;
6173
6174
6175
    /* ===================================================================== */
6176
    /* The end of a parenthesized group. For all but OP_BRA and OP_COND, the
6177
    starting frame was added to the chained frames in order to remember the
6178
    starting subject position for the group. (Not true for OP_BRA when it's a
6179
    whole pattern recursion, but that is handled separately below.)*/
6180
6181
487k
    case OP_KET:
6182
487k
    case OP_KETRMIN:
6183
489k
    case OP_KETRMAX:
6184
490k
    case OP_KETRPOS:
6185
6186
490k
    bracode = Fecode - GET(Fecode, 1);
6187
6188
490k
    if (branch_end == NULL) branch_end = Fecode;
6189
490k
    branch_start = bracode;
6190
1.62M
    while (branch_start + GET(branch_start, 1) != branch_end)
6191
1.13M
      branch_start += GET(branch_start, 1);
6192
490k
    branch_end = NULL;
6193
6194
    /* Point N to the frame at the start of the most recent group, and P to its
6195
    predecessor. Remember the subject pointer at the start of the group. */
6196
6197
490k
    if (*bracode != OP_BRA && *bracode != OP_COND)
6198
308k
      {
6199
308k
      N = (heapframe *)((char *)match_data->heapframes + Flast_group_offset);
6200
308k
      P = (heapframe *)((char *)N - frame_size);
6201
308k
      Flast_group_offset = P->last_group_offset;
6202
6203
#ifdef DEBUG_SHOW_RMATCH
6204
      fprintf(stderr, "++ KET for frame=%d type=%x prev char offset=%lu\n",
6205
        N->rdepth, N->group_frame_type,
6206
        (char *)P->eptr - (char *)mb->start_subject);
6207
#endif
6208
6209
      /* If we are at the end of an assertion that is a condition, first check
6210
      to see if we are at the end of a variable-length branch in a lookbehind.
6211
      If this is the case and we have not landed on the current character,
6212
      return no match. Compare code below for non-condition lookbehinds. In
6213
      other cases, return a match, discarding any intermediate backtracking
6214
      points. Copy back the mark setting and the captures into the frame before
6215
      N so that they are set on return. Doing this for all assertions, both
6216
      positive and negative, seems to match what Perl does. */
6217
6218
308k
      if (GF_IDMASK(N->group_frame_type) == GF_CONDASSERT)
6219
0
        {
6220
0
        if ((*bracode == OP_ASSERTBACK || *bracode == OP_ASSERTBACK_NOT) &&
6221
0
            branch_start[1 + LINK_SIZE] == OP_VREVERSE && Feptr != P->eptr)
6222
0
          RRETURN(MATCH_NOMATCH);
6223
0
        memcpy((char *)P + offsetof(heapframe, ovector), Fovector,
6224
0
          Foffset_top * sizeof(PCRE2_SIZE));
6225
0
        P->offset_top = Foffset_top;
6226
0
        P->mark = Fmark;
6227
0
        Fback_frame = (char *)F - (char *)P;
6228
0
        RRETURN(MATCH_MATCH);
6229
0
        }
6230
308k
      }
6231
182k
    else P = NULL;   /* Indicates starting frame not recorded */
6232
6233
    /* The group was not a conditional assertion. */
6234
6235
490k
    switch (*bracode)
6236
490k
      {
6237
      /* Whole pattern recursion is handled as a recursion into group 0, but
6238
      the entire pattern is wrapped in OP_BRA/OP_KET rather than a capturing
6239
      group - a design mistake: it should perhaps have been capture group 0.
6240
      Anyway, that means the end of such recursion must be handled here. It is
6241
      detected by checking for an immediately following OP_END when we are
6242
      recursing in group 0. If this is not the end of a whole-pattern
6243
      recursion, there is nothing to be done. */
6244
6245
182k
      case OP_BRA:
6246
182k
      if (Fcurrent_recurse != 0 || Fecode[1+LINK_SIZE] != OP_END) break;
6247
6248
      /* It is the end of whole-pattern recursion. */
6249
6250
0
      offset = Flast_group_offset;
6251
6252
      /* Corrupted heapframes? Trigger an assert and return an error. */
6253
0
      PCRE2_ASSERT(offset != PCRE2_UNSET);
6254
0
      if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL;
6255
6256
0
      N = (heapframe *)((char *)match_data->heapframes + offset);
6257
0
      P = (heapframe *)((char *)N - frame_size);
6258
0
      Flast_group_offset = P->last_group_offset;
6259
6260
      /* Reinstate the previous set of captures and then carry on after the
6261
      recursion call. */
6262
6263
0
      memcpy((char *)F + offsetof(heapframe, ovector), P->ovector,
6264
0
        Foffset_top * sizeof(PCRE2_SIZE));
6265
0
      Foffset_top = P->offset_top;
6266
0
      Fcapture_last = P->capture_last;
6267
0
      Fcurrent_recurse = P->current_recurse;
6268
0
      Fecode = P->ecode + 1 + LINK_SIZE;
6269
0
      continue;  /* With next opcode */
6270
6271
0
      case OP_COND:     /* No need to do anything for these */
6272
0
      case OP_SCOND:
6273
0
      break;
6274
6275
      /* Non-atomic positive assertions are like OP_BRA, except that the
6276
      subject pointer must be put back to where it was at the start of the
6277
      assertion. For a variable lookbehind, check its end point. */
6278
6279
0
      case OP_ASSERTBACK_NA:
6280
0
      if (branch_start[1 + LINK_SIZE] == OP_VREVERSE && Feptr != P->eptr)
6281
0
        RRETURN(MATCH_NOMATCH);
6282
      /* Fall through */
6283
6284
0
      case OP_ASSERT_NA:
6285
0
      if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
6286
0
      Feptr = P->eptr;
6287
0
      break;
6288
6289
      /* Atomic positive assertions are like OP_ONCE, except that in addition
6290
      the subject pointer must be put back to where it was at the start of the
6291
      assertion. For a variable lookbehind, check its end point. */
6292
6293
249
      case OP_ASSERTBACK:
6294
249
      if (branch_start[1 + LINK_SIZE] == OP_VREVERSE && Feptr != P->eptr)
6295
0
        RRETURN(MATCH_NOMATCH);
6296
      /* Fall through */
6297
6298
249
      case OP_ASSERT:
6299
249
      if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
6300
249
      Feptr = P->eptr;
6301
      /* Fall through */
6302
6303
      /* For an atomic group, discard internal backtracking points. We must
6304
      also ensure that any remaining branches within the top-level of the group
6305
      are not tried. Do this by adjusting the code pointer within the backtrack
6306
      frame so that it points to the final branch. */
6307
6308
249
      case OP_ONCE:
6309
249
      Fback_frame = ((char *)F - (char *)P);
6310
249
      for (;;)
6311
1.06k
        {
6312
1.06k
        uint32_t y = GET(P->ecode,1);
6313
1.06k
        if ((P->ecode)[y] != OP_ALT) break;
6314
819
        P->ecode += y;
6315
819
        }
6316
249
      break;
6317
6318
      /* A matching negative assertion returns MATCH, which is turned into
6319
      NOMATCH at the assertion level. For a variable lookbehind, check its end
6320
      point. */
6321
6322
0
      case OP_ASSERTBACK_NOT:
6323
0
      if (branch_start[1 + LINK_SIZE] == OP_VREVERSE && Feptr != P->eptr)
6324
0
        RRETURN(MATCH_NOMATCH);
6325
      /* Fall through */
6326
6327
16
      case OP_ASSERT_NOT:
6328
16
      RRETURN(MATCH_MATCH);
6329
6330
      /* A scan substring group must preserve the current end_subject,
6331
      and restore it before the backtracking is performed into its sub
6332
      pattern. */
6333
6334
0
      case OP_ASSERT_SCS:
6335
0
      F->temp_sptr[0] = mb->end_subject;
6336
0
      mb->end_subject = P->temp_sptr[0];
6337
0
      mb->true_end_subject = mb->end_subject + P->temp_size;
6338
0
      Feptr = P->temp_sptr[1];
6339
6340
0
      RMATCH(Fecode + 1 + LINK_SIZE, RM39);
6341
6342
0
      mb->end_subject = F->temp_sptr[0];
6343
0
      mb->true_end_subject = mb->end_subject;
6344
0
      RRETURN(rrc);
6345
0
      break;
6346
6347
      /* At the end of a script run, apply the script-checking rules. This code
6348
      will never be exercised if Unicode support is not compiled, because in
6349
      that environment script runs cause an error at compile time. */
6350
6351
0
      case OP_SCRIPT_RUN:
6352
0
      if (!PRIV(script_run)(P->eptr, Feptr, utf)) RRETURN(MATCH_NOMATCH);
6353
0
      break;
6354
6355
      /* Whole-pattern recursion is coded as a recurse into group 0, and is
6356
      handled with OP_BRA above. Other recursion is handled here. */
6357
6358
305k
      case OP_CBRA:
6359
305k
      case OP_CBRAPOS:
6360
306k
      case OP_SCBRA:
6361
307k
      case OP_SCBRAPOS:
6362
307k
      number = GET2(bracode, 1+LINK_SIZE);
6363
6364
      /* Handle a recursively called group. We reinstate the previous set of
6365
      captures and then carry on after the recursion call. */
6366
6367
307k
      if (Fcurrent_recurse == number)
6368
0
        {
6369
0
        P = (heapframe *)((char *)N - frame_size);
6370
0
        memcpy((char *)F + offsetof(heapframe, ovector), P->ovector,
6371
0
          Foffset_top * sizeof(PCRE2_SIZE));
6372
0
        Foffset_top = P->offset_top;
6373
0
        Fcapture_last = P->capture_last;
6374
0
        Fcurrent_recurse = P->current_recurse;
6375
0
        Fecode = P->ecode + 1 + LINK_SIZE;
6376
0
        continue;  /* With next opcode */
6377
0
        }
6378
6379
      /* Deal with actual capturing. */
6380
6381
307k
      offset = (number << 1) - 2;
6382
307k
      Fcapture_last = number;
6383
307k
      Fovector[offset] = P->eptr - mb->start_subject;
6384
307k
      Fovector[offset+1] = Feptr - mb->start_subject;
6385
307k
      if (offset >= Foffset_top) Foffset_top = offset + 2;
6386
307k
      break;
6387
490k
      }  /* End actions relating to the starting opcode */
6388
6389
    /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
6390
    and return the MATCH_KETRPOS. This makes it possible to do the repeats one
6391
    at a time from the outer level. This must precede the empty string test -
6392
    in this case that test is done at the outer level. */
6393
6394
490k
    if (*Fecode == OP_KETRPOS)
6395
1.15k
      {
6396
1.15k
      memcpy((char *)P + offsetof(heapframe, eptr),
6397
1.15k
             (char *)F + offsetof(heapframe, eptr),
6398
1.15k
             frame_copy_size);
6399
1.15k
      RRETURN(MATCH_KETRPOS);
6400
1.15k
      }
6401
6402
    /* Handle the different kinds of closing brackets. A non-repeating ket
6403
    needs no special action, just continuing at this level. This also happens
6404
    for the repeating kets if the group matched no characters, in order to
6405
    forcibly break infinite loops. Otherwise, the repeating kets try the rest
6406
    of the pattern or restart from the preceding bracket, in the appropriate
6407
    order. */
6408
6409
489k
    if (Fop != OP_KET && (P == NULL || Feptr != P->eptr))
6410
34
      {
6411
34
      if (Fop == OP_KETRMIN)
6412
0
        {
6413
0
        RMATCH(Fecode + 1 + LINK_SIZE, RM6);
6414
0
        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6415
0
        Fecode -= GET(Fecode, 1);
6416
0
        break;   /* End of ket processing */
6417
0
        }
6418
6419
      /* Repeat the maximum number of times (KETRMAX) */
6420
6421
34
      RMATCH(bracode, RM7);
6422
34
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6423
34
      }
6424
6425
    /* Carry on at this level for a non-repeating ket, or after matching an
6426
    empty string, or after repeating for a maximum number of times. */
6427
6428
489k
    Fecode += 1 + LINK_SIZE;
6429
489k
    break;
6430
6431
6432
    /* ===================================================================== */
6433
    /* Start and end of line assertions, not multiline mode. */
6434
6435
17.8k
    case OP_CIRC:   /* Start of line, unless PCRE2_NOTBOL is set. */
6436
17.8k
    if (Feptr != mb->start_subject || (mb->moptions & PCRE2_NOTBOL) != 0)
6437
17.4k
      RRETURN(MATCH_NOMATCH);
6438
351
    Fecode++;
6439
351
    break;
6440
6441
205k
    case OP_SOD:    /* Unconditional start of subject */
6442
205k
    if (Feptr != mb->start_subject) RRETURN(MATCH_NOMATCH);
6443
2
    Fecode++;
6444
2
    break;
6445
6446
    /* When PCRE2_NOTEOL is unset, assert before the subject end, or a
6447
    terminating newline unless PCRE2_DOLLAR_ENDONLY is set. */
6448
6449
186k
    case OP_DOLL:
6450
186k
    if ((mb->moptions & PCRE2_NOTEOL) != 0) RRETURN(MATCH_NOMATCH);
6451
186k
    if ((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0) goto ASSERT_NL_OR_EOS;
6452
6453
    /* Fall through */
6454
    /* Unconditional end of subject assertion (\z). */
6455
6456
37.2k
    case OP_EOD:
6457
37.2k
    if (Feptr < mb->true_end_subject) RRETURN(MATCH_NOMATCH);
6458
8
    if (mb->partial != 0)
6459
0
      {
6460
0
      mb->hitend = TRUE;
6461
0
      if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
6462
0
      }
6463
8
    Fecode++;
6464
8
    break;
6465
6466
    /* End of subject or ending \n assertion (\Z) */
6467
6468
6
    case OP_EODN:
6469
186k
    ASSERT_NL_OR_EOS:
6470
186k
    if (Feptr < mb->true_end_subject &&
6471
186k
        (!IS_NEWLINE(Feptr) || Feptr != mb->true_end_subject - mb->nllen))
6472
186k
      {
6473
186k
      if (mb->partial != 0 &&
6474
186k
          Feptr + 1 >= mb->end_subject &&
6475
186k
          NLBLOCK->nltype == NLTYPE_FIXED &&
6476
186k
          NLBLOCK->nllen == 2 &&
6477
186k
          UCHAR21TEST(Feptr) == NLBLOCK->nl[0])
6478
0
        {
6479
0
        mb->hitend = TRUE;
6480
0
        if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
6481
0
        }
6482
186k
      RRETURN(MATCH_NOMATCH);
6483
186k
      }
6484
6485
    /* Either at end of string or \n before end. */
6486
6487
24
    if (mb->partial != 0)
6488
0
      {
6489
0
      mb->hitend = TRUE;
6490
0
      if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
6491
0
      }
6492
24
    Fecode++;
6493
24
    break;
6494
6495
6496
    /* ===================================================================== */
6497
    /* Start and end of line assertions, multiline mode. */
6498
6499
    /* Start of subject unless notbol, or after any newline except for one at
6500
    the very end, unless PCRE2_ALT_CIRCUMFLEX is set. */
6501
6502
397k
    case OP_CIRCM:
6503
397k
    if ((mb->moptions & PCRE2_NOTBOL) != 0 && Feptr == mb->start_subject)
6504
0
      RRETURN(MATCH_NOMATCH);
6505
397k
    if (Feptr != mb->start_subject &&
6506
397k
        ((Feptr == mb->end_subject &&
6507
397k
           (mb->poptions & PCRE2_ALT_CIRCUMFLEX) == 0) ||
6508
397k
         !WAS_NEWLINE(Feptr)))
6509
397k
      RRETURN(MATCH_NOMATCH);
6510
228
    Fecode++;
6511
228
    break;
6512
6513
    /* Assert before any newline, or before end of subject unless noteol is
6514
    set. */
6515
6516
9.96M
    case OP_DOLLM:
6517
9.96M
    if (Feptr < mb->end_subject)
6518
9.96M
      {
6519
9.96M
      if (!IS_NEWLINE(Feptr))
6520
9.95M
        {
6521
9.95M
        if (mb->partial != 0 &&
6522
9.95M
            Feptr + 1 >= mb->end_subject &&
6523
9.95M
            NLBLOCK->nltype == NLTYPE_FIXED &&
6524
9.95M
            NLBLOCK->nllen == 2 &&
6525
9.95M
            UCHAR21TEST(Feptr) == NLBLOCK->nl[0])
6526
0
          {
6527
0
          mb->hitend = TRUE;
6528
0
          if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
6529
0
          }
6530
9.95M
        RRETURN(MATCH_NOMATCH);
6531
9.95M
        }
6532
9.96M
      }
6533
4.47k
    else
6534
4.47k
      {
6535
4.47k
      if ((mb->moptions & PCRE2_NOTEOL) != 0) RRETURN(MATCH_NOMATCH);
6536
4.47k
      SCHECK_PARTIAL();
6537
4.47k
      }
6538
9.54k
    Fecode++;
6539
9.54k
    break;
6540
6541
6542
    /* ===================================================================== */
6543
    /* Start of match assertion */
6544
6545
693
    case OP_SOM:
6546
693
    if (Feptr != mb->start_subject + mb->start_offset) RRETURN(MATCH_NOMATCH);
6547
12
    Fecode++;
6548
12
    break;
6549
6550
6551
    /* ===================================================================== */
6552
    /* Reset the start of match point */
6553
6554
0
    case OP_SET_SOM:
6555
0
    Fstart_match = Feptr;
6556
0
    Fecode++;
6557
0
    break;
6558
6559
6560
    /* ===================================================================== */
6561
    /* Word boundary assertions. Find out if the previous and current
6562
    characters are "word" characters. It takes a bit more work in UTF mode.
6563
    Characters > 255 are assumed to be "non-word" characters when PCRE2_UCP is
6564
    not set. When it is set, use Unicode properties if available, even when not
6565
    in UTF mode. Remember the earliest and latest consulted characters. */
6566
6567
112k
    case OP_NOT_WORD_BOUNDARY:
6568
117k
    case OP_WORD_BOUNDARY:
6569
133k
    case OP_NOT_UCP_WORD_BOUNDARY:
6570
137k
    case OP_UCP_WORD_BOUNDARY:
6571
137k
    if (Feptr == mb->check_subject) prev_is_word = FALSE; else
6572
137k
      {
6573
137k
      PCRE2_SPTR lastptr = Feptr - 1;
6574
137k
#ifdef SUPPORT_UNICODE
6575
137k
      if (utf)
6576
19.9k
        {
6577
19.9k
        BACKCHAR(lastptr);
6578
19.9k
        GETCHAR(fc, lastptr);
6579
19.9k
        }
6580
117k
      else
6581
117k
#endif  /* SUPPORT_UNICODE */
6582
117k
      fc = *lastptr;
6583
137k
      if (lastptr < mb->start_used_ptr) mb->start_used_ptr = lastptr;
6584
137k
#ifdef SUPPORT_UNICODE
6585
137k
      if (Fop == OP_UCP_WORD_BOUNDARY || Fop == OP_NOT_UCP_WORD_BOUNDARY)
6586
19.9k
        {
6587
19.9k
        int chartype = UCD_CHARTYPE(fc);
6588
19.9k
        int category = PRIV(ucp_gentype)[chartype];
6589
19.9k
        prev_is_word = (category == ucp_L || category == ucp_N ||
6590
19.9k
          chartype == ucp_Mn || chartype == ucp_Pc);
6591
19.9k
        }
6592
117k
      else
6593
117k
#endif  /* SUPPORT_UNICODE */
6594
117k
      prev_is_word = CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0;
6595
137k
      }
6596
6597
    /* Get status of next character */
6598
6599
137k
    if (Feptr >= mb->end_subject)
6600
84
      {
6601
84
      SCHECK_PARTIAL();
6602
84
      cur_is_word = FALSE;
6603
84
      }
6604
137k
    else
6605
137k
      {
6606
137k
      PCRE2_SPTR nextptr = Feptr + 1;
6607
137k
#ifdef SUPPORT_UNICODE
6608
137k
      if (utf)
6609
20.1k
        {
6610
20.1k
        FORWARDCHARTEST(nextptr, mb->end_subject);
6611
20.1k
        GETCHAR(fc, Feptr);
6612
20.1k
        }
6613
117k
      else
6614
117k
#endif  /* SUPPORT_UNICODE */
6615
117k
      fc = *Feptr;
6616
137k
      if (nextptr > mb->last_used_ptr) mb->last_used_ptr = nextptr;
6617
137k
#ifdef SUPPORT_UNICODE
6618
137k
      if (Fop == OP_UCP_WORD_BOUNDARY || Fop == OP_NOT_UCP_WORD_BOUNDARY)
6619
20.1k
        {
6620
20.1k
        int chartype = UCD_CHARTYPE(fc);
6621
20.1k
        int category = PRIV(ucp_gentype)[chartype];
6622
20.1k
        cur_is_word = (category == ucp_L || category == ucp_N ||
6623
20.1k
          chartype == ucp_Mn || chartype == ucp_Pc);
6624
20.1k
        }
6625
117k
      else
6626
117k
#endif  /* SUPPORT_UNICODE */
6627
117k
      cur_is_word = CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0;
6628
137k
      }
6629
6630
    /* Now see if the situation is what we want */
6631
6632
137k
    if ((*Fecode++ == OP_WORD_BOUNDARY || Fop == OP_UCP_WORD_BOUNDARY)?
6633
129k
         cur_is_word == prev_is_word : cur_is_word != prev_is_word)
6634
26.8k
      RRETURN(MATCH_NOMATCH);
6635
110k
    break;
6636
6637
6638
    /* ===================================================================== */
6639
    /* Backtracking (*VERB)s, with and without arguments. Note that if the
6640
    pattern is successfully matched, we do not come back from RMATCH. */
6641
6642
110k
    case OP_MARK:
6643
0
    Fmark = mb->nomatch_mark = Fecode + 2;
6644
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM12);
6645
6646
    /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
6647
    argument, and we must check whether that argument matches this MARK's
6648
    argument. It is passed back in mb->verb_skip_ptr. If it does match, we
6649
    return MATCH_SKIP with mb->verb_skip_ptr now pointing to the subject
6650
    position that corresponds to this mark. Otherwise, pass back the return
6651
    code unaltered. */
6652
6653
0
    if (rrc == MATCH_SKIP_ARG &&
6654
0
             PRIV(strcmp)(Fecode + 2, mb->verb_skip_ptr) == 0)
6655
0
      {
6656
0
      mb->verb_skip_ptr = Feptr;   /* Pass back current position */
6657
0
      RRETURN(MATCH_SKIP);
6658
0
      }
6659
0
    RRETURN(rrc);
6660
6661
0
    case OP_FAIL:
6662
0
    RRETURN(MATCH_NOMATCH);
6663
6664
    /* Record the current recursing group number in mb->verb_current_recurse
6665
    when a backtracking return such as MATCH_COMMIT is given. This enables the
6666
    recurse processing to catch verbs from within the recursion. */
6667
6668
0
    case OP_COMMIT:
6669
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM13);
6670
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6671
0
    mb->verb_current_recurse = Fcurrent_recurse;
6672
0
    RRETURN(MATCH_COMMIT);
6673
6674
0
    case OP_COMMIT_ARG:
6675
0
    Fmark = mb->nomatch_mark = Fecode + 2;
6676
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM36);
6677
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6678
0
    mb->verb_current_recurse = Fcurrent_recurse;
6679
0
    RRETURN(MATCH_COMMIT);
6680
6681
0
    case OP_PRUNE:
6682
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM14);
6683
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6684
0
    mb->verb_current_recurse = Fcurrent_recurse;
6685
0
    RRETURN(MATCH_PRUNE);
6686
6687
0
    case OP_PRUNE_ARG:
6688
0
    Fmark = mb->nomatch_mark = Fecode + 2;
6689
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM15);
6690
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6691
0
    mb->verb_current_recurse = Fcurrent_recurse;
6692
0
    RRETURN(MATCH_PRUNE);
6693
6694
0
    case OP_SKIP:
6695
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM16);
6696
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6697
0
    mb->verb_skip_ptr = Feptr;   /* Pass back current position */
6698
0
    mb->verb_current_recurse = Fcurrent_recurse;
6699
0
    RRETURN(MATCH_SKIP);
6700
6701
    /* Note that, for Perl compatibility, SKIP with an argument does NOT set
6702
    nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was
6703
    not a matching mark, we have to re-run the match, ignoring the SKIP_ARG
6704
    that failed and any that precede it (either they also failed, or were not
6705
    triggered). To do this, we maintain a count of executed SKIP_ARGs. If a
6706
    SKIP_ARG gets to top level, the match is re-run with mb->ignore_skip_arg
6707
    set to the count of the one that failed. */
6708
6709
0
    case OP_SKIP_ARG:
6710
0
    mb->skip_arg_count++;
6711
0
    if (mb->skip_arg_count <= mb->ignore_skip_arg)
6712
0
      {
6713
0
      Fecode += PRIV(OP_lengths)[*Fecode] + Fecode[1];
6714
0
      break;
6715
0
      }
6716
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM17);
6717
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6718
6719
    /* Pass back the current skip name and return the special MATCH_SKIP_ARG
6720
    return code. This will either be caught by a matching MARK, or get to the
6721
    top, where it causes a rematch with mb->ignore_skip_arg set to the value of
6722
    mb->skip_arg_count. */
6723
6724
0
    mb->verb_skip_ptr = Fecode + 2;
6725
0
    mb->verb_current_recurse = Fcurrent_recurse;
6726
0
    RRETURN(MATCH_SKIP_ARG);
6727
6728
    /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
6729
    the branch in which it occurs can be determined. */
6730
6731
0
    case OP_THEN:
6732
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM18);
6733
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6734
0
    mb->verb_ecode_ptr = Fecode;
6735
0
    mb->verb_current_recurse = Fcurrent_recurse;
6736
0
    RRETURN(MATCH_THEN);
6737
6738
0
    case OP_THEN_ARG:
6739
0
    Fmark = mb->nomatch_mark = Fecode + 2;
6740
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM19);
6741
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6742
0
    mb->verb_ecode_ptr = Fecode;
6743
0
    mb->verb_current_recurse = Fcurrent_recurse;
6744
0
    RRETURN(MATCH_THEN);
6745
6746
6747
    /* ===================================================================== */
6748
    /* There's been some horrible disaster. Arrival here can only mean there is
6749
    something seriously wrong in the code above or the OP_xxx definitions. */
6750
6751
0
    default:
6752
0
    PCRE2_DEBUG_UNREACHABLE();
6753
0
    return PCRE2_ERROR_INTERNAL;
6754
585M
    }
6755
6756
  /* Do not insert any code in here without much thought; it is assumed
6757
  that "continue" in the code above comes out to here to repeat the main
6758
  loop. */
6759
6760
585M
  }  /* End of main loop */
6761
6762
0
PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */
6763
6764
/* ========================================================================= */
6765
/* The RRETURN() macro jumps here. The number that is saved in Freturn_id
6766
indicates which label we actually want to return to. The value in Frdepth is
6767
the index number of the frame in the vector. The return value has been placed
6768
in rrc. */
6769
6770
447M
#define LBL(val) case val: goto L_RM##val;
6771
6772
447M
RETURN_SWITCH:
6773
447M
if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
6774
447M
if (Frdepth == 0) return rrc;                     /* Exit from the top level */
6775
447M
F = (heapframe *)((char *)F - Fback_frame);       /* Backtrack */
6776
447M
mb->cb->callout_flags |= PCRE2_CALLOUT_BACKTRACK; /* Note for callouts */
6777
6778
#ifdef DEBUG_SHOW_RMATCH
6779
fprintf(stderr, "++ RETURN %d to RM%d\n", rrc, Freturn_id);
6780
#endif
6781
6782
447M
switch (Freturn_id)
6783
447M
  {
6784
2.46M
  LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
6785
505
  LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)
6786
4.82M
  LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)
6787
9.92M
  LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)
6788
73.1M
  LBL(33) LBL(34) LBL(35) LBL(36) LBL(37) LBL(38) LBL(39)
6789
6790
0
#ifdef SUPPORT_WIDE_CHARS
6791
238k
  LBL(100) LBL(101) LBL(102) LBL(103)
6792
0
#endif
6793
6794
0
#ifdef SUPPORT_UNICODE
6795
222k
  LBL(200) LBL(201) LBL(202) LBL(203) LBL(204) LBL(205) LBL(206)
6796
2.99M
  LBL(207) LBL(208) LBL(209) LBL(210) LBL(211) LBL(212) LBL(213)
6797
290M
  LBL(214) LBL(215) LBL(216) LBL(217) LBL(218) LBL(219) LBL(220)
6798
5.28M
  LBL(221) LBL(222) LBL(223) LBL(224)
6799
0
#endif
6800
6801
0
  default:
6802
0
  PCRE2_DEBUG_UNREACHABLE();
6803
0
  return PCRE2_ERROR_INTERNAL;
6804
447M
  }
6805
447M
#undef LBL
6806
447M
}
6807
6808
6809
/*************************************************
6810
*           Match a Regular Expression           *
6811
*************************************************/
6812
6813
/* This function applies a compiled pattern to a subject string and picks out
6814
portions of the string if it matches. Two elements in the vector are set for
6815
each substring: the offsets to the start and end of the substring.
6816
6817
Arguments:
6818
  code            points to the compiled expression
6819
  subject         points to the subject string
6820
  length          length of subject string (may contain binary zeros)
6821
  start_offset    where to start in the subject string
6822
  options         option bits
6823
  match_data      points to a match_data block
6824
  mcontext        points to a PCRE2 match context
6825
6826
Returns:          > 0 => success; value is the number of ovector pairs filled
6827
                  = 0 => success, but ovector is not big enough
6828
                  = -1 => failed to match (PCRE2_ERROR_NOMATCH)
6829
                  = -2 => partial match (PCRE2_ERROR_PARTIAL)
6830
                  < -2 => some kind of unexpected problem
6831
*/
6832
6833
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
6834
pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
6835
  PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
6836
  pcre2_match_context *mcontext)
6837
145k
{
6838
145k
int rc;
6839
145k
int was_zero_terminated = 0;
6840
145k
const uint8_t *start_bits = NULL;
6841
145k
const pcre2_real_code *re = (const pcre2_real_code *)code;
6842
6843
145k
BOOL anchored;
6844
145k
BOOL firstline;
6845
145k
BOOL has_first_cu = FALSE;
6846
145k
BOOL has_req_cu = FALSE;
6847
145k
BOOL startline;
6848
6849
145k
#if PCRE2_CODE_UNIT_WIDTH == 8
6850
145k
PCRE2_SPTR memchr_found_first_cu;
6851
145k
PCRE2_SPTR memchr_found_first_cu2;
6852
145k
#endif
6853
6854
145k
PCRE2_UCHAR first_cu = 0;
6855
145k
PCRE2_UCHAR first_cu2 = 0;
6856
145k
PCRE2_UCHAR req_cu = 0;
6857
145k
PCRE2_UCHAR req_cu2 = 0;
6858
6859
145k
PCRE2_SPTR bumpalong_limit;
6860
145k
PCRE2_SPTR end_subject;
6861
145k
PCRE2_SPTR true_end_subject;
6862
145k
PCRE2_SPTR start_match;
6863
145k
PCRE2_SPTR req_cu_ptr;
6864
145k
PCRE2_SPTR start_partial;
6865
145k
PCRE2_SPTR match_partial;
6866
6867
#ifdef SUPPORT_JIT
6868
BOOL use_jit;
6869
#endif
6870
6871
/* This flag is needed even when Unicode is not supported for convenience
6872
(it is used by the IS_NEWLINE macro). */
6873
6874
145k
BOOL utf = FALSE;
6875
6876
145k
#ifdef SUPPORT_UNICODE
6877
145k
BOOL ucp = FALSE;
6878
145k
BOOL allow_invalid;
6879
145k
uint32_t fragment_options = 0;
6880
#ifdef SUPPORT_JIT
6881
BOOL jit_checked_utf = FALSE;
6882
#endif
6883
145k
#endif  /* SUPPORT_UNICODE */
6884
6885
145k
PCRE2_SIZE frame_size;
6886
145k
PCRE2_SIZE heapframes_size;
6887
6888
/* We need to have mb as a pointer to a match block, because the IS_NEWLINE
6889
macro is used below, and it expects NLBLOCK to be defined as a pointer. */
6890
6891
145k
pcre2_callout_block cb;
6892
145k
match_block actual_match_block;
6893
145k
match_block *mb = &actual_match_block;
6894
6895
/* Recognize NULL, length 0 as an empty string. */
6896
6897
145k
if (subject == NULL && length == 0) subject = (PCRE2_SPTR)"";
6898
6899
/* Plausibility checks */
6900
6901
145k
if ((options & ~PUBLIC_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION;
6902
145k
if (code == NULL || subject == NULL || match_data == NULL)
6903
0
  return PCRE2_ERROR_NULL;
6904
6905
145k
start_match = subject + start_offset;
6906
145k
req_cu_ptr = start_match - 1;
6907
145k
if (length == PCRE2_ZERO_TERMINATED)
6908
0
  {
6909
0
  length = PRIV(strlen)(subject);
6910
0
  was_zero_terminated = 1;
6911
0
  }
6912
145k
true_end_subject = end_subject = subject + length;
6913
6914
145k
if (start_offset > length) return PCRE2_ERROR_BADOFFSET;
6915
6916
/* Check that the first field in the block is the magic number. */
6917
6918
145k
if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
6919
6920
/* Check the code unit width. */
6921
6922
145k
if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8)
6923
0
  return PCRE2_ERROR_BADMODE;
6924
6925
/* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the
6926
options variable for this function. Users of PCRE2 who are not calling the
6927
function directly would like to have a way of setting these flags, in the same
6928
way that they can set pcre2_compile() flags like PCRE2_NO_AUTO_POSSESS with
6929
constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and
6930
(*NOTEMPTY_ATSTART) set bits in the pattern's "flags" field, which we now
6931
transfer to the options for this function. The bits are guaranteed to be
6932
adjacent, but do not have the same values. This bit of Boolean trickery assumes
6933
that the match-time bits are not more significant than the flag bits. If by
6934
accident this is not the case, a compile-time division by zero error will
6935
occur. */
6936
6937
435k
#define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
6938
290k
#define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART)
6939
145k
options |= (re->flags & FF) / ((FF & (~FF+1)) / (OO & (~OO+1)));
6940
145k
#undef FF
6941
145k
#undef OO
6942
6943
/* If the pattern was successfully studied with JIT support, we will run the
6944
JIT executable instead of the rest of this function. Most options must be set
6945
at compile time for the JIT code to be usable. */
6946
6947
#ifdef SUPPORT_JIT
6948
use_jit = (re->executable_jit != NULL &&
6949
          (options & ~PUBLIC_JIT_MATCH_OPTIONS) == 0);
6950
#endif
6951
6952
/* Initialize UTF/UCP parameters. */
6953
6954
145k
#ifdef SUPPORT_UNICODE
6955
145k
utf = (re->overall_options & PCRE2_UTF) != 0;
6956
145k
allow_invalid = (re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0;
6957
145k
ucp = (re->overall_options & PCRE2_UCP) != 0;
6958
145k
#endif  /* SUPPORT_UNICODE */
6959
6960
/* Convert the partial matching flags into an integer. */
6961
6962
145k
mb->partial = ((options & PCRE2_PARTIAL_HARD) != 0)? 2 :
6963
145k
              ((options & PCRE2_PARTIAL_SOFT) != 0)? 1 : 0;
6964
6965
/* Partial matching and PCRE2_ENDANCHORED are currently not allowed at the same
6966
time. */
6967
6968
145k
if (mb->partial != 0 &&
6969
145k
   ((re->overall_options | options) & PCRE2_ENDANCHORED) != 0)
6970
0
  return PCRE2_ERROR_BADOPTION;
6971
6972
/* It is an error to set an offset limit without setting the flag at compile
6973
time. */
6974
6975
145k
if (mcontext != NULL && mcontext->offset_limit != PCRE2_UNSET &&
6976
145k
     (re->overall_options & PCRE2_USE_OFFSET_LIMIT) == 0)
6977
0
  return PCRE2_ERROR_BADOFFSETLIMIT;
6978
6979
/* If the match data block was previously used with PCRE2_COPY_MATCHED_SUBJECT,
6980
free the memory that was obtained. Set the field to NULL for no match cases. */
6981
6982
145k
if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)
6983
0
  {
6984
0
  match_data->memctl.free((void *)match_data->subject,
6985
0
    match_data->memctl.memory_data);
6986
0
  match_data->flags &= ~PCRE2_MD_COPIED_SUBJECT;
6987
0
  }
6988
145k
match_data->subject = NULL;
6989
6990
/* Zero the error offset in case the first code unit is invalid UTF. */
6991
6992
145k
match_data->startchar = 0;
6993
6994
6995
/* ============================= JIT matching ============================== */
6996
6997
/* Prepare for JIT matching. Check a UTF string for validity unless no check is
6998
requested or invalid UTF can be handled. We check only the portion of the
6999
subject that might be inspected during matching - from the offset minus the
7000
maximum lookbehind to the given length. This saves time when a small part of a
7001
large subject is being matched by the use of a starting offset. Note that the
7002
maximum lookbehind is a number of characters, not code units. */
7003
7004
#ifdef SUPPORT_JIT
7005
if (use_jit)
7006
  {
7007
#ifdef SUPPORT_UNICODE
7008
  if (utf && (options & PCRE2_NO_UTF_CHECK) == 0 && !allow_invalid)
7009
    {
7010
7011
    /* For 8-bit and 16-bit UTF, check that the first code unit is a valid
7012
    character start. */
7013
7014
#if PCRE2_CODE_UNIT_WIDTH != 32
7015
    if (start_match < end_subject && NOT_FIRSTCU(*start_match))
7016
      {
7017
      if (start_offset > 0) return PCRE2_ERROR_BADUTFOFFSET;
7018
#if PCRE2_CODE_UNIT_WIDTH == 8
7019
      return PCRE2_ERROR_UTF8_ERR20;  /* Isolated 0x80 byte */
7020
#else
7021
      return PCRE2_ERROR_UTF16_ERR3;  /* Isolated low surrogate */
7022
#endif
7023
      }
7024
#endif  /* WIDTH != 32 */
7025
7026
    /* Move back by the maximum lookbehind, just in case it happens at the very
7027
    start of matching. */
7028
7029
#if PCRE2_CODE_UNIT_WIDTH != 32
7030
    for (unsigned int i = re->max_lookbehind; i > 0 && start_match > subject; i--)
7031
      {
7032
      start_match--;
7033
      while (start_match > subject &&
7034
#if PCRE2_CODE_UNIT_WIDTH == 8
7035
      (*start_match & 0xc0) == 0x80)
7036
#else  /* 16-bit */
7037
      (*start_match & 0xfc00) == 0xdc00)
7038
#endif
7039
        start_match--;
7040
      }
7041
#else  /* PCRE2_CODE_UNIT_WIDTH != 32 */
7042
7043
    /* In the 32-bit library, one code unit equals one character. However,
7044
    we cannot just subtract the lookbehind and then compare pointers, because
7045
    a very large lookbehind could create an invalid pointer. */
7046
7047
    if (start_offset >= re->max_lookbehind)
7048
      start_match -= re->max_lookbehind;
7049
    else
7050
      start_match = subject;
7051
#endif  /* PCRE2_CODE_UNIT_WIDTH != 32 */
7052
7053
    /* Validate the relevant portion of the subject. Adjust the offset of an
7054
    invalid code point to be an absolute offset in the whole string. */
7055
7056
    match_data->rc = PRIV(valid_utf)(start_match,
7057
      length - (start_match - subject), &(match_data->startchar));
7058
    if (match_data->rc != 0)
7059
      {
7060
      match_data->startchar += start_match - subject;
7061
      return match_data->rc;
7062
      }
7063
    jit_checked_utf = TRUE;
7064
    }
7065
#endif  /* SUPPORT_UNICODE */
7066
7067
  /* If JIT returns BADOPTION, which means that the selected complete or
7068
  partial matching mode was not compiled, fall through to the interpreter. */
7069
7070
  rc = pcre2_jit_match(code, subject, length, start_offset, options,
7071
    match_data, mcontext);
7072
  if (rc != PCRE2_ERROR_JIT_BADOPTION)
7073
    {
7074
    match_data->subject_length = length;
7075
    if (rc >= 0 && (options & PCRE2_COPY_MATCHED_SUBJECT) != 0)
7076
      {
7077
      length = CU2BYTES(length + was_zero_terminated);
7078
      match_data->subject = match_data->memctl.malloc(length,
7079
        match_data->memctl.memory_data);
7080
      if (match_data->subject == NULL) return PCRE2_ERROR_NOMEMORY;
7081
      memcpy((void *)match_data->subject, subject, length);
7082
      match_data->flags |= PCRE2_MD_COPIED_SUBJECT;
7083
      }
7084
    return rc;
7085
    }
7086
  }
7087
#endif  /* SUPPORT_JIT */
7088
7089
/* ========================= End of JIT matching ========================== */
7090
7091
7092
/* Proceed with non-JIT matching. The default is to allow lookbehinds to the
7093
start of the subject. A UTF check when there is a non-zero offset may change
7094
this. */
7095
7096
145k
mb->check_subject = subject;
7097
7098
/* If a UTF subject string was not checked for validity in the JIT code above,
7099
check it here, and handle support for invalid UTF strings. The check above
7100
happens only when invalid UTF is not supported and PCRE2_NO_UTF_CHECK is unset.
7101
If we get here in those circumstances, it means the subject string is valid,
7102
but for some reason JIT matching was not successful. There is no need to check
7103
the subject again.
7104
7105
We check only the portion of the subject that might be inspected during
7106
matching - from the offset minus the maximum lookbehind to the given length.
7107
This saves time when a small part of a large subject is being matched by the
7108
use of a starting offset. Note that the maximum lookbehind is a number of
7109
characters, not code units.
7110
7111
Note also that support for invalid UTF forces a check, overriding the setting
7112
of PCRE2_NO_UTF_CHECK. */
7113
7114
145k
#ifdef SUPPORT_UNICODE
7115
145k
if (utf &&
7116
#ifdef SUPPORT_JIT
7117
    !jit_checked_utf &&
7118
#endif
7119
145k
    ((options & PCRE2_NO_UTF_CHECK) == 0 || allow_invalid))
7120
838
  {
7121
838
#if PCRE2_CODE_UNIT_WIDTH != 32
7122
838
  BOOL skipped_bad_start = FALSE;
7123
838
#endif
7124
7125
  /* For 8-bit and 16-bit UTF, check that the first code unit is a valid
7126
  character start. If we are handling invalid UTF, just skip over such code
7127
  units. Otherwise, give an appropriate error. */
7128
7129
838
#if PCRE2_CODE_UNIT_WIDTH != 32
7130
838
  if (allow_invalid)
7131
0
    {
7132
0
    while (start_match < end_subject && NOT_FIRSTCU(*start_match))
7133
0
      {
7134
0
      start_match++;
7135
0
      skipped_bad_start = TRUE;
7136
0
      }
7137
0
    }
7138
838
  else if (start_match < end_subject && NOT_FIRSTCU(*start_match))
7139
0
    {
7140
0
    if (start_offset > 0) return PCRE2_ERROR_BADUTFOFFSET;
7141
0
#if PCRE2_CODE_UNIT_WIDTH == 8
7142
0
    return PCRE2_ERROR_UTF8_ERR20;  /* Isolated 0x80 byte */
7143
#else
7144
    return PCRE2_ERROR_UTF16_ERR3;  /* Isolated low surrogate */
7145
#endif
7146
0
    }
7147
838
#endif  /* WIDTH != 32 */
7148
7149
  /* The mb->check_subject field points to the start of UTF checking;
7150
  lookbehinds can go back no further than this. */
7151
7152
838
  mb->check_subject = start_match;
7153
7154
  /* Move back by the maximum lookbehind, just in case it happens at the very
7155
  start of matching, but don't do this if we skipped bad 8-bit or 16-bit code
7156
  units above. */
7157
7158
838
#if PCRE2_CODE_UNIT_WIDTH != 32
7159
838
  if (!skipped_bad_start)
7160
838
    {
7161
838
    unsigned int i;
7162
838
    for (i = re->max_lookbehind; i > 0 && mb->check_subject > subject; i--)
7163
0
      {
7164
0
      mb->check_subject--;
7165
0
      while (mb->check_subject > subject &&
7166
0
#if PCRE2_CODE_UNIT_WIDTH == 8
7167
0
      (*mb->check_subject & 0xc0) == 0x80)
7168
#else  /* 16-bit */
7169
      (*mb->check_subject & 0xfc00) == 0xdc00)
7170
#endif
7171
0
        mb->check_subject--;
7172
0
      }
7173
838
    }
7174
#else  /* PCRE2_CODE_UNIT_WIDTH != 32 */
7175
7176
  /* In the 32-bit library, one code unit equals one character. However,
7177
  we cannot just subtract the lookbehind and then compare pointers, because
7178
  a very large lookbehind could create an invalid pointer. */
7179
7180
  if (start_offset >= re->max_lookbehind)
7181
    mb->check_subject -= re->max_lookbehind;
7182
  else
7183
    mb->check_subject = subject;
7184
#endif  /* PCRE2_CODE_UNIT_WIDTH != 32 */
7185
7186
  /* Validate the relevant portion of the subject. There's a loop in case we
7187
  encounter bad UTF in the characters preceding start_match which we are
7188
  scanning because of a lookbehind. */
7189
7190
838
  for (;;)
7191
838
    {
7192
838
    match_data->rc = PRIV(valid_utf)(mb->check_subject,
7193
838
      length - (mb->check_subject - subject), &(match_data->startchar));
7194
7195
838
    if (match_data->rc == 0) break;   /* Valid UTF string */
7196
7197
    /* Invalid UTF string. Adjust the offset to be an absolute offset in the
7198
    whole string. If we are handling invalid UTF strings, set end_subject to
7199
    stop before the bad code unit, and set the options to "not end of line".
7200
    Otherwise return the error. */
7201
7202
129
    match_data->startchar += mb->check_subject - subject;
7203
129
    if (!allow_invalid || match_data->rc > 0) return match_data->rc;
7204
0
    end_subject = subject + match_data->startchar;
7205
7206
    /* If the end precedes start_match, it means there is invalid UTF in the
7207
    extra code units we reversed over because of a lookbehind. Advance past the
7208
    first bad code unit, and then skip invalid character starting code units in
7209
    8-bit and 16-bit modes, and try again with the original end point. */
7210
7211
0
    if (end_subject < start_match)
7212
0
      {
7213
0
      mb->check_subject = end_subject + 1;
7214
0
#if PCRE2_CODE_UNIT_WIDTH != 32
7215
0
      while (mb->check_subject < start_match && NOT_FIRSTCU(*mb->check_subject))
7216
0
        mb->check_subject++;
7217
0
#endif
7218
0
      end_subject = true_end_subject;
7219
0
      }
7220
7221
    /* Otherwise, set the not end of line option, and do the match. */
7222
7223
0
    else
7224
0
      {
7225
0
      fragment_options = PCRE2_NOTEOL;
7226
0
      break;
7227
0
      }
7228
0
    }
7229
838
  }
7230
144k
#endif  /* SUPPORT_UNICODE */
7231
7232
/* A NULL match context means "use a default context", but we take the memory
7233
control functions from the pattern. */
7234
7235
144k
if (mcontext == NULL)
7236
0
  {
7237
0
  mcontext = (pcre2_match_context *)(&PRIV(default_match_context));
7238
0
  mb->memctl = re->memctl;
7239
0
  }
7240
144k
else mb->memctl = mcontext->memctl;
7241
7242
144k
anchored = ((re->overall_options | options) & PCRE2_ANCHORED) != 0;
7243
144k
firstline = !anchored && (re->overall_options & PCRE2_FIRSTLINE) != 0;
7244
144k
startline = (re->flags & PCRE2_STARTLINE) != 0;
7245
144k
bumpalong_limit = (mcontext->offset_limit == PCRE2_UNSET)?
7246
144k
  true_end_subject : subject + mcontext->offset_limit;
7247
7248
/* Initialize and set up the fixed fields in the callout block, with a pointer
7249
in the match block. */
7250
7251
144k
mb->cb = &cb;
7252
144k
cb.version = 2;
7253
144k
cb.subject = subject;
7254
144k
cb.subject_length = (PCRE2_SIZE)(end_subject - subject);
7255
144k
cb.callout_flags = 0;
7256
7257
/* Fill in the remaining fields in the match block, except for moptions, which
7258
gets set later. */
7259
7260
144k
mb->callout = mcontext->callout;
7261
144k
mb->callout_data = mcontext->callout_data;
7262
7263
144k
mb->start_subject = subject;
7264
144k
mb->start_offset = start_offset;
7265
144k
mb->end_subject = end_subject;
7266
144k
mb->true_end_subject = true_end_subject;
7267
144k
mb->hasthen = (re->flags & PCRE2_HASTHEN) != 0;
7268
144k
mb->allowemptypartial = (re->max_lookbehind > 0) ||
7269
144k
    (re->flags & PCRE2_MATCH_EMPTY) != 0;
7270
144k
mb->poptions = re->overall_options;          /* Pattern options */
7271
144k
mb->ignore_skip_arg = 0;
7272
144k
mb->mark = mb->nomatch_mark = NULL;          /* In case never set */
7273
7274
/* The name table is needed for finding all the numbers associated with a
7275
given name, for condition testing. The code follows the name table. */
7276
7277
144k
mb->name_table = (PCRE2_SPTR)((const uint8_t *)re + sizeof(pcre2_real_code));
7278
144k
mb->name_count = re->name_count;
7279
144k
mb->name_entry_size = re->name_entry_size;
7280
144k
mb->start_code = (PCRE2_SPTR)((const uint8_t *)re + re->code_start);
7281
7282
/* Process the \R and newline settings. */
7283
7284
144k
mb->bsr_convention = re->bsr_convention;
7285
144k
mb->nltype = NLTYPE_FIXED;
7286
144k
switch(re->newline_convention)
7287
144k
  {
7288
0
  case PCRE2_NEWLINE_CR:
7289
0
  mb->nllen = 1;
7290
0
  mb->nl[0] = CHAR_CR;
7291
0
  break;
7292
7293
144k
  case PCRE2_NEWLINE_LF:
7294
144k
  mb->nllen = 1;
7295
144k
  mb->nl[0] = CHAR_NL;
7296
144k
  break;
7297
7298
0
  case PCRE2_NEWLINE_NUL:
7299
0
  mb->nllen = 1;
7300
0
  mb->nl[0] = CHAR_NUL;
7301
0
  break;
7302
7303
0
  case PCRE2_NEWLINE_CRLF:
7304
0
  mb->nllen = 2;
7305
0
  mb->nl[0] = CHAR_CR;
7306
0
  mb->nl[1] = CHAR_NL;
7307
0
  break;
7308
7309
0
  case PCRE2_NEWLINE_ANY:
7310
0
  mb->nltype = NLTYPE_ANY;
7311
0
  break;
7312
7313
0
  case PCRE2_NEWLINE_ANYCRLF:
7314
0
  mb->nltype = NLTYPE_ANYCRLF;
7315
0
  break;
7316
7317
0
  default:
7318
0
  PCRE2_DEBUG_UNREACHABLE();
7319
0
  return PCRE2_ERROR_INTERNAL;
7320
144k
  }
7321
7322
/* The backtracking frames have fixed data at the front, and a PCRE2_SIZE
7323
vector at the end, whose size depends on the number of capturing parentheses in
7324
the pattern. It is not used at all if there are no capturing parentheses.
7325
7326
  frame_size                   is the total size of each frame
7327
  match_data->heapframes       is the pointer to the frames vector
7328
  match_data->heapframes_size  is the allocated size of the vector
7329
7330
We must pad the frame_size for alignment to ensure subsequent frames are as
7331
aligned as heapframe. Whilst ovector is word-aligned due to being a PCRE2_SIZE
7332
array, that does not guarantee it is suitably aligned for pointers, as some
7333
architectures have pointers that are larger than a size_t. */
7334
7335
144k
frame_size = (offsetof(heapframe, ovector) +
7336
144k
  re->top_bracket * 2 * sizeof(PCRE2_SIZE) + HEAPFRAME_ALIGNMENT - 1) &
7337
144k
  ~(HEAPFRAME_ALIGNMENT - 1);
7338
7339
/* Limits set in the pattern override the match context only if they are
7340
smaller. */
7341
7342
144k
mb->heap_limit = ((mcontext->heap_limit < re->limit_heap)?
7343
144k
  mcontext->heap_limit : re->limit_heap);
7344
7345
144k
mb->match_limit = (mcontext->match_limit < re->limit_match)?
7346
144k
  mcontext->match_limit : re->limit_match;
7347
7348
144k
mb->match_limit_depth = (mcontext->depth_limit < re->limit_depth)?
7349
144k
  mcontext->depth_limit : re->limit_depth;
7350
7351
/* If a pattern has very many capturing parentheses, the frame size may be very
7352
large. Set the initial frame vector size to ensure that there are at least 10
7353
available frames, but enforce a minimum of START_FRAMES_SIZE. If this is
7354
greater than the heap limit, get as large a vector as possible. */
7355
7356
144k
heapframes_size = frame_size * 10;
7357
144k
if (heapframes_size < START_FRAMES_SIZE) heapframes_size = START_FRAMES_SIZE;
7358
144k
if (heapframes_size / 1024 > mb->heap_limit)
7359
0
  {
7360
0
  PCRE2_SIZE max_size = 1024 * mb->heap_limit;
7361
0
  if (max_size < frame_size) return PCRE2_ERROR_HEAPLIMIT;
7362
0
  heapframes_size = max_size;
7363
0
  }
7364
7365
/* If an existing frame vector in the match_data block is large enough, we can
7366
use it. Otherwise, free any pre-existing vector and get a new one. */
7367
7368
144k
if (match_data->heapframes_size < heapframes_size)
7369
177
  {
7370
177
  match_data->memctl.free(match_data->heapframes,
7371
177
    match_data->memctl.memory_data);
7372
177
  match_data->heapframes = match_data->memctl.malloc(heapframes_size,
7373
177
    match_data->memctl.memory_data);
7374
177
  if (match_data->heapframes == NULL)
7375
0
    {
7376
0
    match_data->heapframes_size = 0;
7377
0
    return PCRE2_ERROR_NOMEMORY;
7378
0
    }
7379
177
  match_data->heapframes_size = heapframes_size;
7380
177
  }
7381
7382
/* Write to the ovector within the first frame to mark every capture unset and
7383
to avoid uninitialized memory read errors when it is copied to a new frame. */
7384
7385
144k
memset((char *)(match_data->heapframes) + offsetof(heapframe, ovector), 0xff,
7386
144k
  frame_size - offsetof(heapframe, ovector));
7387
7388
/* Pointers to the individual character tables */
7389
7390
144k
mb->lcc = re->tables + lcc_offset;
7391
144k
mb->fcc = re->tables + fcc_offset;
7392
144k
mb->ctypes = re->tables + ctypes_offset;
7393
7394
/* Set up the first code unit to match, if available. If there's no first code
7395
unit there may be a bitmap of possible first characters. */
7396
7397
144k
if ((re->flags & PCRE2_FIRSTSET) != 0)
7398
692
  {
7399
692
  has_first_cu = TRUE;
7400
692
  first_cu = first_cu2 = (PCRE2_UCHAR)(re->first_codeunit);
7401
692
  if ((re->flags & PCRE2_FIRSTCASELESS) != 0)
7402
2
    {
7403
2
    first_cu2 = TABLE_GET(first_cu, mb->fcc, first_cu);
7404
2
#ifdef SUPPORT_UNICODE
7405
2
#if PCRE2_CODE_UNIT_WIDTH == 8
7406
2
    if (first_cu > 127 && ucp && !utf) first_cu2 = UCD_OTHERCASE(first_cu);
7407
#else
7408
    if (first_cu > 127 && (utf || ucp)) first_cu2 = UCD_OTHERCASE(first_cu);
7409
#endif
7410
2
#endif  /* SUPPORT_UNICODE */
7411
2
    }
7412
692
  }
7413
144k
else
7414
144k
  if (!startline && (re->flags & PCRE2_FIRSTMAPSET) != 0)
7415
1.34k
    start_bits = re->start_bitmap;
7416
7417
/* There may also be a "last known required character" set. */
7418
7419
144k
if ((re->flags & PCRE2_LASTSET) != 0)
7420
606
  {
7421
606
  has_req_cu = TRUE;
7422
606
  req_cu = req_cu2 = (PCRE2_UCHAR)(re->last_codeunit);
7423
606
  if ((re->flags & PCRE2_LASTCASELESS) != 0)
7424
33
    {
7425
33
    req_cu2 = TABLE_GET(req_cu, mb->fcc, req_cu);
7426
33
#ifdef SUPPORT_UNICODE
7427
33
#if PCRE2_CODE_UNIT_WIDTH == 8
7428
33
    if (req_cu > 127 && ucp && !utf) req_cu2 = UCD_OTHERCASE(req_cu);
7429
#else
7430
    if (req_cu > 127 && (utf || ucp)) req_cu2 = UCD_OTHERCASE(req_cu);
7431
#endif
7432
33
#endif  /* SUPPORT_UNICODE */
7433
33
    }
7434
606
  }
7435
7436
7437
/* ==========================================================================*/
7438
7439
/* Loop for handling unanchored repeated matching attempts; for anchored regexes
7440
the loop runs just once. */
7441
7442
144k
#ifdef SUPPORT_UNICODE
7443
144k
FRAGMENT_RESTART:
7444
144k
#endif
7445
7446
144k
start_partial = match_partial = NULL;
7447
144k
mb->hitend = FALSE;
7448
7449
144k
#if PCRE2_CODE_UNIT_WIDTH == 8
7450
144k
memchr_found_first_cu = NULL;
7451
144k
memchr_found_first_cu2 = NULL;
7452
144k
#endif
7453
7454
144k
for(;;)
7455
465k
  {
7456
465k
  PCRE2_SPTR new_start_match;
7457
7458
  /* ----------------- Start of match optimizations ---------------- */
7459
7460
  /* There are some optimizations that avoid running the match if a known
7461
  starting point is not found, or if a known later code unit is not present.
7462
  However, there is an option (settable at compile time) that disables these,
7463
  for testing and for ensuring that all callouts do actually occur. */
7464
7465
465k
  if ((re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0)
7466
465k
    {
7467
    /* If firstline is TRUE, the start of the match is constrained to the first
7468
    line of a multiline string. That is, the match must be before or at the
7469
    first newline following the start of matching. Temporarily adjust
7470
    end_subject so that we stop the scans for a first code unit at a newline.
7471
    If the match fails at the newline, later code breaks the loop. */
7472
7473
465k
    if (firstline)
7474
0
      {
7475
0
      PCRE2_SPTR t = start_match;
7476
0
#ifdef SUPPORT_UNICODE
7477
0
      if (utf)
7478
0
        {
7479
0
        while (t < end_subject && !IS_NEWLINE(t))
7480
0
          {
7481
0
          t++;
7482
0
          ACROSSCHAR(t < end_subject, t, t++);
7483
0
          }
7484
0
        }
7485
0
      else
7486
0
#endif
7487
0
      while (t < end_subject && !IS_NEWLINE(t)) t++;
7488
0
      end_subject = t;
7489
0
      }
7490
7491
    /* Anchored: check the first code unit if one is recorded. This may seem
7492
    pointless but it can help in detecting a no match case without scanning for
7493
    the required code unit. */
7494
7495
465k
    if (anchored)
7496
69.9k
      {
7497
69.9k
      if (has_first_cu || start_bits != NULL)
7498
57
        {
7499
57
        BOOL ok = start_match < end_subject;
7500
57
        if (ok)
7501
55
          {
7502
55
          PCRE2_UCHAR c = UCHAR21TEST(start_match);
7503
55
          ok = has_first_cu && (c == first_cu || c == first_cu2);
7504
55
          if (!ok && start_bits != NULL)
7505
40
            {
7506
#if PCRE2_CODE_UNIT_WIDTH != 8
7507
            if (c > 255) c = 255;
7508
#endif
7509
40
            ok = (start_bits[c/8] & (1u << (c&7))) != 0;
7510
40
            }
7511
55
          }
7512
57
        if (!ok)
7513
23
          {
7514
23
          rc = MATCH_NOMATCH;
7515
23
          break;
7516
23
          }
7517
57
        }
7518
69.9k
      }
7519
7520
    /* Not anchored. Advance to a unique first code unit if there is one. */
7521
7522
395k
    else
7523
395k
      {
7524
395k
      if (has_first_cu)
7525
1.26k
        {
7526
1.26k
        if (first_cu != first_cu2)  /* Caseless */
7527
2
          {
7528
          /* In 16-bit and 32-bit modes we have to do our own search, so can
7529
          look for both cases at once. */
7530
7531
#if PCRE2_CODE_UNIT_WIDTH != 8
7532
          PCRE2_UCHAR smc;
7533
          while (start_match < end_subject &&
7534
                (smc = UCHAR21TEST(start_match)) != first_cu &&
7535
                 smc != first_cu2)
7536
            start_match++;
7537
#else
7538
          /* In 8-bit mode, the use of memchr() gives a big speed up, even
7539
          though we have to call it twice in order to find the earliest
7540
          occurrence of the code unit in either of its cases. Caching is used
7541
          to remember the positions of previously found code units. This can
7542
          make a huge difference when the strings are very long and only one
7543
          case is actually present. */
7544
7545
2
          PCRE2_SPTR pp1 = NULL;
7546
2
          PCRE2_SPTR pp2 = NULL;
7547
2
          PCRE2_SIZE searchlength = end_subject - start_match;
7548
7549
          /* If we haven't got a previously found position for first_cu, or if
7550
          the current starting position is later, we need to do a search. If
7551
          the code unit is not found, set it to the end. */
7552
7553
2
          if (memchr_found_first_cu == NULL ||
7554
2
              start_match > memchr_found_first_cu)
7555
2
            {
7556
2
            pp1 = memchr(start_match, first_cu, searchlength);
7557
2
            memchr_found_first_cu = (pp1 == NULL)? end_subject : pp1;
7558
2
            }
7559
7560
          /* If the start is before a previously found position, use the
7561
          previous position, or NULL if a previous search failed. */
7562
7563
0
          else pp1 = (memchr_found_first_cu == end_subject)? NULL :
7564
0
            memchr_found_first_cu;
7565
7566
          /* Do the same thing for the other case. */
7567
7568
2
          if (memchr_found_first_cu2 == NULL ||
7569
2
              start_match > memchr_found_first_cu2)
7570
2
            {
7571
2
            pp2 = memchr(start_match, first_cu2, searchlength);
7572
2
            memchr_found_first_cu2 = (pp2 == NULL)? end_subject : pp2;
7573
2
            }
7574
7575
0
          else pp2 = (memchr_found_first_cu2 == end_subject)? NULL :
7576
0
            memchr_found_first_cu2;
7577
7578
          /* Set the start to the end of the subject if neither case was found.
7579
          Otherwise, use the earlier found point. */
7580
7581
2
          if (pp1 == NULL)
7582
2
            start_match = (pp2 == NULL)? end_subject : pp2;
7583
0
          else
7584
0
            start_match = (pp2 == NULL || pp1 < pp2)? pp1 : pp2;
7585
7586
2
#endif  /* 8-bit handling */
7587
2
          }
7588
7589
        /* The caseful case is much simpler. */
7590
7591
1.26k
        else
7592
1.26k
          {
7593
#if PCRE2_CODE_UNIT_WIDTH != 8
7594
          while (start_match < end_subject && UCHAR21TEST(start_match) !=
7595
                 first_cu)
7596
            start_match++;
7597
#else
7598
1.26k
          start_match = memchr(start_match, first_cu, end_subject - start_match);
7599
1.26k
          if (start_match == NULL) start_match = end_subject;
7600
1.26k
#endif
7601
1.26k
          }
7602
7603
        /* If we can't find the required first code unit, having reached the
7604
        true end of the subject, break the bumpalong loop, to force a match
7605
        failure, except when doing partial matching, when we let the next cycle
7606
        run at the end of the subject. To see why, consider the pattern
7607
        /(?<=abc)def/, which partially matches "abc", even though the string
7608
        does not contain the starting character "d". If we have not reached the
7609
        true end of the subject (PCRE2_FIRSTLINE caused end_subject to be
7610
        temporarily modified) we also let the cycle run, because the matching
7611
        string is legitimately allowed to start with the first code unit of a
7612
        newline. */
7613
7614
1.26k
        if (mb->partial == 0 && start_match >= mb->end_subject)
7615
281
          {
7616
281
          rc = MATCH_NOMATCH;
7617
281
          break;
7618
281
          }
7619
1.26k
        }
7620
7621
      /* If there's no first code unit, advance to just after a linebreak for a
7622
      multiline match if required. */
7623
7624
394k
      else if (startline)
7625
0
        {
7626
0
        if (start_match > mb->start_subject + start_offset)
7627
0
          {
7628
0
#ifdef SUPPORT_UNICODE
7629
0
          if (utf)
7630
0
            {
7631
0
            while (start_match < end_subject && !WAS_NEWLINE(start_match))
7632
0
              {
7633
0
              start_match++;
7634
0
              ACROSSCHAR(start_match < end_subject, start_match, start_match++);
7635
0
              }
7636
0
            }
7637
0
          else
7638
0
#endif
7639
0
          while (start_match < end_subject && !WAS_NEWLINE(start_match))
7640
0
            start_match++;
7641
7642
          /* If we have just passed a CR and the newline option is ANY or
7643
          ANYCRLF, and we are now at a LF, advance the match position by one
7644
          more code unit. */
7645
7646
0
          if (start_match[-1] == CHAR_CR &&
7647
0
               (mb->nltype == NLTYPE_ANY || mb->nltype == NLTYPE_ANYCRLF) &&
7648
0
               start_match < end_subject &&
7649
0
               UCHAR21TEST(start_match) == CHAR_NL)
7650
0
            start_match++;
7651
0
          }
7652
0
        }
7653
7654
      /* If there's no first code unit or a requirement for a multiline line
7655
      start, advance to a non-unique first code unit if any have been
7656
      identified. The bitmap contains only 256 bits. When code units are 16 or
7657
      32 bits wide, all code units greater than 254 set the 255 bit. */
7658
7659
394k
      else if (start_bits != NULL)
7660
49.1k
        {
7661
100k
        while (start_match < end_subject)
7662
99.7k
          {
7663
99.7k
          uint32_t c = UCHAR21TEST(start_match);
7664
#if PCRE2_CODE_UNIT_WIDTH != 8
7665
          if (c > 255) c = 255;
7666
#endif
7667
99.7k
          if ((start_bits[c/8] & (1u << (c&7))) != 0) break;
7668
51.0k
          start_match++;
7669
51.0k
          }
7670
7671
        /* See comment above in first_cu checking about the next few lines. */
7672
7673
49.1k
        if (mb->partial == 0 && start_match >= mb->end_subject)
7674
554
          {
7675
554
          rc = MATCH_NOMATCH;
7676
554
          break;
7677
554
          }
7678
49.1k
        }
7679
395k
      }   /* End first code unit handling */
7680
7681
    /* Restore fudged end_subject */
7682
7683
464k
    end_subject = mb->end_subject;
7684
7685
    /* The following two optimizations must be disabled for partial matching. */
7686
7687
464k
    if (mb->partial == 0)
7688
464k
      {
7689
464k
      PCRE2_SPTR p;
7690
7691
      /* The minimum matching length is a lower bound; no string of that length
7692
      may actually match the pattern. Although the value is, strictly, in
7693
      characters, we treat it as code units to avoid spending too much time in
7694
      this optimization. */
7695
7696
464k
      if (end_subject - start_match < re->minlength)
7697
1.82k
        {
7698
1.82k
        rc = MATCH_NOMATCH;
7699
1.82k
        break;
7700
1.82k
        }
7701
7702
      /* If req_cu is set, we know that that code unit must appear in the
7703
      subject for the (non-partial) match to succeed. If the first code unit is
7704
      set, req_cu must be later in the subject; otherwise the test starts at
7705
      the match point. This optimization can save a huge amount of backtracking
7706
      in patterns with nested unlimited repeats that aren't going to match.
7707
      Writing separate code for caseful/caseless versions makes it go faster,
7708
      as does using an autoincrement and backing off on a match. As in the case
7709
      of the first code unit, using memchr() in the 8-bit library gives a big
7710
      speed up. Unlike the first_cu check above, we do not need to call
7711
      memchr() twice in the caseless case because we only need to check for the
7712
      presence of the character in either case, not find the first occurrence.
7713
7714
      The search can be skipped if the code unit was found later than the
7715
      current starting point in a previous iteration of the bumpalong loop.
7716
7717
      HOWEVER: when the subject string is very, very long, searching to its end
7718
      can take a long time, and give bad performance on quite ordinary
7719
      anchored patterns. This showed up when somebody was matching something
7720
      like /^\d+C/ on a 32-megabyte string... so we don't do this when the
7721
      string is sufficiently long, but it's worth searching a lot more for
7722
      unanchored patterns. */
7723
7724
462k
      p = start_match + (has_first_cu? 1:0);
7725
462k
      if (has_req_cu && p > req_cu_ptr)
7726
1.52k
        {
7727
1.52k
        PCRE2_SIZE check_length = end_subject - start_match;
7728
7729
1.52k
        if (check_length < REQ_CU_MAX ||
7730
1.52k
              (!anchored && check_length < REQ_CU_MAX * 1000))
7731
1.52k
          {
7732
1.52k
          if (req_cu != req_cu2)  /* Caseless */
7733
15
            {
7734
#if PCRE2_CODE_UNIT_WIDTH != 8
7735
            while (p < end_subject)
7736
              {
7737
              uint32_t pp = UCHAR21INCTEST(p);
7738
              if (pp == req_cu || pp == req_cu2) { p--; break; }
7739
              }
7740
#else  /* 8-bit code units */
7741
15
            PCRE2_SPTR pp = p;
7742
15
            p = memchr(pp, req_cu, end_subject - pp);
7743
15
            if (p == NULL)
7744
12
              {
7745
12
              p = memchr(pp, req_cu2, end_subject - pp);
7746
12
              if (p == NULL) p = end_subject;
7747
12
              }
7748
15
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
7749
15
            }
7750
7751
          /* The caseful case */
7752
7753
1.50k
          else
7754
1.50k
            {
7755
#if PCRE2_CODE_UNIT_WIDTH != 8
7756
            while (p < end_subject)
7757
              {
7758
              if (UCHAR21INCTEST(p) == req_cu) { p--; break; }
7759
              }
7760
7761
#else  /* 8-bit code units */
7762
1.50k
            p = memchr(p, req_cu, end_subject - p);
7763
1.50k
            if (p == NULL) p = end_subject;
7764
1.50k
#endif
7765
1.50k
            }
7766
7767
          /* If we can't find the required code unit, break the bumpalong loop,
7768
          forcing a match failure. */
7769
7770
1.52k
          if (p >= end_subject)
7771
177
            {
7772
177
            rc = MATCH_NOMATCH;
7773
177
            break;
7774
177
            }
7775
7776
          /* If we have found the required code unit, save the point where we
7777
          found it, so that we don't search again next time round the bumpalong
7778
          loop if the start hasn't yet passed this code unit. */
7779
7780
1.34k
          req_cu_ptr = p;
7781
1.34k
          }
7782
1.52k
        }
7783
462k
      }
7784
464k
    }
7785
7786
  /* ------------ End of start of match optimizations ------------ */
7787
7788
  /* Give no match if we have passed the bumpalong limit. */
7789
7790
462k
  if (start_match > bumpalong_limit)
7791
0
    {
7792
0
    rc = MATCH_NOMATCH;
7793
0
    break;
7794
0
    }
7795
7796
  /* OK, we can now run the match. If "hitend" is set afterwards, remember the
7797
  first starting point for which a partial match was found. */
7798
7799
462k
  cb.start_match = (PCRE2_SIZE)(start_match - subject);
7800
462k
  cb.callout_flags |= PCRE2_CALLOUT_STARTMATCH;
7801
7802
462k
  mb->start_used_ptr = start_match;
7803
462k
  mb->last_used_ptr = start_match;
7804
462k
#ifdef SUPPORT_UNICODE
7805
462k
  mb->moptions = options | fragment_options;
7806
#else
7807
  mb->moptions = options;
7808
#endif
7809
462k
  mb->match_call_count = 0;
7810
462k
  mb->end_offset_top = 0;
7811
462k
  mb->skip_arg_count = 0;
7812
7813
#ifdef DEBUG_SHOW_OPS
7814
  fprintf(stderr, "++ Calling match()\n");
7815
#endif
7816
7817
462k
  rc = match(start_match, mb->start_code, re->top_bracket, frame_size,
7818
462k
    match_data, mb);
7819
7820
#ifdef DEBUG_SHOW_OPS
7821
  fprintf(stderr, "++ match() returned %d\n\n", rc);
7822
#endif
7823
7824
462k
  if (mb->hitend && start_partial == NULL)
7825
0
    {
7826
0
    start_partial = mb->start_used_ptr;
7827
0
    match_partial = start_match;
7828
0
    }
7829
7830
462k
  switch(rc)
7831
462k
    {
7832
    /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
7833
    the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP
7834
    entirely. The only way we can do that is to re-do the match at the same
7835
    point, with a flag to force SKIP with an argument to be ignored. Just
7836
    treating this case as NOMATCH does not work because it does not check other
7837
    alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. */
7838
7839
0
    case MATCH_SKIP_ARG:
7840
0
    new_start_match = start_match;
7841
0
    mb->ignore_skip_arg = mb->skip_arg_count;
7842
0
    break;
7843
7844
    /* SKIP passes back the next starting point explicitly, but if it is no
7845
    greater than the match we have just done, treat it as NOMATCH. */
7846
7847
0
    case MATCH_SKIP:
7848
0
    if (mb->verb_skip_ptr > start_match)
7849
0
      {
7850
0
      new_start_match = mb->verb_skip_ptr;
7851
0
      break;
7852
0
      }
7853
    /* Fall through */
7854
7855
    /* NOMATCH and PRUNE advance by one character. THEN at this level acts
7856
    exactly like PRUNE. Unset ignore SKIP-with-argument. */
7857
7858
390k
    case MATCH_NOMATCH:
7859
390k
    case MATCH_PRUNE:
7860
390k
    case MATCH_THEN:
7861
390k
    mb->ignore_skip_arg = 0;
7862
390k
    new_start_match = start_match + 1;
7863
390k
#ifdef SUPPORT_UNICODE
7864
390k
    if (utf)
7865
74.9k
      ACROSSCHAR(new_start_match < end_subject, new_start_match,
7866
390k
        new_start_match++);
7867
390k
#endif
7868
390k
    break;
7869
7870
    /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
7871
7872
0
    case MATCH_COMMIT:
7873
0
    rc = MATCH_NOMATCH;
7874
0
    goto ENDLOOP;
7875
7876
    /* Any other return is either a match, or some kind of error. */
7877
7878
72.3k
    default:
7879
72.3k
    goto ENDLOOP;
7880
462k
    }
7881
7882
  /* Control reaches here for the various types of "no match at this point"
7883
  result. Reset the code to MATCH_NOMATCH for subsequent checking. */
7884
7885
390k
  rc = MATCH_NOMATCH;
7886
7887
  /* If PCRE2_FIRSTLINE is set, the match must happen before or at the first
7888
  newline in the subject (though it may continue over the newline). Therefore,
7889
  if we have just failed to match, starting at a newline, do not continue. */
7890
7891
390k
  if (firstline && IS_NEWLINE(start_match)) break;
7892
7893
  /* Advance to new matching position */
7894
7895
390k
  start_match = new_start_match;
7896
7897
  /* Break the loop if the pattern is anchored or if we have passed the end of
7898
  the subject. */
7899
7900
390k
  if (anchored || start_match > end_subject) break;
7901
7902
  /* If we have just passed a CR and we are now at a LF, and the pattern does
7903
  not contain any explicit matches for \r or \n, and the newline option is CRLF
7904
  or ANY or ANYCRLF, advance the match position by one more code unit. In
7905
  normal matching start_match will always be greater than the first position at
7906
  this stage, but a failed *SKIP can cause a return at the same point, which is
7907
  why the first test exists. */
7908
7909
320k
  if (start_match > subject + start_offset &&
7910
320k
      start_match[-1] == CHAR_CR &&
7911
320k
      start_match < end_subject &&
7912
320k
      *start_match == CHAR_NL &&
7913
320k
      (re->flags & PCRE2_HASCRORLF) == 0 &&
7914
320k
        (mb->nltype == NLTYPE_ANY ||
7915
173
         mb->nltype == NLTYPE_ANYCRLF ||
7916
173
         mb->nllen == 2))
7917
0
    start_match++;
7918
7919
320k
  mb->mark = NULL;   /* Reset for start of next match attempt */
7920
320k
  }                  /* End of for(;;) "bumpalong" loop */
7921
7922
/* ==========================================================================*/
7923
7924
/* When we reach here, one of the following stopping conditions is true:
7925
7926
(1) The match succeeded, either completely, or partially;
7927
7928
(2) The pattern is anchored or the match was failed after (*COMMIT);
7929
7930
(3) We are past the end of the subject or the bumpalong limit;
7931
7932
(4) PCRE2_FIRSTLINE is set and we have failed to match at a newline, because
7933
    this option requests that a match occur at or before the first newline in
7934
    the subject.
7935
7936
(5) Some kind of error occurred.
7937
7938
*/
7939
7940
144k
ENDLOOP:
7941
7942
/* If end_subject != true_end_subject, it means we are handling invalid UTF,
7943
and have just processed a non-terminal fragment. If this resulted in no match
7944
or a partial match we must carry on to the next fragment (a partial match is
7945
returned to the caller only at the very end of the subject). A loop is used to
7946
avoid trying to match against empty fragments; if the pattern can match an
7947
empty string it would have done so already. */
7948
7949
144k
#ifdef SUPPORT_UNICODE
7950
144k
if (utf && end_subject != true_end_subject &&
7951
144k
    (rc == MATCH_NOMATCH || rc == PCRE2_ERROR_PARTIAL))
7952
0
  {
7953
0
  for (;;)
7954
0
    {
7955
    /* Advance past the first bad code unit, and then skip invalid character
7956
    starting code units in 8-bit and 16-bit modes. */
7957
7958
0
    start_match = end_subject + 1;
7959
7960
0
#if PCRE2_CODE_UNIT_WIDTH != 32
7961
0
    while (start_match < true_end_subject && NOT_FIRSTCU(*start_match))
7962
0
      start_match++;
7963
0
#endif
7964
7965
    /* If we have hit the end of the subject, there isn't another non-empty
7966
    fragment, so give up. */
7967
7968
0
    if (start_match >= true_end_subject)
7969
0
      {
7970
0
      rc = MATCH_NOMATCH;  /* In case it was partial */
7971
0
      match_partial = NULL;
7972
0
      break;
7973
0
      }
7974
7975
    /* Check the rest of the subject */
7976
7977
0
    mb->check_subject = start_match;
7978
0
    rc = PRIV(valid_utf)(start_match, length - (start_match - subject),
7979
0
      &(match_data->startchar));
7980
7981
    /* The rest of the subject is valid UTF. */
7982
7983
0
    if (rc == 0)
7984
0
      {
7985
0
      mb->end_subject = end_subject = true_end_subject;
7986
0
      fragment_options = PCRE2_NOTBOL;
7987
0
      goto FRAGMENT_RESTART;
7988
0
      }
7989
7990
    /* A subsequent UTF error has been found; if the next fragment is
7991
    non-empty, set up to process it. Otherwise, let the loop advance. */
7992
7993
0
    else if (rc < 0)
7994
0
      {
7995
0
      mb->end_subject = end_subject = start_match + match_data->startchar;
7996
0
      if (end_subject > start_match)
7997
0
        {
7998
0
        fragment_options = PCRE2_NOTBOL|PCRE2_NOTEOL;
7999
0
        goto FRAGMENT_RESTART;
8000
0
        }
8001
0
      }
8002
0
    }
8003
0
  }
8004
144k
#endif  /* SUPPORT_UNICODE */
8005
8006
/* Fill in fields that are always returned in the match data. */
8007
8008
144k
match_data->code = re;
8009
144k
match_data->mark = mb->mark;
8010
144k
match_data->matchedby = PCRE2_MATCHEDBY_INTERPRETER;
8011
8012
/* Handle a fully successful match. Set the return code to the number of
8013
captured strings, or 0 if there were too many to fit into the ovector, and then
8014
set the remaining returned values before returning. Make a copy of the subject
8015
string if requested. */
8016
8017
144k
if (rc == MATCH_MATCH)
8018
72.3k
  {
8019
72.3k
  match_data->rc = ((int)mb->end_offset_top >= 2 * match_data->oveccount)?
8020
72.3k
    0 : (int)mb->end_offset_top/2 + 1;
8021
72.3k
  match_data->subject_length = length;
8022
72.3k
  match_data->startchar = start_match - subject;
8023
72.3k
  match_data->leftchar = mb->start_used_ptr - subject;
8024
72.3k
  match_data->rightchar = ((mb->last_used_ptr > mb->end_match_ptr)?
8025
71.9k
    mb->last_used_ptr : mb->end_match_ptr) - subject;
8026
72.3k
  if ((options & PCRE2_COPY_MATCHED_SUBJECT) != 0)
8027
0
    {
8028
0
    length = CU2BYTES(length + was_zero_terminated);
8029
0
    match_data->subject = match_data->memctl.malloc(length,
8030
0
      match_data->memctl.memory_data);
8031
0
    if (match_data->subject == NULL) return PCRE2_ERROR_NOMEMORY;
8032
0
    memcpy((void *)match_data->subject, subject, length);
8033
0
    match_data->flags |= PCRE2_MD_COPIED_SUBJECT;
8034
0
    }
8035
72.3k
  else match_data->subject = subject;
8036
8037
72.3k
  return match_data->rc;
8038
72.3k
  }
8039
8040
/* Control gets here if there has been a partial match, an error, or if the
8041
overall match attempt has failed at all permitted starting positions. Any mark
8042
data is in the nomatch_mark field. */
8043
8044
72.5k
match_data->mark = mb->nomatch_mark;
8045
8046
/* For anything other than nomatch or partial match, just return the code. */
8047
8048
72.5k
if (rc != MATCH_NOMATCH && rc != PCRE2_ERROR_PARTIAL) match_data->rc = rc;
8049
8050
/* Handle a partial match. If a "soft" partial match was requested, searching
8051
for a complete match will have continued, and the value of rc at this point
8052
will be MATCH_NOMATCH. For a "hard" partial match, it will already be
8053
PCRE2_ERROR_PARTIAL. */
8054
8055
72.5k
else if (match_partial != NULL)
8056
0
  {
8057
0
  match_data->subject = subject;
8058
0
  match_data->subject_length = length;
8059
0
  match_data->ovector[0] = match_partial - subject;
8060
0
  match_data->ovector[1] = end_subject - subject;
8061
0
  match_data->startchar = match_partial - subject;
8062
0
  match_data->leftchar = start_partial - subject;
8063
0
  match_data->rightchar = end_subject - subject;
8064
0
  match_data->rc = PCRE2_ERROR_PARTIAL;
8065
0
  }
8066
8067
/* Else this is the classic nomatch case. */
8068
8069
72.5k
else match_data->rc = PCRE2_ERROR_NOMATCH;
8070
8071
72.5k
return match_data->rc;
8072
144k
}
8073
8074
/* These #undefs are here to enable unity builds with CMake. */
8075
8076
#undef NLBLOCK /* Block containing newline information */
8077
#undef PSSTART /* Field containing processed string start */
8078
#undef PSEND   /* Field containing processed string end */
8079
8080
/* End of pcre2_match.c */