Coverage Report

Created: 2026-04-01 06:49

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/php-src/ext/pcre/pcre2lib/pcre2_match.c
Line
Count
Source
1
/*************************************************
2
*      Perl-Compatible Regular Expressions       *
3
*************************************************/
4
5
/* PCRE is a library of functions to support regular expressions whose syntax
6
and semantics are as close as possible to those of the Perl 5 language.
7
8
                       Written by Philip Hazel
9
     Original API code Copyright (c) 1997-2012 University of Cambridge
10
          New API code Copyright (c) 2015-2024 University of Cambridge
11
12
-----------------------------------------------------------------------------
13
Redistribution and use in source and binary forms, with or without
14
modification, are permitted provided that the following conditions are met:
15
16
    * Redistributions of source code must retain the above copyright notice,
17
      this list of conditions and the following disclaimer.
18
19
    * Redistributions in binary form must reproduce the above copyright
20
      notice, this list of conditions and the following disclaimer in the
21
      documentation and/or other materials provided with the distribution.
22
23
    * Neither the name of the University of Cambridge nor the names of its
24
      contributors may be used to endorse or promote products derived from
25
      this software without specific prior written permission.
26
27
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37
POSSIBILITY OF SUCH DAMAGE.
38
-----------------------------------------------------------------------------
39
*/
40
41
42
#ifdef HAVE_CONFIG_H
43
#include "config.h"
44
#endif
45
46
#include "pcre2_internal.h"
47
48
/* These defines enable debugging code */
49
50
/* #define DEBUG_FRAMES_DISPLAY */
51
/* #define DEBUG_SHOW_OPS */
52
/* #define DEBUG_SHOW_RMATCH */
53
54
#ifdef DEBUG_FRAMES_DISPLAY
55
#include <stdarg.h>
56
#endif
57
58
#ifdef DEBUG_SHOW_OPS
59
static const char *OP_names[] = { OP_NAME_LIST };
60
#endif
61
62
/* These defines identify the name of the block containing "static"
63
information, and fields within it. */
64
65
505M
#define NLBLOCK mb              /* Block containing newline information */
66
627k
#define PSSTART start_subject   /* Field containing processed string start */
67
125M
#define PSEND   end_subject     /* Field containing processed string end */
68
69
329k
#define RECURSE_UNSET 0xffffffffu  /* Bigger than max group number */
70
71
/* Masks for identifying the public options that are permitted at match time. */
72
73
#define PUBLIC_MATCH_OPTIONS \
74
5.97k
  (PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
75
5.97k
   PCRE2_NOTEMPTY_ATSTART|PCRE2_NO_UTF_CHECK|PCRE2_PARTIAL_HARD| \
76
5.97k
   PCRE2_PARTIAL_SOFT|PCRE2_NO_JIT|PCRE2_COPY_MATCHED_SUBJECT| \
77
5.97k
   PCRE2_DISABLE_RECURSELOOP_CHECK)
78
79
#define PUBLIC_JIT_MATCH_OPTIONS \
80
   (PCRE2_NO_UTF_CHECK|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY|\
81
    PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_SOFT|PCRE2_PARTIAL_HARD|\
82
    PCRE2_COPY_MATCHED_SUBJECT)
83
84
/* Non-error returns from and within the match() function. Error returns are
85
externally defined PCRE2_ERROR_xxx codes, which are all negative. */
86
87
7.26k
#define MATCH_MATCH        1
88
505M
#define MATCH_NOMATCH      0
89
90
/* Special internal returns used in the match() function. Make them
91
sufficiently negative to avoid the external error codes. */
92
93
1.81k
#define MATCH_ACCEPT       (-999)
94
48.5k
#define MATCH_KETRPOS      (-998)
95
/* The next 5 must be kept together and in sequence so that a test that checks
96
for any one of them can use a range. */
97
1.28k
#define MATCH_COMMIT       (-997)
98
327k
#define MATCH_PRUNE        (-996)
99
0
#define MATCH_SKIP         (-995)
100
50
#define MATCH_SKIP_ARG     (-994)
101
1.57M
#define MATCH_THEN         (-993)
102
640
#define MATCH_BACKTRACK_MAX MATCH_THEN
103
640
#define MATCH_BACKTRACK_MIN MATCH_COMMIT
104
105
/* Group frame type values. Zero means the frame is not a group frame. The
106
lower 16 bits are used for data (e.g. the capture number). Group frames are
107
used for most groups so that information about the start is easily available at
108
the end without having to scan back through intermediate frames (backtrack
109
points). */
110
111
129k
#define GF_CAPTURE     0x00010000u
112
638
#define GF_NOCAPTURE   0x00020000u
113
82.6k
#define GF_CONDASSERT  0x00030000u
114
250k
#define GF_RECURSE     0x00040000u
115
116
/* Masks for the identity and data parts of the group frame type. */
117
118
333k
#define GF_IDMASK(a)   ((a) & 0xffff0000u)
119
644
#define GF_DATAMASK(a) ((a) & 0x0000ffffu)
120
121
/* Repetition types */
122
123
enum { REPTYPE_MIN, REPTYPE_MAX, REPTYPE_POS };
124
125
/* Min and max values for the common repeats; a maximum of UINT32_MAX =>
126
infinity. */
127
128
static const uint32_t rep_min[] = {
129
  0, 0,       /* * and *? */
130
  1, 1,       /* + and +? */
131
  0, 0,       /* ? and ?? */
132
  0, 0,       /* dummy placefillers for OP_CR[MIN]RANGE */
133
  0, 1, 0 };  /* OP_CRPOS{STAR, PLUS, QUERY} */
134
135
static const uint32_t rep_max[] = {
136
  UINT32_MAX, UINT32_MAX,      /* * and *? */
137
  UINT32_MAX, UINT32_MAX,      /* + and +? */
138
  1, 1,                        /* ? and ?? */
139
  0, 0,                        /* dummy placefillers for OP_CR[MIN]RANGE */
140
  UINT32_MAX, UINT32_MAX, 1 }; /* OP_CRPOS{STAR, PLUS, QUERY} */
141
142
/* Repetition types - must include OP_CRPOSRANGE (not needed above) */
143
144
static const uint32_t rep_typ[] = {
145
  REPTYPE_MAX, REPTYPE_MIN,    /* * and *? */
146
  REPTYPE_MAX, REPTYPE_MIN,    /* + and +? */
147
  REPTYPE_MAX, REPTYPE_MIN,    /* ? and ?? */
148
  REPTYPE_MAX, REPTYPE_MIN,    /* OP_CRRANGE and OP_CRMINRANGE */
149
  REPTYPE_POS, REPTYPE_POS,    /* OP_CRPOSSTAR, OP_CRPOSPLUS */
150
  REPTYPE_POS, REPTYPE_POS };  /* OP_CRPOSQUERY, OP_CRPOSRANGE */
151
152
/* Numbers for RMATCH calls at backtracking points. When these lists are
153
changed, the code at RETURN_SWITCH below must be updated in sync.  */
154
155
enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
156
       RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
157
       RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
158
       RM31,  RM32, RM33, RM34, RM35, RM36, RM37 };
159
160
#ifdef SUPPORT_WIDE_CHARS
161
enum { RM100=100, RM101 };
162
#endif
163
164
#ifdef SUPPORT_UNICODE
165
enum { RM200=200, RM201, RM202, RM203, RM204, RM205, RM206, RM207,
166
       RM208,     RM209, RM210, RM211, RM212, RM213, RM214, RM215,
167
       RM216,     RM217, RM218, RM219, RM220, RM221, RM222, RM223,
168
       RM224,     RM225 };
169
#endif
170
171
/* Define short names for general fields in the current backtrack frame, which
172
is always pointed to by the F variable. Occasional references to fields in
173
other frames are written out explicitly. There are also some fields in the
174
current frame whose names start with "temp" that are used for short-term,
175
localised backtracking memory. These are #defined with Lxxx names at the point
176
of use and undefined afterwards. */
177
178
1.00G
#define Fback_frame        F->back_frame
179
410k
#define Fcapture_last      F->capture_last
180
418k
#define Fcurrent_recurse   F->current_recurse
181
2.42G
#define Fecode             F->ecode
182
3.90G
#define Feptr              F->eptr
183
504M
#define Fgroup_frame_type  F->group_frame_type
184
745k
#define Flast_group_offset F->last_group_offset
185
241M
#define Flength            F->length
186
330k
#define Fmark              F->mark
187
1.51G
#define Frdepth            F->rdepth
188
346k
#define Fstart_match       F->start_match
189
542k
#define Foffset_top        F->offset_top
190
312
#define Foccu              F->occu
191
2.14G
#define Fop                F->op
192
163k
#define Fovector           F->ovector
193
1.00G
#define Freturn_id         F->return_id
194
195
196
#ifdef DEBUG_FRAMES_DISPLAY
197
/*************************************************
198
*      Display current frames and contents       *
199
*************************************************/
200
201
/* This debugging function displays the current set of frames and their
202
contents. It is not called automatically from anywhere, the intention being
203
that calls can be inserted where necessary when debugging frame-related
204
problems.
205
206
Arguments:
207
  f           the file to write to
208
  F           the current top frame
209
  P           a previous frame of interest
210
  frame_size  the frame size
211
  mb          points to the match block
212
  match_data  points to the match data block
213
  s           identification text
214
215
Returns:    nothing
216
*/
217
218
static void
219
display_frames(FILE *f, heapframe *F, heapframe *P, PCRE2_SIZE frame_size,
220
  match_block *mb, pcre2_match_data *match_data, const char *s, ...)
221
{
222
uint32_t i;
223
heapframe *Q;
224
va_list ap;
225
va_start(ap, s);
226
227
fprintf(f, "FRAMES ");
228
vfprintf(f, s, ap);
229
va_end(ap);
230
231
if (P != NULL) fprintf(f, " P=%lu",
232
  ((char *)P - (char *)(match_data->heapframes))/frame_size);
233
fprintf(f, "\n");
234
235
for (i = 0, Q = match_data->heapframes;
236
     Q <= F;
237
     i++, Q = (heapframe *)((char *)Q + frame_size))
238
  {
239
  fprintf(f, "Frame %d type=%x subj=%lu code=%d back=%lu id=%d",
240
    i, Q->group_frame_type, Q->eptr - mb->start_subject, *(Q->ecode),
241
    Q->back_frame, Q->return_id);
242
243
  if (Q->last_group_offset == PCRE2_UNSET)
244
    fprintf(f, " lgoffset=unset\n");
245
  else
246
    fprintf(f, " lgoffset=%lu\n",  Q->last_group_offset/frame_size);
247
  }
248
}
249
250
#endif
251
252
253
254
/*************************************************
255
*                Process a callout               *
256
*************************************************/
257
258
/* This function is called for all callouts, whether "standalone" or at the
259
start of a conditional group. Fecode will be pointing to either OP_CALLOUT or
260
OP_CALLOUT_STR. A callout block is allocated in pcre2_match() and initialized
261
with fixed values.
262
263
Arguments:
264
  F          points to the current backtracking frame
265
  mb         points to the match block
266
  lengthptr  where to return the length of the callout item
267
268
Returns:     the return from the callout
269
             or 0 if no callout function exists
270
*/
271
272
static int
273
do_callout(heapframe *F, match_block *mb, PCRE2_SIZE *lengthptr)
274
0
{
275
0
int rc;
276
0
PCRE2_SIZE save0, save1;
277
0
PCRE2_SIZE *callout_ovector;
278
0
pcre2_callout_block *cb;
279
280
0
*lengthptr = (*Fecode == OP_CALLOUT)?
281
0
  PRIV(OP_lengths)[OP_CALLOUT] : GET(Fecode, 1 + 2*LINK_SIZE);
282
283
0
if (mb->callout == NULL) return 0;   /* No callout function provided */
284
285
/* The original matching code (pre 10.30) worked directly with the ovector
286
passed by the user, and this was passed to callouts. Now that the working
287
ovector is in the backtracking frame, it no longer needs to reserve space for
288
the overall match offsets (which would waste space in the frame). For backward
289
compatibility, however, we pass capture_top and offset_vector to the callout as
290
if for the extended ovector, and we ensure that the first two slots are unset
291
by preserving and restoring their current contents. Picky compilers complain if
292
references such as Fovector[-2] are used directly, so we set up a separate
293
pointer. */
294
295
0
callout_ovector = (PCRE2_SIZE *)(Fovector) - 2;
296
297
/* The cb->version, cb->subject, cb->subject_length, and cb->start_match fields
298
are set externally. The first 3 never change; the last is updated for each
299
bumpalong. */
300
301
0
cb = mb->cb;
302
0
cb->capture_top      = (uint32_t)Foffset_top/2 + 1;
303
0
cb->capture_last     = Fcapture_last;
304
0
cb->offset_vector    = callout_ovector;
305
0
cb->mark             = mb->nomatch_mark;
306
0
cb->current_position = (PCRE2_SIZE)(Feptr - mb->start_subject);
307
0
cb->pattern_position = GET(Fecode, 1);
308
0
cb->next_item_length = GET(Fecode, 1 + LINK_SIZE);
309
310
0
if (*Fecode == OP_CALLOUT)  /* Numerical callout */
311
0
  {
312
0
  cb->callout_number = Fecode[1 + 2*LINK_SIZE];
313
0
  cb->callout_string_offset = 0;
314
0
  cb->callout_string = NULL;
315
0
  cb->callout_string_length = 0;
316
0
  }
317
0
else  /* String callout */
318
0
  {
319
0
  cb->callout_number = 0;
320
0
  cb->callout_string_offset = GET(Fecode, 1 + 3*LINK_SIZE);
321
0
  cb->callout_string = Fecode + (1 + 4*LINK_SIZE) + 1;
322
0
  cb->callout_string_length =
323
0
    *lengthptr - (1 + 4*LINK_SIZE) - 2;
324
0
  }
325
326
0
save0 = callout_ovector[0];
327
0
save1 = callout_ovector[1];
328
0
callout_ovector[0] = callout_ovector[1] = PCRE2_UNSET;
329
0
rc = mb->callout(cb, mb->callout_data);
330
0
callout_ovector[0] = save0;
331
0
callout_ovector[1] = save1;
332
0
cb->callout_flags = 0;
333
0
return rc;
334
0
}
335
336
337
338
/*************************************************
339
*          Match a back-reference                *
340
*************************************************/
341
342
/* This function is called only when it is known that the offset lies within
343
the offsets that have so far been used in the match. Note that in caseless
344
UTF-8 mode, the number of subject bytes matched may be different to the number
345
of reference bytes. (In theory this could also happen in UTF-16 mode, but it
346
seems unlikely.)
347
348
Arguments:
349
  offset      index into the offset vector
350
  caseless    TRUE if caseless
351
  F           the current backtracking frame pointer
352
  mb          points to match block
353
  lengthptr   pointer for returning the length matched
354
355
Returns:      = 0 successful match; number of code units matched is set
356
              < 0 no match
357
              > 0 partial match
358
*/
359
360
static int
361
match_ref(PCRE2_SIZE offset, BOOL caseless, heapframe *F, match_block *mb,
362
  PCRE2_SIZE *lengthptr)
363
23.2k
{
364
23.2k
PCRE2_SPTR p;
365
23.2k
PCRE2_SIZE length;
366
23.2k
PCRE2_SPTR eptr;
367
23.2k
PCRE2_SPTR eptr_start;
368
369
/* Deal with an unset group. The default is no match, but there is an option to
370
match an empty string. */
371
372
23.2k
if (offset >= Foffset_top || Fovector[offset] == PCRE2_UNSET)
373
23.2k
  {
374
23.2k
  if ((mb->poptions & PCRE2_MATCH_UNSET_BACKREF) != 0)
375
0
    {
376
0
    *lengthptr = 0;
377
0
    return 0;      /* Match */
378
0
    }
379
23.2k
  else return -1;  /* No match */
380
23.2k
  }
381
382
/* Separate the caseless and UTF cases for speed. */
383
384
4
eptr = eptr_start = Feptr;
385
4
p = mb->start_subject + Fovector[offset];
386
4
length = Fovector[offset+1] - Fovector[offset];
387
388
4
if (caseless)
389
4
  {
390
4
#if defined SUPPORT_UNICODE
391
4
  BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
392
393
4
  if (utf || (mb->poptions & PCRE2_UCP) != 0)
394
0
    {
395
0
    PCRE2_SPTR endptr = p + length;
396
397
    /* Match characters up to the end of the reference. NOTE: the number of
398
    code units matched may differ, because in UTF-8 there are some characters
399
    whose upper and lower case codes have different numbers of bytes. For
400
    example, U+023A (2 bytes in UTF-8) is the upper case version of U+2C65 (3
401
    bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a
402
    sequence of two of the latter. It is important, therefore, to check the
403
    length along the reference, not along the subject (earlier code did this
404
    wrong). UCP without UTF uses Unicode properties but not UTF encoding. */
405
406
0
    while (p < endptr)
407
0
      {
408
0
      uint32_t c, d;
409
0
      const ucd_record *ur;
410
0
      if (eptr >= mb->end_subject) return 1;   /* Partial match */
411
412
0
      if (utf)
413
0
        {
414
0
        GETCHARINC(c, eptr);
415
0
        GETCHARINC(d, p);
416
0
        }
417
0
      else
418
0
        {
419
0
        c = *eptr++;
420
0
        d = *p++;
421
0
        }
422
423
0
      ur = GET_UCD(d);
424
0
      if (c != d && c != (uint32_t)((int)d + ur->other_case))
425
0
        {
426
0
        const uint32_t *pp = PRIV(ucd_caseless_sets) + ur->caseset;
427
0
        for (;;)
428
0
          {
429
0
          if (c < *pp) return -1;  /* No match */
430
0
          if (c == *pp++) break;
431
0
          }
432
0
        }
433
0
      }
434
0
    }
435
4
  else
436
4
#endif
437
438
  /* Not in UTF or UCP mode */
439
4
    {
440
4
    for (; length > 0; length--)
441
0
      {
442
0
      uint32_t cc, cp;
443
0
      if (eptr >= mb->end_subject) return 1;   /* Partial match */
444
0
      cc = UCHAR21TEST(eptr);
445
0
      cp = UCHAR21TEST(p);
446
0
      if (TABLE_GET(cp, mb->lcc, cp) != TABLE_GET(cc, mb->lcc, cc))
447
0
        return -1;  /* No match */
448
0
      p++;
449
0
      eptr++;
450
0
      }
451
4
    }
452
4
  }
453
454
/* In the caseful case, we can just compare the code units, whether or not we
455
are in UTF and/or UCP mode. When partial matching, we have to do this unit by
456
unit. */
457
458
0
else
459
0
  {
460
0
  if (mb->partial != 0)
461
0
    {
462
0
    for (; length > 0; length--)
463
0
      {
464
0
      if (eptr >= mb->end_subject) return 1;   /* Partial match */
465
0
      if (UCHAR21INCTEST(p) != UCHAR21INCTEST(eptr)) return -1;  /* No match */
466
0
      }
467
0
    }
468
469
  /* Not partial matching */
470
471
0
  else
472
0
    {
473
0
    if ((PCRE2_SIZE)(mb->end_subject - eptr) < length) return 1; /* Partial */
474
0
    if (memcmp(p, eptr, CU2BYTES(length)) != 0) return -1;  /* No match */
475
0
    eptr += length;
476
0
    }
477
0
  }
478
479
4
*lengthptr = eptr - eptr_start;
480
4
return 0;  /* Match */
481
4
}
482
483
484
485
/******************************************************************************
486
*******************************************************************************
487
                   "Recursion" in the match() function
488
489
The original match() function was highly recursive, but this proved to be the
490
source of a number of problems over the years, mostly because of the relatively
491
small system stacks that are commonly found. As new features were added to
492
patterns, various kludges were invented to reduce the amount of stack used,
493
making the code hard to understand in places.
494
495
A version did exist that used individual frames on the heap instead of calling
496
match() recursively, but this ran substantially slower. The current version is
497
a refactoring that uses a vector of frames to remember backtracking points.
498
This runs no slower, and possibly even a bit faster than the original recursive
499
implementation.
500
501
At first, an initial vector of size START_FRAMES_SIZE (enough for maybe 50
502
frames) was allocated on the system stack. If this was not big enough, the heap
503
was used for a larger vector. However, it turns out that there are environments
504
where taking as little as 20KiB from the system stack is an embarrassment.
505
After another refactoring, the heap is used exclusively, but a pointer to the
506
frames vector and its size are cached in the match_data block, so that there is
507
no new memory allocation if the same match_data block is used for multiple
508
matches (unless the frames vector has to be extended).
509
*******************************************************************************
510
******************************************************************************/
511
512
513
514
515
/*************************************************
516
*       Macros for the match() function          *
517
*************************************************/
518
519
/* These macros pack up tests that are used for partial matching several times
520
in the code. The second one is used when we already know we are past the end of
521
the subject. We set the "hit end" flag if the pointer is at the end of the
522
subject and either (a) the pointer is past the earliest inspected character
523
(i.e. something has been matched, even if not part of the actual matched
524
string), or (b) the pattern contains a lookbehind. These are the conditions for
525
which adding more characters may allow the current match to continue.
526
527
For hard partial matching, we immediately return a partial match. Otherwise,
528
carrying on means that a complete match on the current subject will be sought.
529
A partial match is returned only if no complete match can be found. */
530
531
#define CHECK_PARTIAL()\
532
22.9M
  if (Feptr >= mb->end_subject) \
533
22.9M
    { \
534
1.59M
    SCHECK_PARTIAL(); \
535
1.59M
    }
536
537
#define SCHECK_PARTIAL()\
538
8.68M
  if (mb->partial != 0 && \
539
8.68M
      (Feptr > mb->start_used_ptr || mb->allowemptypartial)) \
540
8.68M
    { \
541
0
    mb->hitend = TRUE; \
542
0
    if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; \
543
0
    }
544
545
546
/* These macros are used to implement backtracking. They simulate a recursive
547
call to the match() function by means of a local vector of frames which
548
remember the backtracking points. */
549
550
#define RMATCH(ra,rb)\
551
451M
  {\
552
451M
  start_ecode = ra;\
553
504M
  Freturn_id = rb;\
554
451M
  goto MATCH_RECURSE;\
555
504M
  L_##rb:;\
556
504M
  }
557
558
#define RRETURN(ra)\
559
106M
  {\
560
106M
  rrc = ra;\
561
106M
  goto RETURN_SWITCH;\
562
504M
  }
563
564
565
566
/*************************************************
567
*         Match from current position            *
568
*************************************************/
569
570
/* This function is called to run one match attempt at a single starting point
571
in the subject.
572
573
Performance note: It might be tempting to extract commonly used fields from the
574
mb structure (e.g. end_subject) into individual variables to improve
575
performance. Tests using gcc on a SPARC disproved this; in the first case, it
576
made performance worse.
577
578
Arguments:
579
   start_eptr   starting character in subject
580
   start_ecode  starting position in compiled code
581
   top_bracket  number of capturing parentheses in the pattern
582
   frame_size   size of each backtracking frame
583
   match_data   pointer to the match_data block
584
   mb           pointer to "static" variables block
585
586
Returns:        MATCH_MATCH if matched            )  these values are >= 0
587
                MATCH_NOMATCH if failed to match  )
588
                negative MATCH_xxx value for PRUNE, SKIP, etc
589
                negative PCRE2_ERROR_xxx value if aborted by an error condition
590
                (e.g. stopped by repeated call or depth limit)
591
*/
592
593
static int
594
match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket,
595
  PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb)
596
329k
{
597
/* Frame-handling variables */
598
599
329k
heapframe *F;           /* Current frame pointer */
600
329k
heapframe *N = NULL;    /* Temporary frame pointers */
601
329k
heapframe *P = NULL;
602
603
329k
heapframe *frames_top;  /* End of frames vector */
604
329k
heapframe *assert_accept_frame = NULL;  /* For passing back a frame with captures */
605
329k
PCRE2_SIZE frame_copy_size;   /* Amount to copy when creating a new frame */
606
607
/* Local variables that do not need to be preserved over calls to RMATCH(). */
608
609
329k
PCRE2_SPTR branch_end = NULL;
610
329k
PCRE2_SPTR branch_start;
611
329k
PCRE2_SPTR bracode;     /* Temp pointer to start of group */
612
329k
PCRE2_SIZE offset;      /* Used for group offsets */
613
329k
PCRE2_SIZE length;      /* Used for various length calculations */
614
615
329k
int rrc;                /* Return from functions & backtracking "recursions" */
616
329k
#ifdef SUPPORT_UNICODE
617
329k
int proptype;           /* Type of character property */
618
329k
#endif
619
620
329k
uint32_t i;             /* Used for local loops */
621
329k
uint32_t fc;            /* Character values */
622
329k
uint32_t number;        /* Used for group and other numbers */
623
329k
uint32_t reptype = 0;   /* Type of repetition (0 to avoid compiler warning) */
624
329k
uint32_t group_frame_type;  /* Specifies type for new group frames */
625
626
329k
BOOL condition;         /* Used in conditional groups */
627
329k
BOOL cur_is_word;       /* Used in "word" tests */
628
329k
BOOL prev_is_word;      /* Used in "word" tests */
629
630
/* UTF and UCP flags */
631
632
329k
#ifdef SUPPORT_UNICODE
633
329k
BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
634
329k
BOOL ucp = (mb->poptions & PCRE2_UCP) != 0;
635
#else
636
BOOL utf = FALSE;  /* Required for convenience even when no Unicode support */
637
#endif
638
639
/* This is the length of the last part of a backtracking frame that must be
640
copied when a new frame is created. */
641
642
329k
frame_copy_size = frame_size - offsetof(heapframe, eptr);
643
644
/* Set up the first frame and the end of the frames vector. */
645
646
329k
F = match_data->heapframes;
647
329k
frames_top = (heapframe *)((char *)F + match_data->heapframes_size);
648
649
329k
Frdepth = 0;                        /* "Recursion" depth */
650
329k
Fcapture_last = 0;                  /* Number of most recent capture */
651
329k
Fcurrent_recurse = RECURSE_UNSET;   /* Not pattern recursing. */
652
329k
Fstart_match = Feptr = start_eptr;  /* Current data pointer and start match */
653
329k
Fmark = NULL;                       /* Most recent mark */
654
329k
Foffset_top = 0;                    /* End of captures within the frame */
655
329k
Flast_group_offset = PCRE2_UNSET;   /* Saved frame of most recent group */
656
329k
group_frame_type = 0;               /* Not a start of group frame */
657
329k
goto NEW_FRAME;                     /* Start processing with this frame */
658
659
/* Come back here when we want to create a new frame for remembering a
660
backtracking point. */
661
662
504M
MATCH_RECURSE:
663
664
/* Set up a new backtracking frame. If the vector is full, get a new one,
665
doubling the size, but constrained by the heap limit (which is in KiB). */
666
667
504M
N = (heapframe *)((char *)F + frame_size);
668
504M
if ((heapframe *)((char *)N + frame_size) >= frames_top)
669
0
  {
670
0
  heapframe *new;
671
0
  PCRE2_SIZE newsize;
672
0
  PCRE2_SIZE usedsize = (char *)N - (char *)(match_data->heapframes);
673
674
0
  if (match_data->heapframes_size >= PCRE2_SIZE_MAX / 2)
675
0
    {
676
0
    if (match_data->heapframes_size == PCRE2_SIZE_MAX - 1)
677
0
      return PCRE2_ERROR_NOMEMORY;
678
0
    newsize = PCRE2_SIZE_MAX - 1;
679
0
    }
680
0
  else
681
0
    newsize = match_data->heapframes_size * 2;
682
683
0
  if (newsize / 1024 >= mb->heap_limit)
684
0
    {
685
0
    PCRE2_SIZE old_size = match_data->heapframes_size / 1024;
686
0
    if (mb->heap_limit <= old_size)
687
0
      return PCRE2_ERROR_HEAPLIMIT;
688
0
    else
689
0
      {
690
0
      PCRE2_SIZE max_delta = 1024 * (mb->heap_limit - old_size);
691
0
      int over_bytes = match_data->heapframes_size % 1024;
692
0
      if (over_bytes) max_delta -= (1024 - over_bytes);
693
0
      newsize = match_data->heapframes_size + max_delta;
694
0
      }
695
0
    }
696
697
  /* With a heap limit set, the permitted additional size may not be enough for
698
  another frame, so do a final check. */
699
700
0
  if (newsize - usedsize < frame_size) return PCRE2_ERROR_HEAPLIMIT;
701
0
  new = match_data->memctl.malloc(newsize, match_data->memctl.memory_data);
702
0
  if (new == NULL) return PCRE2_ERROR_NOMEMORY;
703
0
  memcpy(new, match_data->heapframes, usedsize);
704
705
0
  N = (heapframe *)((char *)new + usedsize);
706
0
  F = (heapframe *)((char *)N - frame_size);
707
708
0
  match_data->memctl.free(match_data->heapframes, match_data->memctl.memory_data);
709
0
  match_data->heapframes = new;
710
0
  match_data->heapframes_size = newsize;
711
0
  frames_top = (heapframe *)((char *)new + newsize);
712
0
  }
713
714
#ifdef DEBUG_SHOW_RMATCH
715
fprintf(stderr, "++ RMATCH %d frame=%d", Freturn_id, Frdepth + 1);
716
if (group_frame_type != 0)
717
  {
718
  fprintf(stderr, " type=%x ", group_frame_type);
719
  switch (GF_IDMASK(group_frame_type))
720
    {
721
    case GF_CAPTURE:
722
    fprintf(stderr, "capture=%d", GF_DATAMASK(group_frame_type));
723
    break;
724
725
    case GF_NOCAPTURE:
726
    fprintf(stderr, "nocapture op=%d", GF_DATAMASK(group_frame_type));
727
    break;
728
729
    case GF_CONDASSERT:
730
    fprintf(stderr, "condassert op=%d", GF_DATAMASK(group_frame_type));
731
    break;
732
733
    case GF_RECURSE:
734
    fprintf(stderr, "recurse=%d", GF_DATAMASK(group_frame_type));
735
    break;
736
737
    default:
738
    fprintf(stderr, "*** unknown ***");
739
    break;
740
    }
741
  }
742
fprintf(stderr, "\n");
743
#endif
744
745
/* Copy those fields that must be copied into the new frame, increase the
746
"recursion" depth (i.e. the new frame's index) and then make the new frame
747
current. */
748
749
504M
memcpy((char *)N + offsetof(heapframe, eptr),
750
504M
       (char *)F + offsetof(heapframe, eptr),
751
504M
       frame_copy_size);
752
753
504M
N->rdepth = Frdepth + 1;
754
504M
F = N;
755
756
/* Carry on processing with a new frame. */
757
758
504M
NEW_FRAME:
759
504M
Fgroup_frame_type = group_frame_type;
760
504M
Fecode = start_ecode;      /* Starting code pointer */
761
504M
Fback_frame = frame_size;  /* Default is go back one frame */
762
763
/* If this is a special type of group frame, remember its offset for quick
764
access at the end of the group. If this is a recursion, set a new current
765
recursion value. */
766
767
504M
if (group_frame_type != 0)
768
250k
  {
769
250k
  Flast_group_offset = (char *)F - (char *)match_data->heapframes;
770
250k
  if (GF_IDMASK(group_frame_type) == GF_RECURSE)
771
644
    Fcurrent_recurse = GF_DATAMASK(group_frame_type);
772
250k
  group_frame_type = 0;
773
250k
  }
774
775
776
/* ========================================================================= */
777
/* This is the main processing loop. First check that we haven't recorded too
778
many backtracks (search tree is too large), or that we haven't exceeded the
779
recursive depth limit (used too many backtracking frames). If not, process the
780
opcodes. */
781
782
504M
if (mb->match_call_count++ >= mb->match_limit) return PCRE2_ERROR_MATCHLIMIT;
783
504M
if (Frdepth >= mb->match_limit_depth) return PCRE2_ERROR_DEPTHLIMIT;
784
785
#ifdef DEBUG_SHOW_OPS
786
fprintf(stderr, "\n++ New frame: type=0x%x subject offset %ld\n",
787
  GF_IDMASK(Fgroup_frame_type), Feptr - mb->start_subject);
788
#endif
789
790
504M
for (;;)
791
929M
  {
792
#ifdef DEBUG_SHOW_OPS
793
fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
794
  OP_names[*Fecode]);
795
#endif
796
797
929M
  Fop = (uint8_t)(*Fecode);  /* Cast needed for 16-bit and 32-bit modes */
798
929M
  switch(Fop)
799
929M
    {
800
    /* ===================================================================== */
801
    /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes, to close
802
    any currently open capturing brackets. Unlike reaching the end of a group,
803
    where we know the starting frame is at the top of the chained frames, in
804
    this case we have to search back for the relevant frame in case other types
805
    of group that use chained frames have intervened. Multiple OP_CLOSEs always
806
    come innermost first, which matches the chain order. We can ignore this in
807
    a recursion, because captures are not passed out of recursions. */
808
809
0
    case OP_CLOSE:
810
0
    if (Fcurrent_recurse == RECURSE_UNSET)
811
0
      {
812
0
      number = GET2(Fecode, 1);
813
0
      offset = Flast_group_offset;
814
0
      for(;;)
815
0
        {
816
0
        if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL;
817
0
        N = (heapframe *)((char *)match_data->heapframes + offset);
818
0
        P = (heapframe *)((char *)N - frame_size);
819
0
        if (N->group_frame_type == (GF_CAPTURE | number)) break;
820
0
        offset = P->last_group_offset;
821
0
        }
822
0
      offset = (number << 1) - 2;
823
0
      Fcapture_last = number;
824
0
      Fovector[offset] = P->eptr - mb->start_subject;
825
0
      Fovector[offset+1] = Feptr - mb->start_subject;
826
0
      if (offset >= Foffset_top) Foffset_top = offset + 2;
827
0
      }
828
0
    Fecode += PRIV(OP_lengths)[*Fecode];
829
0
    break;
830
831
832
    /* ===================================================================== */
833
    /* Real or forced end of the pattern, assertion, or recursion. In an
834
    assertion ACCEPT, update the last used pointer and remember the current
835
    frame so that the captures and mark can be fished out of it. */
836
837
0
    case OP_ASSERT_ACCEPT:
838
0
    if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
839
0
    assert_accept_frame = F;
840
0
    RRETURN(MATCH_ACCEPT);
841
842
    /* For ACCEPT within a recursion, we have to find the most recent
843
    recursion. If not in a recursion, fall through to code that is common with
844
    OP_END. */
845
846
0
    case OP_ACCEPT:
847
0
    if (Fcurrent_recurse != RECURSE_UNSET)
848
0
      {
849
#ifdef DEBUG_SHOW_OPS
850
      fprintf(stderr, "++ Accept within recursion\n");
851
#endif
852
0
      offset = Flast_group_offset;
853
0
      for(;;)
854
0
        {
855
0
        if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL;
856
0
        N = (heapframe *)((char *)match_data->heapframes + offset);
857
0
        P = (heapframe *)((char *)N - frame_size);
858
0
        if (GF_IDMASK(N->group_frame_type) == GF_RECURSE) break;
859
0
        offset = P->last_group_offset;
860
0
        }
861
862
      /* N is now the frame of the recursion; the previous frame is at the
863
      OP_RECURSE position. Go back there, copying the current subject position
864
      and mark, and the start_match position (\K might have changed it), and
865
      then move on past the OP_RECURSE. */
866
867
0
      P->eptr = Feptr;
868
0
      P->mark = Fmark;
869
0
      P->start_match = Fstart_match;
870
0
      F = P;
871
0
      Fecode += 1 + LINK_SIZE;
872
0
      continue;
873
0
      }
874
    /* Fall through */
875
876
    /* OP_END itself can never be reached within a recursion because that is
877
    picked up when the OP_KET that always precedes OP_END is reached. */
878
879
5.45k
    case OP_END:
880
881
    /* Fail for an empty string match if either PCRE2_NOTEMPTY is set, or if
882
    PCRE2_NOTEMPTY_ATSTART is set and we have matched at the start of the
883
    subject. In both cases, backtracking will then try other alternatives, if
884
    any. */
885
886
5.45k
    if (Feptr == Fstart_match &&
887
4.88k
         ((mb->moptions & PCRE2_NOTEMPTY) != 0 ||
888
4.88k
           ((mb->moptions & PCRE2_NOTEMPTY_ATSTART) != 0 &&
889
4.03k
             Fstart_match == mb->start_subject + mb->start_offset)))
890
4.03k
      {
891
#ifdef DEBUG_SHOW_OPS
892
      fprintf(stderr, "++ Backtrack because empty string\n");
893
#endif
894
4.03k
      RRETURN(MATCH_NOMATCH);
895
0
      }
896
897
    /* Fail if PCRE2_ENDANCHORED is set and the end of the match is not
898
    the end of the subject. After (*ACCEPT) we fail the entire match (at this
899
    position) but backtrack if we've reached the end of the pattern. This
900
    applies whether or not we are in a recursion. */
901
902
1.42k
    if (Feptr < mb->end_subject &&
903
1.19k
        ((mb->moptions | mb->poptions) & PCRE2_ENDANCHORED) != 0)
904
0
      {
905
0
      if (Fop == OP_END)
906
0
        {
907
#ifdef DEBUG_SHOW_OPS
908
        fprintf(stderr, "++ Backtrack because not at end (endanchored set)\n");
909
#endif
910
0
        RRETURN(MATCH_NOMATCH);
911
0
        }
912
913
#ifdef DEBUG_SHOW_OPS
914
      fprintf(stderr, "++ Failed ACCEPT not at end (endanchnored set)\n");
915
#endif
916
0
      return MATCH_NOMATCH;   /* (*ACCEPT) */
917
0
      }
918
919
    /* We have a successful match of the whole pattern. Record the result and
920
    then do a direct return from the function. If there is space in the offset
921
    vector, set any pairs that follow the highest-numbered captured string but
922
    are less than the number of capturing groups in the pattern to PCRE2_UNSET.
923
    It is documented that this happens. "Gaps" are set to PCRE2_UNSET
924
    dynamically. It is only those at the end that need setting here. */
925
926
1.42k
    mb->end_match_ptr = Feptr;           /* Record where we ended */
927
1.42k
    mb->end_offset_top = Foffset_top;    /* and how many extracts were taken */
928
1.42k
    mb->mark = Fmark;                    /* and the last success mark */
929
1.42k
    if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
930
931
1.42k
    match_data->ovector[0] = Fstart_match - mb->start_subject;
932
1.42k
    match_data->ovector[1] = Feptr - mb->start_subject;
933
934
    /* Set i to the smaller of the sizes of the external and frame ovectors. */
935
936
1.42k
    i = 2 * ((top_bracket + 1 > match_data->oveccount)?
937
1.42k
      match_data->oveccount : top_bracket + 1);
938
1.42k
    memcpy(match_data->ovector + 2, Fovector, (i - 2) * sizeof(PCRE2_SIZE));
939
2.26k
    while (--i >= Foffset_top + 2) match_data->ovector[i] = PCRE2_UNSET;
940
1.42k
    return MATCH_MATCH;  /* Note: NOT RRETURN */
941
942
943
    /* ===================================================================== */
944
    /* Match any single character type except newline; have to take care with
945
    CRLF newlines and partial matching. */
946
947
2.54M
    case OP_ANY:
948
2.54M
    if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
949
2.53M
    if (mb->partial != 0 &&
950
0
        Feptr == mb->end_subject - 1 &&
951
0
        NLBLOCK->nltype == NLTYPE_FIXED &&
952
0
        NLBLOCK->nllen == 2 &&
953
0
        UCHAR21TEST(Feptr) == NLBLOCK->nl[0])
954
0
      {
955
0
      mb->hitend = TRUE;
956
0
      if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
957
0
      }
958
    /* Fall through */
959
960
    /* Match any single character whatsoever. */
961
962
7.98M
    case OP_ALLANY:
963
7.98M
    if (Feptr >= mb->end_subject)  /* DO NOT merge the Feptr++ here; it must */
964
77.3k
      {                            /* not be updated before SCHECK_PARTIAL. */
965
77.3k
      SCHECK_PARTIAL();
966
77.3k
      RRETURN(MATCH_NOMATCH);
967
0
      }
968
7.90M
    Feptr++;
969
7.90M
#ifdef SUPPORT_UNICODE
970
7.90M
    if (utf) ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
971
7.90M
#endif
972
7.90M
    Fecode++;
973
7.90M
    break;
974
975
976
    /* ===================================================================== */
977
    /* Match a single code unit, even in UTF mode. This opcode really does
978
    match any code unit, even newline. (It really should be called ANYCODEUNIT,
979
    of course - the byte name is from pre-16 bit days.) */
980
981
2.14k
    case OP_ANYBYTE:
982
2.14k
    if (Feptr >= mb->end_subject)   /* DO NOT merge the Feptr++ here; it must */
983
54
      {                             /* not be updated before SCHECK_PARTIAL. */
984
54
      SCHECK_PARTIAL();
985
54
      RRETURN(MATCH_NOMATCH);
986
0
      }
987
2.09k
    Feptr++;
988
2.09k
    Fecode++;
989
2.09k
    break;
990
991
992
    /* ===================================================================== */
993
    /* Match a single character, casefully */
994
995
163M
    case OP_CHAR:
996
163M
#ifdef SUPPORT_UNICODE
997
163M
    if (utf)
998
71.3M
      {
999
71.3M
      Flength = 1;
1000
71.3M
      Fecode++;
1001
71.3M
      GETCHARLEN(fc, Fecode, Flength);
1002
71.3M
      if (Flength > (PCRE2_SIZE)(mb->end_subject - Feptr))
1003
1.48M
        {
1004
1.48M
        CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
1005
1.48M
        RRETURN(MATCH_NOMATCH);
1006
0
        }
1007
70.8M
      for (; Flength > 0; Flength--)
1008
69.8M
        {
1009
69.8M
        if (*Fecode++ != UCHAR21INC(Feptr)) RRETURN(MATCH_NOMATCH);
1010
947k
        }
1011
69.8M
      }
1012
92.0M
    else
1013
92.0M
#endif
1014
1015
    /* Not UTF mode */
1016
92.0M
      {
1017
92.0M
      if (mb->end_subject - Feptr < 1)
1018
485k
        {
1019
485k
        SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
1020
485k
        RRETURN(MATCH_NOMATCH);
1021
0
        }
1022
91.5M
      if (Fecode[1] != *Feptr++) RRETURN(MATCH_NOMATCH);
1023
240k
      Fecode += 2;
1024
240k
      }
1025
1.18M
    break;
1026
1027
1028
    /* ===================================================================== */
1029
    /* Match a single character, caselessly. If we are at the end of the
1030
    subject, give up immediately. We get here only when the pattern character
1031
    has at most one other case. Characters with more than two cases are coded
1032
    as OP_PROP with the pseudo-property PT_CLIST. */
1033
1034
22.9M
    case OP_CHARI:
1035
22.9M
    if (Feptr >= mb->end_subject)
1036
120k
      {
1037
120k
      SCHECK_PARTIAL();
1038
120k
      RRETURN(MATCH_NOMATCH);
1039
0
      }
1040
1041
22.7M
#ifdef SUPPORT_UNICODE
1042
22.7M
    if (utf)
1043
3.38M
      {
1044
3.38M
      Flength = 1;
1045
3.38M
      Fecode++;
1046
3.38M
      GETCHARLEN(fc, Fecode, Flength);
1047
1048
      /* If the pattern character's value is < 128, we know that its other case
1049
      (if any) is also < 128 (and therefore only one code unit long in all
1050
      code-unit widths), so we can use the fast lookup table. We checked above
1051
      that there is at least one character left in the subject. */
1052
1053
3.38M
      if (fc < 128)
1054
3.35M
        {
1055
3.35M
        uint32_t cc = UCHAR21(Feptr);
1056
3.35M
        if (mb->lcc[fc] != TABLE_GET(cc, mb->lcc, cc)) RRETURN(MATCH_NOMATCH);
1057
155k
        Fecode++;
1058
155k
        Feptr++;
1059
155k
        }
1060
1061
      /* Otherwise we must pick up the subject character and use Unicode
1062
      property support to test its other case. Note that we cannot use the
1063
      value of "Flength" to check for sufficient bytes left, because the other
1064
      case of the character may have more or fewer code units. */
1065
1066
27.3k
      else
1067
27.3k
        {
1068
27.3k
        uint32_t dc;
1069
27.3k
        GETCHARINC(dc, Feptr);
1070
27.3k
        Fecode += Flength;
1071
27.3k
        if (dc != fc && dc != UCD_OTHERCASE(fc)) RRETURN(MATCH_NOMATCH);
1072
43
        }
1073
3.38M
      }
1074
1075
    /* If UCP is set without UTF we must do the same as above, but with one
1076
    character per code unit. */
1077
1078
19.3M
    else if (ucp)
1079
0
      {
1080
0
      uint32_t cc = UCHAR21(Feptr);
1081
0
      fc = Fecode[1];
1082
0
      if (fc < 128)
1083
0
        {
1084
0
        if (mb->lcc[fc] != TABLE_GET(cc, mb->lcc, cc)) RRETURN(MATCH_NOMATCH);
1085
0
        }
1086
0
      else
1087
0
        {
1088
0
        if (cc != fc && cc != UCD_OTHERCASE(fc)) RRETURN(MATCH_NOMATCH);
1089
0
        }
1090
0
      Feptr++;
1091
0
      Fecode += 2;
1092
0
      }
1093
1094
19.3M
    else
1095
19.3M
#endif   /* SUPPORT_UNICODE */
1096
1097
    /* Not UTF or UCP mode; use the table for characters < 256. */
1098
19.3M
      {
1099
19.3M
      if (TABLE_GET(Fecode[1], mb->lcc, Fecode[1])
1100
19.3M
          != TABLE_GET(*Feptr, mb->lcc, *Feptr)) RRETURN(MATCH_NOMATCH);
1101
324k
      Feptr++;
1102
324k
      Fecode += 2;
1103
324k
      }
1104
480k
    break;
1105
1106
1107
    /* ===================================================================== */
1108
    /* Match not a single character. */
1109
1110
480k
    case OP_NOT:
1111
214k
    case OP_NOTI:
1112
214k
    if (Feptr >= mb->end_subject)
1113
939
      {
1114
939
      SCHECK_PARTIAL();
1115
939
      RRETURN(MATCH_NOMATCH);
1116
0
      }
1117
1118
213k
#ifdef SUPPORT_UNICODE
1119
213k
    if (utf)
1120
4.38k
      {
1121
4.38k
      uint32_t ch;
1122
4.38k
      Fecode++;
1123
4.38k
      GETCHARINC(ch, Fecode);
1124
4.38k
      GETCHARINC(fc, Feptr);
1125
4.38k
      if (ch == fc)
1126
87
        {
1127
87
        RRETURN(MATCH_NOMATCH);  /* Caseful match */
1128
0
        }
1129
4.30k
      else if (Fop == OP_NOTI)   /* If caseless */
1130
2.67k
        {
1131
2.67k
        if (ch > 127)
1132
0
          ch = UCD_OTHERCASE(ch);
1133
2.67k
        else
1134
2.67k
          ch = (mb->fcc)[ch];
1135
2.67k
        if (ch == fc) RRETURN(MATCH_NOMATCH);
1136
2.58k
        }
1137
4.38k
      }
1138
1139
    /* UCP without UTF is as above, but with one character per code unit. */
1140
1141
209k
    else if (ucp)
1142
0
      {
1143
0
      uint32_t ch;
1144
0
      fc = UCHAR21INC(Feptr);
1145
0
      ch = Fecode[1];
1146
0
      Fecode += 2;
1147
1148
0
      if (ch == fc)
1149
0
        {
1150
0
        RRETURN(MATCH_NOMATCH);  /* Caseful match */
1151
0
        }
1152
0
      else if (Fop == OP_NOTI)   /* If caseless */
1153
0
        {
1154
0
        if (ch > 127)
1155
0
          ch = UCD_OTHERCASE(ch);
1156
0
        else
1157
0
          ch = (mb->fcc)[ch];
1158
0
        if (ch == fc) RRETURN(MATCH_NOMATCH);
1159
0
        }
1160
0
      }
1161
1162
209k
    else
1163
209k
#endif  /* SUPPORT_UNICODE */
1164
1165
    /* Neither UTF nor UCP is set */
1166
1167
209k
      {
1168
209k
      uint32_t ch = Fecode[1];
1169
209k
      fc = UCHAR21INC(Feptr);
1170
209k
      if (ch == fc || (Fop == OP_NOTI && TABLE_GET(ch, mb->fcc, ch) == fc))
1171
207k
        RRETURN(MATCH_NOMATCH);
1172
207k
      Fecode += 2;
1173
207k
      }
1174
211k
    break;
1175
1176
1177
    /* ===================================================================== */
1178
    /* Match a single character repeatedly. */
1179
1180
9.14M
#define Loclength    F->temp_size
1181
227M
#define Lstart_eptr  F->temp_sptr[0]
1182
8.91M
#define Lcharptr     F->temp_sptr[1]
1183
489M
#define Lmin         F->temp_32[0]
1184
367M
#define Lmax         F->temp_32[1]
1185
244M
#define Lc           F->temp_32[2]
1186
11.6M
#define Loc          F->temp_32[3]
1187
1188
211k
    case OP_EXACT:
1189
0
    case OP_EXACTI:
1190
0
    Lmin = Lmax = GET2(Fecode, 1);
1191
0
    Fecode += 1 + IMM2_SIZE;
1192
0
    goto REPEATCHAR;
1193
1194
0
    case OP_POSUPTO:
1195
0
    case OP_POSUPTOI:
1196
0
    reptype = REPTYPE_POS;
1197
0
    Lmin = 0;
1198
0
    Lmax = GET2(Fecode, 1);
1199
0
    Fecode += 1 + IMM2_SIZE;
1200
0
    goto REPEATCHAR;
1201
1202
0
    case OP_UPTO:
1203
0
    case OP_UPTOI:
1204
0
    reptype = REPTYPE_MAX;
1205
0
    Lmin = 0;
1206
0
    Lmax = GET2(Fecode, 1);
1207
0
    Fecode += 1 + IMM2_SIZE;
1208
0
    goto REPEATCHAR;
1209
1210
0
    case OP_MINUPTO:
1211
0
    case OP_MINUPTOI:
1212
0
    reptype = REPTYPE_MIN;
1213
0
    Lmin = 0;
1214
0
    Lmax = GET2(Fecode, 1);
1215
0
    Fecode += 1 + IMM2_SIZE;
1216
0
    goto REPEATCHAR;
1217
1218
15.8k
    case OP_POSSTAR:
1219
2.22M
    case OP_POSSTARI:
1220
2.22M
    reptype = REPTYPE_POS;
1221
2.22M
    Lmin = 0;
1222
2.22M
    Lmax = UINT32_MAX;
1223
2.22M
    Fecode++;
1224
2.22M
    goto REPEATCHAR;
1225
1226
108k
    case OP_POSPLUS:
1227
196k
    case OP_POSPLUSI:
1228
196k
    reptype = REPTYPE_POS;
1229
196k
    Lmin = 1;
1230
196k
    Lmax = UINT32_MAX;
1231
196k
    Fecode++;
1232
196k
    goto REPEATCHAR;
1233
1234
8.83M
    case OP_POSQUERY:
1235
11.0M
    case OP_POSQUERYI:
1236
11.0M
    reptype = REPTYPE_POS;
1237
11.0M
    Lmin = 0;
1238
11.0M
    Lmax = 1;
1239
11.0M
    Fecode++;
1240
11.0M
    goto REPEATCHAR;
1241
1242
4.15k
    case OP_STAR:
1243
7.51k
    case OP_STARI:
1244
8.35k
    case OP_MINSTAR:
1245
11.3k
    case OP_MINSTARI:
1246
77.6k
    case OP_PLUS:
1247
79.0k
    case OP_PLUSI:
1248
79.4k
    case OP_MINPLUS:
1249
83.0k
    case OP_MINPLUSI:
1250
106M
    case OP_QUERY:
1251
106M
    case OP_QUERYI:
1252
108M
    case OP_MINQUERY:
1253
108M
    case OP_MINQUERYI:
1254
108M
    fc = *Fecode++ - ((Fop < OP_STARI)? OP_STAR : OP_STARI);
1255
108M
    Lmin = rep_min[fc];
1256
108M
    Lmax = rep_max[fc];
1257
108M
    reptype = rep_typ[fc];
1258
1259
    /* Common code for all repeated single-character matches. We first check
1260
    for the minimum number of characters. If the minimum equals the maximum, we
1261
    are done. Otherwise, if minimizing, check the rest of the pattern for a
1262
    match; if there isn't one, advance up to the maximum, one character at a
1263
    time.
1264
1265
    If maximizing, advance up to the maximum number of matching characters,
1266
    until Feptr is past the end of the maximum run. If possessive, we are
1267
    then done (no backing up). Otherwise, match at this position; anything
1268
    other than no match is immediately returned. For nomatch, back up one
1269
    character, unless we are matching \R and the last thing matched was
1270
    \r\n, in which case, back up two code units until we reach the first
1271
    optional character position.
1272
1273
    The various UTF/non-UTF and caseful/caseless cases are handled separately,
1274
    for speed. */
1275
1276
122M
    REPEATCHAR:
1277
122M
#ifdef SUPPORT_UNICODE
1278
122M
    if (utf)
1279
5.86M
      {
1280
5.86M
      Flength = 1;
1281
5.86M
      Lcharptr = Fecode;
1282
5.86M
      GETCHARLEN(fc, Fecode, Flength);
1283
5.86M
      Fecode += Flength;
1284
1285
      /* Handle multi-code-unit character matching, caseful and caseless. */
1286
1287
5.86M
      if (Flength > 1)
1288
3.04M
        {
1289
3.04M
        uint32_t othercase;
1290
1291
3.04M
        if (Fop >= OP_STARI &&     /* Caseless */
1292
8.83k
            (othercase = UCD_OTHERCASE(fc)) != fc)
1293
156
          Loclength = PRIV(ord2utf)(othercase, Foccu);
1294
3.04M
        else Loclength = 0;
1295
1296
3.04M
        for (i = 1; i <= Lmin; i++)
1297
8.64k
          {
1298
8.64k
          if (Feptr <= mb->end_subject - Flength &&
1299
8.64k
            memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0) Feptr += Flength;
1300
8.64k
          else if (Loclength > 0 &&
1301
0
                   Feptr <= mb->end_subject - Loclength &&
1302
0
                   memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0)
1303
0
            Feptr += Loclength;
1304
8.64k
          else
1305
8.64k
            {
1306
8.64k
            CHECK_PARTIAL();
1307
8.64k
            RRETURN(MATCH_NOMATCH);
1308
0
            }
1309
8.64k
          }
1310
1311
3.04M
        if (Lmin == Lmax) continue;
1312
1313
3.04M
        if (reptype == REPTYPE_MIN)
1314
0
          {
1315
0
          for (;;)
1316
0
            {
1317
0
            RMATCH(Fecode, RM202);
1318
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1319
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1320
0
            if (Feptr <= mb->end_subject - Flength &&
1321
0
              memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0) Feptr += Flength;
1322
0
            else if (Loclength > 0 &&
1323
0
                     Feptr <= mb->end_subject - Loclength &&
1324
0
                     memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0)
1325
0
              Feptr += Loclength;
1326
0
            else
1327
0
              {
1328
0
              CHECK_PARTIAL();
1329
0
              RRETURN(MATCH_NOMATCH);
1330
0
              }
1331
0
            }
1332
          /* Control never gets here */
1333
0
          }
1334
1335
3.04M
        else  /* Maximize */
1336
3.04M
          {
1337
3.04M
          Lstart_eptr = Feptr;
1338
3.04M
          for (i = Lmin; i < Lmax; i++)
1339
3.04M
            {
1340
3.04M
            if (Feptr <= mb->end_subject - Flength &&
1341
3.04M
                memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0)
1342
0
              Feptr += Flength;
1343
3.04M
            else if (Loclength > 0 &&
1344
156
                     Feptr <= mb->end_subject - Loclength &&
1345
156
                     memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0)
1346
0
              Feptr += Loclength;
1347
3.04M
            else
1348
3.04M
              {
1349
3.04M
              CHECK_PARTIAL();
1350
3.04M
              break;
1351
3.04M
              }
1352
3.04M
            }
1353
1354
          /* After \C in UTF mode, Lstart_eptr might be in the middle of a
1355
          Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
1356
          go too far. */
1357
1358
3.04M
          if (reptype != REPTYPE_POS) for(;;)
1359
0
            {
1360
0
            if (Feptr <= Lstart_eptr) break;
1361
0
            RMATCH(Fecode, RM203);
1362
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1363
0
            Feptr--;
1364
0
            BACKCHAR(Feptr);
1365
0
            }
1366
3.04M
          }
1367
3.04M
        break;   /* End of repeated wide character handling */
1368
3.04M
        }
1369
1370
      /* Length of UTF character is 1. Put it into the preserved variable and
1371
      fall through to the non-UTF code. */
1372
1373
2.81M
      Lc = fc;
1374
2.81M
      }
1375
116M
    else
1376
116M
#endif  /* SUPPORT_UNICODE */
1377
1378
    /* When not in UTF mode, load a single-code-unit character. Then proceed as
1379
    above, using Unicode casing if either UTF or UCP is set. */
1380
1381
116M
    Lc = *Fecode++;
1382
1383
    /* Caseless comparison */
1384
1385
119M
    if (Fop >= OP_STARI)
1386
5.83M
      {
1387
5.83M
#if PCRE2_CODE_UNIT_WIDTH == 8
1388
5.83M
#ifdef SUPPORT_UNICODE
1389
5.83M
      if (ucp && !utf && Lc > 127) Loc = UCD_OTHERCASE(Lc);
1390
5.83M
      else
1391
5.83M
#endif  /* SUPPORT_UNICODE */
1392
      /* Lc will be < 128 in UTF-8 mode. */
1393
5.83M
      Loc = mb->fcc[Lc];
1394
#else /* 16-bit & 32-bit */
1395
#ifdef SUPPORT_UNICODE
1396
      if ((utf || ucp) && Lc > 127) Loc = UCD_OTHERCASE(Lc);
1397
      else
1398
#endif  /* SUPPORT_UNICODE */
1399
      Loc = TABLE_GET(Lc, mb->fcc, Lc);
1400
#endif  /* PCRE2_CODE_UNIT_WIDTH == 8 */
1401
1402
5.84M
      for (i = 1; i <= Lmin; i++)
1403
85.1k
        {
1404
85.1k
        uint32_t cc;                 /* Faster than PCRE2_UCHAR */
1405
85.1k
        if (Feptr >= mb->end_subject)
1406
2.65k
          {
1407
2.65k
          SCHECK_PARTIAL();
1408
2.65k
          RRETURN(MATCH_NOMATCH);
1409
0
          }
1410
82.5k
        cc = UCHAR21TEST(Feptr);
1411
82.5k
        if (Lc != cc && Loc != cc) RRETURN(MATCH_NOMATCH);
1412
3.13k
        Feptr++;
1413
3.13k
        }
1414
5.75M
      if (Lmin == Lmax) continue;
1415
1416
5.75M
      if (reptype == REPTYPE_MIN)
1417
664k
        {
1418
664k
        for (;;)
1419
752k
          {
1420
752k
          uint32_t cc;               /* Faster than PCRE2_UCHAR */
1421
752k
          RMATCH(Fecode, RM25);
1422
752k
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1423
752k
          if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1424
665k
          if (Feptr >= mb->end_subject)
1425
1.62k
            {
1426
1.62k
            SCHECK_PARTIAL();
1427
1.62k
            RRETURN(MATCH_NOMATCH);
1428
0
            }
1429
663k
          cc = UCHAR21TEST(Feptr);
1430
663k
          if (Lc != cc && Loc != cc) RRETURN(MATCH_NOMATCH);
1431
87.6k
          Feptr++;
1432
87.6k
          }
1433
        /* Control never gets here */
1434
664k
        }
1435
1436
5.09M
      else  /* Maximize */
1437
5.09M
        {
1438
5.09M
        Lstart_eptr = Feptr;
1439
5.13M
        for (i = Lmin; i < Lmax; i++)
1440
5.11M
          {
1441
5.11M
          uint32_t cc;               /* Faster than PCRE2_UCHAR */
1442
5.11M
          if (Feptr >= mb->end_subject)
1443
51.6k
            {
1444
51.6k
            SCHECK_PARTIAL();
1445
51.6k
            break;
1446
51.6k
            }
1447
5.06M
          cc = UCHAR21TEST(Feptr);
1448
5.06M
          if (Lc != cc && Loc != cc) break;
1449
40.6k
          Feptr++;
1450
40.6k
          }
1451
5.09M
        if (reptype != REPTYPE_POS) for (;;)
1452
653k
          {
1453
653k
          if (Feptr == Lstart_eptr) break;
1454
1.25k
          RMATCH(Fecode, RM26);
1455
1.25k
          Feptr--;
1456
1.25k
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1457
1.25k
          }
1458
5.09M
        }
1459
5.75M
      }
1460
1461
    /* Caseful comparisons (includes all multi-byte characters) */
1462
1463
113M
    else
1464
113M
      {
1465
113M
      for (i = 1; i <= Lmin; i++)
1466
174k
        {
1467
174k
        if (Feptr >= mb->end_subject)
1468
2.37k
          {
1469
2.37k
          SCHECK_PARTIAL();
1470
2.37k
          RRETURN(MATCH_NOMATCH);
1471
0
          }
1472
172k
        if (Lc != UCHAR21INCTEST(Feptr)) RRETURN(MATCH_NOMATCH);
1473
1.38k
        }
1474
1475
113M
      if (Lmin == Lmax) continue;
1476
1477
113M
      if (reptype == REPTYPE_MIN)
1478
1.50M
        {
1479
1.50M
        for (;;)
1480
1.52M
          {
1481
1.52M
          RMATCH(Fecode, RM27);
1482
1.52M
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1483
1.52M
          if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1484
1.50M
          if (Feptr >= mb->end_subject)
1485
7.25k
            {
1486
7.25k
            SCHECK_PARTIAL();
1487
7.25k
            RRETURN(MATCH_NOMATCH);
1488
0
            }
1489
1.49M
          if (Lc != UCHAR21INCTEST(Feptr)) RRETURN(MATCH_NOMATCH);
1490
14.5k
          }
1491
        /* Control never gets here */
1492
1.50M
        }
1493
111M
      else  /* Maximize */
1494
111M
        {
1495
111M
        Lstart_eptr = Feptr;
1496
112M
        for (i = Lmin; i < Lmax; i++)
1497
111M
          {
1498
111M
          if (Feptr >= mb->end_subject)
1499
70.5k
            {
1500
70.5k
            SCHECK_PARTIAL();
1501
70.5k
            break;
1502
70.5k
            }
1503
1504
111M
          if (Lc != UCHAR21TEST(Feptr)) break;
1505
446k
          Feptr++;
1506
446k
          }
1507
1508
111M
        if (reptype != REPTYPE_POS) for (;;)
1509
106M
          {
1510
106M
          if (Feptr <= Lstart_eptr) break;
1511
412k
          RMATCH(Fecode, RM28);
1512
412k
          Feptr--;
1513
412k
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1514
412k
          }
1515
111M
        }
1516
113M
      }
1517
116M
    break;
1518
1519
116M
#undef Loclength
1520
116M
#undef Lstart_eptr
1521
116M
#undef Lcharptr
1522
116M
#undef Lmin
1523
116M
#undef Lmax
1524
116M
#undef Lc
1525
116M
#undef Loc
1526
1527
1528
    /* ===================================================================== */
1529
    /* Match a negated single one-byte character repeatedly. This is almost a
1530
    repeat of the code for a repeated single character, but I haven't found a
1531
    nice way of commoning these up that doesn't require a test of the
1532
    positive/negative option for each character match. Maybe that wouldn't add
1533
    very much to the time taken, but character matching *is* what this is all
1534
    about... */
1535
1536
116M
#define Lstart_eptr  F->temp_sptr[0]
1537
116M
#define Lmin         F->temp_32[0]
1538
116M
#define Lmax         F->temp_32[1]
1539
116M
#define Lc           F->temp_32[2]
1540
116M
#define Loc          F->temp_32[3]
1541
1542
116M
    case OP_NOTEXACT:
1543
0
    case OP_NOTEXACTI:
1544
0
    Lmin = Lmax = GET2(Fecode, 1);
1545
0
    Fecode += 1 + IMM2_SIZE;
1546
0
    goto REPEATNOTCHAR;
1547
1548
0
    case OP_NOTUPTO:
1549
0
    case OP_NOTUPTOI:
1550
0
    Lmin = 0;
1551
0
    Lmax = GET2(Fecode, 1);
1552
0
    reptype = REPTYPE_MAX;
1553
0
    Fecode += 1 + IMM2_SIZE;
1554
0
    goto REPEATNOTCHAR;
1555
1556
0
    case OP_NOTMINUPTO:
1557
0
    case OP_NOTMINUPTOI:
1558
0
    Lmin = 0;
1559
0
    Lmax = GET2(Fecode, 1);
1560
0
    reptype = REPTYPE_MIN;
1561
0
    Fecode += 1 + IMM2_SIZE;
1562
0
    goto REPEATNOTCHAR;
1563
1564
0
    case OP_NOTPOSSTAR:
1565
0
    case OP_NOTPOSSTARI:
1566
0
    reptype = REPTYPE_POS;
1567
0
    Lmin = 0;
1568
0
    Lmax = UINT32_MAX;
1569
0
    Fecode++;
1570
0
    goto REPEATNOTCHAR;
1571
1572
366
    case OP_NOTPOSPLUS:
1573
8.64k
    case OP_NOTPOSPLUSI:
1574
8.64k
    reptype = REPTYPE_POS;
1575
8.64k
    Lmin = 1;
1576
8.64k
    Lmax = UINT32_MAX;
1577
8.64k
    Fecode++;
1578
8.64k
    goto REPEATNOTCHAR;
1579
1580
0
    case OP_NOTPOSQUERY:
1581
2
    case OP_NOTPOSQUERYI:
1582
2
    reptype = REPTYPE_POS;
1583
2
    Lmin = 0;
1584
2
    Lmax = 1;
1585
2
    Fecode++;
1586
2
    goto REPEATNOTCHAR;
1587
1588
0
    case OP_NOTPOSUPTO:
1589
0
    case OP_NOTPOSUPTOI:
1590
0
    reptype = REPTYPE_POS;
1591
0
    Lmin = 0;
1592
0
    Lmax = GET2(Fecode, 1);
1593
0
    Fecode += 1 + IMM2_SIZE;
1594
0
    goto REPEATNOTCHAR;
1595
1596
412
    case OP_NOTSTAR:
1597
412
    case OP_NOTSTARI:
1598
412
    case OP_NOTMINSTAR:
1599
412
    case OP_NOTMINSTARI:
1600
2.65k
    case OP_NOTPLUS:
1601
99.1k
    case OP_NOTPLUSI:
1602
100k
    case OP_NOTMINPLUS:
1603
116k
    case OP_NOTMINPLUSI:
1604
123k
    case OP_NOTQUERY:
1605
133k
    case OP_NOTQUERYI:
1606
133k
    case OP_NOTMINQUERY:
1607
144k
    case OP_NOTMINQUERYI:
1608
144k
    fc = *Fecode++ - ((Fop >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
1609
144k
    Lmin = rep_min[fc];
1610
144k
    Lmax = rep_max[fc];
1611
144k
    reptype = rep_typ[fc];
1612
1613
    /* Common code for all repeated single-character non-matches. */
1614
1615
152k
    REPEATNOTCHAR:
1616
152k
    GETCHARINCTEST(Lc, Fecode);
1617
1618
    /* The code is duplicated for the caseless and caseful cases, for speed,
1619
    since matching characters is likely to be quite common. First, ensure the
1620
    minimum number of matches are present. If Lmin = Lmax, we are done.
1621
    Otherwise, if minimizing, keep trying the rest of the expression and
1622
    advancing one matching character if failing, up to the maximum.
1623
    Alternatively, if maximizing, find the maximum number of characters and
1624
    work backwards. */
1625
1626
152k
    if (Fop >= OP_NOTSTARI)     /* Caseless */
1627
141k
      {
1628
141k
#ifdef SUPPORT_UNICODE
1629
141k
      if ((utf || ucp) && Lc > 127)
1630
0
        Loc = UCD_OTHERCASE(Lc);
1631
141k
      else
1632
141k
#endif /* SUPPORT_UNICODE */
1633
1634
141k
      Loc = TABLE_GET(Lc, mb->fcc, Lc);  /* Other case from table */
1635
1636
141k
#ifdef SUPPORT_UNICODE
1637
141k
      if (utf)
1638
17.8k
        {
1639
17.8k
        uint32_t d;
1640
20.2k
        for (i = 1; i <= Lmin; i++)
1641
2.81k
          {
1642
2.81k
          if (Feptr >= mb->end_subject)
1643
220
            {
1644
220
            SCHECK_PARTIAL();
1645
220
            RRETURN(MATCH_NOMATCH);
1646
0
            }
1647
2.59k
          GETCHARINC(d, Feptr);
1648
2.59k
          if (Lc == d || Loc == d) RRETURN(MATCH_NOMATCH);
1649
2.38k
          }
1650
17.8k
        }
1651
123k
      else
1652
123k
#endif  /* SUPPORT_UNICODE */
1653
1654
      /* Not UTF mode */
1655
123k
        {
1656
239k
        for (i = 1; i <= Lmin; i++)
1657
118k
          {
1658
118k
          if (Feptr >= mb->end_subject)
1659
357
            {
1660
357
            SCHECK_PARTIAL();
1661
357
            RRETURN(MATCH_NOMATCH);
1662
0
            }
1663
117k
          if (Lc == *Feptr || Loc == *Feptr) RRETURN(MATCH_NOMATCH);
1664
115k
          Feptr++;
1665
115k
          }
1666
123k
        }
1667
1668
139k
      if (Lmin == Lmax) continue;  /* Finished for exact count */
1669
1670
139k
      if (reptype == REPTYPE_MIN)
1671
26.3k
        {
1672
26.3k
#ifdef SUPPORT_UNICODE
1673
26.3k
        if (utf)
1674
10.1k
          {
1675
10.1k
          uint32_t d;
1676
10.1k
          for (;;)
1677
25.8k
            {
1678
25.8k
            RMATCH(Fecode, RM204);
1679
25.8k
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1680
25.8k
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1681
19.0k
            if (Feptr >= mb->end_subject)
1682
2.52k
              {
1683
2.52k
              SCHECK_PARTIAL();
1684
2.52k
              RRETURN(MATCH_NOMATCH);
1685
0
              }
1686
16.5k
            GETCHARINC(d, Feptr);
1687
16.5k
            if (Lc == d || Loc == d) RRETURN(MATCH_NOMATCH);
1688
15.6k
            }
1689
10.1k
          }
1690
16.1k
        else
1691
16.1k
#endif  /*SUPPORT_UNICODE */
1692
1693
        /* Not UTF mode */
1694
16.1k
          {
1695
16.1k
          for (;;)
1696
561k
            {
1697
561k
            RMATCH(Fecode, RM29);
1698
561k
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1699
561k
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1700
560k
            if (Feptr >= mb->end_subject)
1701
1.20k
              {
1702
1.20k
              SCHECK_PARTIAL();
1703
1.20k
              RRETURN(MATCH_NOMATCH);
1704
0
              }
1705
559k
            if (Lc == *Feptr || Loc == *Feptr) RRETURN(MATCH_NOMATCH);
1706
544k
            Feptr++;
1707
544k
            }
1708
16.1k
          }
1709
        /* Control never gets here */
1710
26.3k
        }
1711
1712
      /* Maximize case */
1713
1714
112k
      else
1715
112k
        {
1716
112k
        Lstart_eptr = Feptr;
1717
1718
112k
#ifdef SUPPORT_UNICODE
1719
112k
        if (utf)
1720
7.22k
          {
1721
7.22k
          uint32_t d;
1722
24.0k
          for (i = Lmin; i < Lmax; i++)
1723
19.2k
            {
1724
19.2k
            int len = 1;
1725
19.2k
            if (Feptr >= mb->end_subject)
1726
1.63k
              {
1727
1.63k
              SCHECK_PARTIAL();
1728
1.63k
              break;
1729
1.63k
              }
1730
17.5k
            GETCHARLEN(d, Feptr, len);
1731
17.5k
            if (Lc == d || Loc == d) break;
1732
16.8k
            Feptr += len;
1733
16.8k
            }
1734
1735
          /* After \C in UTF mode, Lstart_eptr might be in the middle of a
1736
          Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
1737
          go too far. */
1738
1739
7.22k
          if (reptype != REPTYPE_POS) for(;;)
1740
24.0k
            {
1741
24.0k
            if (Feptr <= Lstart_eptr) break;
1742
16.8k
            RMATCH(Fecode, RM205);
1743
16.8k
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1744
16.8k
            Feptr--;
1745
16.8k
            BACKCHAR(Feptr);
1746
16.8k
            }
1747
7.22k
          }
1748
105k
        else
1749
105k
#endif  /* SUPPORT_UNICODE */
1750
1751
        /* Not UTF mode */
1752
105k
          {
1753
5.39M
          for (i = Lmin; i < Lmax; i++)
1754
5.39M
            {
1755
5.39M
            if (Feptr >= mb->end_subject)
1756
14.5k
              {
1757
14.5k
              SCHECK_PARTIAL();
1758
14.5k
              break;
1759
14.5k
              }
1760
5.37M
            if (Lc == *Feptr || Loc == *Feptr) break;
1761
5.28M
            Feptr++;
1762
5.28M
            }
1763
105k
          if (reptype != REPTYPE_POS) for (;;)
1764
5.14M
            {
1765
5.14M
            if (Feptr == Lstart_eptr) break;
1766
5.05M
            RMATCH(Fecode, RM30);
1767
5.05M
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1768
5.05M
            Feptr--;
1769
5.05M
            }
1770
105k
          }
1771
112k
        }
1772
139k
      }
1773
1774
    /* Caseful comparisons */
1775
1776
11.1k
    else
1777
11.1k
      {
1778
11.1k
#ifdef SUPPORT_UNICODE
1779
11.1k
      if (utf)
1780
6.93k
        {
1781
6.93k
        uint32_t d;
1782
6.93k
        for (i = 1; i <= Lmin; i++)
1783
0
          {
1784
0
          if (Feptr >= mb->end_subject)
1785
0
            {
1786
0
            SCHECK_PARTIAL();
1787
0
            RRETURN(MATCH_NOMATCH);
1788
0
            }
1789
0
          GETCHARINC(d, Feptr);
1790
0
          if (Lc == d) RRETURN(MATCH_NOMATCH);
1791
0
          }
1792
6.93k
        }
1793
4.25k
      else
1794
4.25k
#endif
1795
      /* Not UTF mode */
1796
4.25k
        {
1797
7.50k
        for (i = 1; i <= Lmin; i++)
1798
3.51k
          {
1799
3.51k
          if (Feptr >= mb->end_subject)
1800
169
            {
1801
169
            SCHECK_PARTIAL();
1802
169
            RRETURN(MATCH_NOMATCH);
1803
0
            }
1804
3.34k
          if (Lc == *Feptr++) RRETURN(MATCH_NOMATCH);
1805
3.24k
          }
1806
4.25k
        }
1807
1808
10.9k
      if (Lmin == Lmax) continue;
1809
1810
10.9k
      if (reptype == REPTYPE_MIN)
1811
982
        {
1812
982
#ifdef SUPPORT_UNICODE
1813
982
        if (utf)
1814
250
          {
1815
250
          uint32_t d;
1816
250
          for (;;)
1817
481
            {
1818
481
            RMATCH(Fecode, RM206);
1819
481
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1820
481
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1821
250
            if (Feptr >= mb->end_subject)
1822
9
              {
1823
9
              SCHECK_PARTIAL();
1824
9
              RRETURN(MATCH_NOMATCH);
1825
0
              }
1826
241
            GETCHARINC(d, Feptr);
1827
241
            if (Lc == d) RRETURN(MATCH_NOMATCH);
1828
231
            }
1829
250
          }
1830
732
        else
1831
732
#endif
1832
        /* Not UTF mode */
1833
732
          {
1834
732
          for (;;)
1835
90.1k
            {
1836
90.1k
            RMATCH(Fecode, RM31);
1837
90.1k
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1838
90.1k
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1839
90.1k
            if (Feptr >= mb->end_subject)
1840
572
              {
1841
572
              SCHECK_PARTIAL();
1842
572
              RRETURN(MATCH_NOMATCH);
1843
0
              }
1844
89.5k
            if (Lc == *Feptr++) RRETURN(MATCH_NOMATCH);
1845
89.4k
            }
1846
732
          }
1847
        /* Control never gets here */
1848
982
        }
1849
1850
      /* Maximize case */
1851
1852
9.94k
      else
1853
9.94k
        {
1854
9.94k
        Lstart_eptr = Feptr;
1855
1856
9.94k
#ifdef SUPPORT_UNICODE
1857
9.94k
        if (utf)
1858
6.68k
          {
1859
6.68k
          uint32_t d;
1860
13.1k
          for (i = Lmin; i < Lmax; i++)
1861
6.68k
            {
1862
6.68k
            int len = 1;
1863
6.68k
            if (Feptr >= mb->end_subject)
1864
0
              {
1865
0
              SCHECK_PARTIAL();
1866
0
              break;
1867
0
              }
1868
6.68k
            GETCHARLEN(d, Feptr, len);
1869
6.68k
            if (Lc == d) break;
1870
6.45k
            Feptr += len;
1871
6.45k
            }
1872
1873
          /* After \C in UTF mode, Lstart_eptr might be in the middle of a
1874
          Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
1875
          go too far. */
1876
1877
6.68k
          if (reptype != REPTYPE_POS) for(;;)
1878
13.1k
            {
1879
13.1k
            if (Feptr <= Lstart_eptr) break;
1880
6.45k
            RMATCH(Fecode, RM207);
1881
6.45k
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1882
6.45k
            Feptr--;
1883
6.45k
            BACKCHAR(Feptr);
1884
6.45k
            }
1885
6.68k
          }
1886
3.25k
        else
1887
3.25k
#endif
1888
        /* Not UTF mode */
1889
3.25k
          {
1890
97.4k
          for (i = Lmin; i < Lmax; i++)
1891
97.2k
            {
1892
97.2k
            if (Feptr >= mb->end_subject)
1893
1.32k
              {
1894
1.32k
              SCHECK_PARTIAL();
1895
1.32k
              break;
1896
1.32k
              }
1897
95.8k
            if (Lc == *Feptr) break;
1898
94.2k
            Feptr++;
1899
94.2k
            }
1900
3.25k
          if (reptype != REPTYPE_POS) for (;;)
1901
94.4k
            {
1902
94.4k
            if (Feptr == Lstart_eptr) break;
1903
91.5k
            RMATCH(Fecode, RM32);
1904
91.5k
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1905
91.5k
            Feptr--;
1906
91.5k
            }
1907
3.25k
          }
1908
9.94k
        }
1909
10.9k
      }
1910
122k
    break;
1911
1912
122k
#undef Lstart_eptr
1913
122k
#undef Lmin
1914
122k
#undef Lmax
1915
122k
#undef Lc
1916
122k
#undef Loc
1917
1918
1919
    /* ===================================================================== */
1920
    /* Match a bit-mapped character class, possibly repeatedly. These opcodes
1921
    are used when all the characters in the class have values in the range
1922
    0-255, and either the matching is caseful, or the characters are in the
1923
    range 0-127 when UTF processing is enabled. The only difference between
1924
    OP_CLASS and OP_NCLASS occurs when a data character outside the range is
1925
    encountered. */
1926
1927
1.85M
#define Lmin               F->temp_32[0]
1928
1.81M
#define Lmax               F->temp_32[1]
1929
690k
#define Lstart_eptr        F->temp_sptr[0]
1930
1.50M
#define Lbyte_map_address  F->temp_sptr[1]
1931
1.06M
#define Lbyte_map          ((unsigned char *)Lbyte_map_address)
1932
1933
122k
    case OP_NCLASS:
1934
436k
    case OP_CLASS:
1935
436k
      {
1936
436k
      Lbyte_map_address = Fecode + 1;           /* Save for matching */
1937
436k
      Fecode += 1 + (32 / sizeof(PCRE2_UCHAR)); /* Advance past the item */
1938
1939
      /* Look past the end of the item to see if there is repeat information
1940
      following. Then obey similar code to character type repeats. */
1941
1942
436k
      switch (*Fecode)
1943
436k
        {
1944
28.8k
        case OP_CRSTAR:
1945
48.2k
        case OP_CRMINSTAR:
1946
70.4k
        case OP_CRPLUS:
1947
71.8k
        case OP_CRMINPLUS:
1948
90.4k
        case OP_CRQUERY:
1949
145k
        case OP_CRMINQUERY:
1950
299k
        case OP_CRPOSSTAR:
1951
313k
        case OP_CRPOSPLUS:
1952
322k
        case OP_CRPOSQUERY:
1953
322k
        fc = *Fecode++ - OP_CRSTAR;
1954
322k
        Lmin = rep_min[fc];
1955
322k
        Lmax = rep_max[fc];
1956
322k
        reptype = rep_typ[fc];
1957
322k
        break;
1958
1959
0
        case OP_CRRANGE:
1960
0
        case OP_CRMINRANGE:
1961
0
        case OP_CRPOSRANGE:
1962
0
        Lmin = GET2(Fecode, 1);
1963
0
        Lmax = GET2(Fecode, 1 + IMM2_SIZE);
1964
0
        if (Lmax == 0) Lmax = UINT32_MAX;       /* Max 0 => infinity */
1965
0
        reptype = rep_typ[*Fecode - OP_CRSTAR];
1966
0
        Fecode += 1 + 2 * IMM2_SIZE;
1967
0
        break;
1968
1969
114k
        default:               /* No repeat follows */
1970
114k
        Lmin = Lmax = 1;
1971
114k
        break;
1972
436k
        }
1973
1974
      /* First, ensure the minimum number of matches are present. */
1975
1976
436k
#ifdef SUPPORT_UNICODE
1977
436k
      if (utf)
1978
116k
        {
1979
168k
        for (i = 1; i <= Lmin; i++)
1980
91.9k
          {
1981
91.9k
          if (Feptr >= mb->end_subject)
1982
765
            {
1983
765
            SCHECK_PARTIAL();
1984
765
            RRETURN(MATCH_NOMATCH);
1985
0
            }
1986
91.1k
          GETCHARINC(fc, Feptr);
1987
91.1k
          if (fc > 255)
1988
1.98k
            {
1989
1.98k
            if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
1990
1.38k
            }
1991
89.1k
          else
1992
89.1k
            if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
1993
51.8k
          }
1994
116k
        }
1995
319k
      else
1996
319k
#endif
1997
      /* Not UTF mode */
1998
319k
        {
1999
357k
        for (i = 1; i <= Lmin; i++)
2000
59.3k
          {
2001
59.3k
          if (Feptr >= mb->end_subject)
2002
4.48k
            {
2003
4.48k
            SCHECK_PARTIAL();
2004
4.48k
            RRETURN(MATCH_NOMATCH);
2005
0
            }
2006
54.9k
          fc = *Feptr++;
2007
#if PCRE2_CODE_UNIT_WIDTH != 8
2008
          if (fc > 255)
2009
            {
2010
            if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
2011
            }
2012
          else
2013
#endif
2014
54.9k
          if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
2015
37.7k
          }
2016
319k
        }
2017
2018
      /* If Lmax == Lmin we are done. Continue with main loop. */
2019
2020
375k
      if (Lmin == Lmax) continue;
2021
2022
      /* If minimizing, keep testing the rest of the expression and advancing
2023
      the pointer while it matches the class. */
2024
2025
307k
      if (reptype == REPTYPE_MIN)
2026
75.5k
        {
2027
75.5k
#ifdef SUPPORT_UNICODE
2028
75.5k
        if (utf)
2029
17.1k
          {
2030
17.1k
          for (;;)
2031
50.3k
            {
2032
50.3k
            RMATCH(Fecode, RM200);
2033
50.3k
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2034
50.3k
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
2035
42.1k
            if (Feptr >= mb->end_subject)
2036
1.40k
              {
2037
1.40k
              SCHECK_PARTIAL();
2038
1.40k
              RRETURN(MATCH_NOMATCH);
2039
0
              }
2040
40.7k
            GETCHARINC(fc, Feptr);
2041
40.7k
            if (fc > 255)
2042
1.31k
              {
2043
1.31k
              if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
2044
329
              }
2045
39.4k
            else
2046
39.4k
              if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
2047
33.2k
            }
2048
17.1k
          }
2049
58.4k
        else
2050
58.4k
#endif
2051
        /* Not UTF mode */
2052
58.4k
          {
2053
58.4k
          for (;;)
2054
235k
            {
2055
235k
            RMATCH(Fecode, RM23);
2056
235k
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2057
235k
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
2058
201k
            if (Feptr >= mb->end_subject)
2059
4.49k
              {
2060
4.49k
              SCHECK_PARTIAL();
2061
4.49k
              RRETURN(MATCH_NOMATCH);
2062
0
              }
2063
196k
            fc = *Feptr++;
2064
#if PCRE2_CODE_UNIT_WIDTH != 8
2065
            if (fc > 255)
2066
              {
2067
              if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
2068
              }
2069
            else
2070
#endif
2071
196k
            if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
2072
177k
            }
2073
58.4k
          }
2074
        /* Control never gets here */
2075
75.5k
        }
2076
2077
      /* If maximizing, find the longest possible run, then work backwards. */
2078
2079
231k
      else
2080
231k
        {
2081
231k
        Lstart_eptr = Feptr;
2082
2083
231k
#ifdef SUPPORT_UNICODE
2084
231k
        if (utf)
2085
15.9k
          {
2086
129k
          for (i = Lmin; i < Lmax; i++)
2087
122k
            {
2088
122k
            int len = 1;
2089
122k
            if (Feptr >= mb->end_subject)
2090
2.15k
              {
2091
2.15k
              SCHECK_PARTIAL();
2092
2.15k
              break;
2093
2.15k
              }
2094
120k
            GETCHARLEN(fc, Feptr, len);
2095
120k
            if (fc > 255)
2096
3.10k
              {
2097
3.10k
              if (Fop == OP_CLASS) break;
2098
3.10k
              }
2099
116k
            else
2100
116k
              if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) break;
2101
113k
            Feptr += len;
2102
113k
            }
2103
2104
15.9k
          if (reptype == REPTYPE_POS) continue;    /* No backtracking */
2105
2106
          /* After \C in UTF mode, Lstart_eptr might be in the middle of a
2107
          Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
2108
          go too far. */
2109
2110
13.8k
          for (;;)
2111
120k
            {
2112
120k
            RMATCH(Fecode, RM201);
2113
120k
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2114
120k
            if (Feptr-- <= Lstart_eptr) break;  /* Tried at original position */
2115
106k
            BACKCHAR(Feptr);
2116
106k
            }
2117
13.8k
          }
2118
215k
        else
2119
215k
#endif
2120
          /* Not UTF mode */
2121
215k
          {
2122
586k
          for (i = Lmin; i < Lmax; i++)
2123
582k
            {
2124
582k
            if (Feptr >= mb->end_subject)
2125
10.0k
              {
2126
10.0k
              SCHECK_PARTIAL();
2127
10.0k
              break;
2128
10.0k
              }
2129
572k
            fc = *Feptr;
2130
#if PCRE2_CODE_UNIT_WIDTH != 8
2131
            if (fc > 255)
2132
              {
2133
              if (Fop == OP_CLASS) break;
2134
              }
2135
            else
2136
#endif
2137
572k
            if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) break;
2138
370k
            Feptr++;
2139
370k
            }
2140
2141
215k
          if (reptype == REPTYPE_POS) continue;    /* No backtracking */
2142
2143
337k
          while (Feptr >= Lstart_eptr)
2144
288k
            {
2145
288k
            RMATCH(Fecode, RM24);
2146
288k
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2147
288k
            Feptr--;
2148
288k
            }
2149
49.6k
          }
2150
2151
231k
        RRETURN(MATCH_NOMATCH);
2152
0
        }
2153
307k
      }
2154
    /* Control never gets here */
2155
2156
0
#undef Lbyte_map_address
2157
0
#undef Lbyte_map
2158
0
#undef Lstart_eptr
2159
0
#undef Lmin
2160
0
#undef Lmax
2161
2162
2163
    /* ===================================================================== */
2164
    /* Match an extended character class. In the 8-bit library, this opcode is
2165
    encountered only when UTF-8 mode is supported. In the 16-bit and
2166
    32-bit libraries, codepoints greater than 255 may be encountered even when
2167
    UTF is not supported. */
2168
2169
332k
#define Lstart_eptr  F->temp_sptr[0]
2170
578k
#define Lxclass_data F->temp_sptr[1]
2171
498k
#define Lmin         F->temp_32[0]
2172
653k
#define Lmax         F->temp_32[1]
2173
2174
0
#ifdef SUPPORT_WIDE_CHARS
2175
109k
    case OP_XCLASS:
2176
109k
      {
2177
109k
      Lxclass_data = Fecode + 1 + LINK_SIZE;  /* Save for matching */
2178
109k
      Fecode += GET(Fecode, 1);               /* Advance past the item */
2179
2180
109k
      switch (*Fecode)
2181
109k
        {
2182
6.99k
        case OP_CRSTAR:
2183
23.1k
        case OP_CRMINSTAR:
2184
37.0k
        case OP_CRPLUS:
2185
38.3k
        case OP_CRMINPLUS:
2186
41.1k
        case OP_CRQUERY:
2187
61.1k
        case OP_CRMINQUERY:
2188
76.8k
        case OP_CRPOSSTAR:
2189
79.3k
        case OP_CRPOSPLUS:
2190
83.9k
        case OP_CRPOSQUERY:
2191
83.9k
        fc = *Fecode++ - OP_CRSTAR;
2192
83.9k
        Lmin = rep_min[fc];
2193
83.9k
        Lmax = rep_max[fc];
2194
83.9k
        reptype = rep_typ[fc];
2195
83.9k
        break;
2196
2197
0
        case OP_CRRANGE:
2198
0
        case OP_CRMINRANGE:
2199
0
        case OP_CRPOSRANGE:
2200
0
        Lmin = GET2(Fecode, 1);
2201
0
        Lmax = GET2(Fecode, 1 + IMM2_SIZE);
2202
0
        if (Lmax == 0) Lmax = UINT32_MAX;  /* Max 0 => infinity */
2203
0
        reptype = rep_typ[*Fecode - OP_CRSTAR];
2204
0
        Fecode += 1 + 2 * IMM2_SIZE;
2205
0
        break;
2206
2207
25.2k
        default:               /* No repeat follows */
2208
25.2k
        Lmin = Lmax = 1;
2209
25.2k
        break;
2210
109k
        }
2211
2212
      /* First, ensure the minimum number of matches are present. */
2213
2214
137k
      for (i = 1; i <= Lmin; i++)
2215
42.8k
        {
2216
42.8k
        if (Feptr >= mb->end_subject)
2217
554
          {
2218
554
          SCHECK_PARTIAL();
2219
554
          RRETURN(MATCH_NOMATCH);
2220
0
          }
2221
42.3k
        GETCHARINCTEST(fc, Feptr);
2222
42.3k
        if (!PRIV(xclass)(fc, Lxclass_data, utf)) RRETURN(MATCH_NOMATCH);
2223
27.9k
        }
2224
2225
      /* If Lmax == Lmin we can just continue with the main loop. */
2226
2227
94.3k
      if (Lmin == Lmax) continue;
2228
2229
      /* If minimizing, keep testing the rest of the expression and advancing
2230
      the pointer while it matches the class. */
2231
2232
78.5k
      if (reptype == REPTYPE_MIN)
2233
37.3k
        {
2234
37.3k
        for (;;)
2235
116k
          {
2236
116k
          RMATCH(Fecode, RM100);
2237
116k
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2238
116k
          if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
2239
100k
          if (Feptr >= mb->end_subject)
2240
331
            {
2241
331
            SCHECK_PARTIAL();
2242
331
            RRETURN(MATCH_NOMATCH);
2243
0
            }
2244
100k
          GETCHARINCTEST(fc, Feptr);
2245
100k
          if (!PRIV(xclass)(fc, Lxclass_data, utf)) RRETURN(MATCH_NOMATCH);
2246
79.2k
          }
2247
        /* Control never gets here */
2248
37.3k
        }
2249
2250
      /* If maximizing, find the longest possible run, then work backwards. */
2251
2252
41.1k
      else
2253
41.1k
        {
2254
41.1k
        Lstart_eptr = Feptr;
2255
333k
        for (i = Lmin; i < Lmax; i++)
2256
331k
          {
2257
331k
          int len = 1;
2258
331k
          if (Feptr >= mb->end_subject)
2259
5.52k
            {
2260
5.52k
            SCHECK_PARTIAL();
2261
5.52k
            break;
2262
5.52k
            }
2263
326k
#ifdef SUPPORT_UNICODE
2264
326k
          GETCHARLENTEST(fc, Feptr, len);
2265
#else
2266
          fc = *Feptr;
2267
#endif
2268
326k
          if (!PRIV(xclass)(fc, Lxclass_data, utf)) break;
2269
292k
          Feptr += len;
2270
292k
          }
2271
2272
41.1k
        if (reptype == REPTYPE_POS) continue;    /* No backtracking */
2273
2274
        /* After \C in UTF mode, Lstart_eptr might be in the middle of a
2275
        Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
2276
        go too far. */
2277
2278
20.0k
        for(;;)
2279
291k
          {
2280
291k
          RMATCH(Fecode, RM101);
2281
291k
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2282
291k
          if (Feptr-- <= Lstart_eptr) break;  /* Tried at original position */
2283
271k
#ifdef SUPPORT_UNICODE
2284
271k
          if (utf) BACKCHAR(Feptr);
2285
271k
#endif
2286
271k
          }
2287
20.0k
        RRETURN(MATCH_NOMATCH);
2288
0
        }
2289
2290
      /* Control never gets here */
2291
78.5k
      }
2292
0
#endif  /* SUPPORT_WIDE_CHARS: end of XCLASS */
2293
2294
0
#undef Lstart_eptr
2295
0
#undef Lxclass_data
2296
0
#undef Lmin
2297
0
#undef Lmax
2298
2299
2300
    /* ===================================================================== */
2301
    /* Match various character types when PCRE2_UCP is not set. These opcodes
2302
    are not generated when PCRE2_UCP is set - instead appropriate property
2303
    tests are compiled. */
2304
2305
374k
    case OP_NOT_DIGIT:
2306
374k
    if (Feptr >= mb->end_subject)
2307
3.62k
      {
2308
3.62k
      SCHECK_PARTIAL();
2309
3.62k
      RRETURN(MATCH_NOMATCH);
2310
0
      }
2311
370k
    GETCHARINCTEST(fc, Feptr);
2312
370k
    if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_digit) != 0)
2313
322k
      RRETURN(MATCH_NOMATCH);
2314
322k
    Fecode++;
2315
322k
    break;
2316
2317
22.7k
    case OP_DIGIT:
2318
22.7k
    if (Feptr >= mb->end_subject)
2319
825
      {
2320
825
      SCHECK_PARTIAL();
2321
825
      RRETURN(MATCH_NOMATCH);
2322
0
      }
2323
21.8k
    GETCHARINCTEST(fc, Feptr);
2324
21.8k
    if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_digit) == 0)
2325
18.6k
      RRETURN(MATCH_NOMATCH);
2326
3.22k
    Fecode++;
2327
3.22k
    break;
2328
2329
967k
    case OP_NOT_WHITESPACE:
2330
967k
    if (Feptr >= mb->end_subject)
2331
12.3k
      {
2332
12.3k
      SCHECK_PARTIAL();
2333
12.3k
      RRETURN(MATCH_NOMATCH);
2334
0
      }
2335
955k
    GETCHARINCTEST(fc, Feptr);
2336
955k
    if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_space) != 0)
2337
936k
      RRETURN(MATCH_NOMATCH);
2338
936k
    Fecode++;
2339
936k
    break;
2340
2341
21.9k
    case OP_WHITESPACE:
2342
21.9k
    if (Feptr >= mb->end_subject)
2343
473
      {
2344
473
      SCHECK_PARTIAL();
2345
473
      RRETURN(MATCH_NOMATCH);
2346
0
      }
2347
21.5k
    GETCHARINCTEST(fc, Feptr);
2348
21.5k
    if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_space) == 0)
2349
18.8k
      RRETURN(MATCH_NOMATCH);
2350
2.63k
    Fecode++;
2351
2.63k
    break;
2352
2353
321k
    case OP_NOT_WORDCHAR:
2354
321k
    if (Feptr >= mb->end_subject)
2355
282
      {
2356
282
      SCHECK_PARTIAL();
2357
282
      RRETURN(MATCH_NOMATCH);
2358
0
      }
2359
320k
    GETCHARINCTEST(fc, Feptr);
2360
320k
    if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0)
2361
193k
      RRETURN(MATCH_NOMATCH);
2362
126k
    Fecode++;
2363
126k
    break;
2364
2365
229M
    case OP_WORDCHAR:
2366
229M
    if (Feptr >= mb->end_subject)
2367
1.05k
      {
2368
1.05k
      SCHECK_PARTIAL();
2369
1.05k
      RRETURN(MATCH_NOMATCH);
2370
0
      }
2371
229M
    GETCHARINCTEST(fc, Feptr);
2372
229M
    if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_word) == 0)
2373
202M
      RRETURN(MATCH_NOMATCH);
2374
26.3M
    Fecode++;
2375
26.3M
    break;
2376
2377
99.5M
    case OP_ANYNL:
2378
99.5M
    if (Feptr >= mb->end_subject)
2379
1.41M
      {
2380
1.41M
      SCHECK_PARTIAL();
2381
1.41M
      RRETURN(MATCH_NOMATCH);
2382
0
      }
2383
98.1M
    GETCHARINCTEST(fc, Feptr);
2384
98.1M
    switch(fc)
2385
98.1M
      {
2386
92.7M
      default: RRETURN(MATCH_NOMATCH);
2387
2388
71.0k
      case CHAR_CR:
2389
71.0k
      if (Feptr >= mb->end_subject)
2390
766
        {
2391
766
        SCHECK_PARTIAL();
2392
766
        }
2393
70.2k
      else if (UCHAR21TEST(Feptr) == CHAR_LF) Feptr++;
2394
71.0k
      break;
2395
2396
2.99M
      case CHAR_LF:
2397
2.99M
      break;
2398
2399
1.92M
      case CHAR_VT:
2400
2.34M
      case CHAR_FF:
2401
2.36M
      case CHAR_NEL:
2402
2.36M
#ifndef EBCDIC
2403
2.36M
      case 0x2028:
2404
2.36M
      case 0x2029:
2405
2.36M
#endif  /* Not EBCDIC */
2406
2.36M
      if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
2407
2.36M
      break;
2408
98.1M
      }
2409
5.43M
    Fecode++;
2410
5.43M
    break;
2411
2412
19.6M
    case OP_NOT_HSPACE:
2413
19.6M
    if (Feptr >= mb->end_subject)
2414
649k
      {
2415
649k
      SCHECK_PARTIAL();
2416
649k
      RRETURN(MATCH_NOMATCH);
2417
0
      }
2418
19.0M
    GETCHARINCTEST(fc, Feptr);
2419
19.0M
    switch(fc)
2420
19.0M
      {
2421
247k
      HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
2422
18.7M
      default: break;
2423
19.0M
      }
2424
18.7M
    Fecode++;
2425
18.7M
    break;
2426
2427
106k
    case OP_HSPACE:
2428
106k
    if (Feptr >= mb->end_subject)
2429
3.42k
      {
2430
3.42k
      SCHECK_PARTIAL();
2431
3.42k
      RRETURN(MATCH_NOMATCH);
2432
0
      }
2433
103k
    GETCHARINCTEST(fc, Feptr);
2434
103k
    switch(fc)
2435
103k
      {
2436
3.91k
      HSPACE_CASES: break;  /* Byte and multibyte cases */
2437
99.3k
      default: RRETURN(MATCH_NOMATCH);
2438
103k
      }
2439
3.91k
    Fecode++;
2440
3.91k
    break;
2441
2442
103M
    case OP_NOT_VSPACE:
2443
103M
    if (Feptr >= mb->end_subject)
2444
2.92k
      {
2445
2.92k
      SCHECK_PARTIAL();
2446
2.92k
      RRETURN(MATCH_NOMATCH);
2447
0
      }
2448
103M
    GETCHARINCTEST(fc, Feptr);
2449
103M
    switch(fc)
2450
103M
      {
2451
610k
      VSPACE_CASES: RRETURN(MATCH_NOMATCH);
2452
103M
      default: break;
2453
103M
      }
2454
103M
    Fecode++;
2455
103M
    break;
2456
2457
2.14M
    case OP_VSPACE:
2458
2.14M
    if (Feptr >= mb->end_subject)
2459
14.1k
      {
2460
14.1k
      SCHECK_PARTIAL();
2461
14.1k
      RRETURN(MATCH_NOMATCH);
2462
0
      }
2463
2.12M
    GETCHARINCTEST(fc, Feptr);
2464
2.12M
    switch(fc)
2465
2.12M
      {
2466
101k
      VSPACE_CASES: break;
2467
2.02M
      default: RRETURN(MATCH_NOMATCH);
2468
2.12M
      }
2469
101k
    Fecode++;
2470
101k
    break;
2471
2472
2473
0
#ifdef SUPPORT_UNICODE
2474
2475
    /* ===================================================================== */
2476
    /* Check the next character by Unicode property. We will get here only
2477
    if the support is in the binary; otherwise a compile-time error occurs. */
2478
2479
233k
    case OP_PROP:
2480
278k
    case OP_NOTPROP:
2481
278k
    if (Feptr >= mb->end_subject)
2482
5.72k
      {
2483
5.72k
      SCHECK_PARTIAL();
2484
5.72k
      RRETURN(MATCH_NOMATCH);
2485
0
      }
2486
272k
    GETCHARINCTEST(fc, Feptr);
2487
272k
      {
2488
272k
      const uint32_t *cp;
2489
272k
      uint32_t chartype;
2490
272k
      const ucd_record *prop = GET_UCD(fc);
2491
272k
      BOOL notmatch = Fop == OP_NOTPROP;
2492
2493
272k
      switch(Fecode[1])
2494
272k
        {
2495
0
        case PT_ANY:
2496
0
        if (notmatch) RRETURN(MATCH_NOMATCH);
2497
0
        break;
2498
2499
0
        case PT_LAMP:
2500
0
        chartype = prop->chartype;
2501
0
        if ((chartype == ucp_Lu ||
2502
0
             chartype == ucp_Ll ||
2503
0
             chartype == ucp_Lt) == notmatch)
2504
0
          RRETURN(MATCH_NOMATCH);
2505
0
        break;
2506
2507
4.45k
        case PT_GC:
2508
4.45k
        if ((Fecode[2] == PRIV(ucp_gentype)[prop->chartype]) == notmatch)
2509
3.45k
          RRETURN(MATCH_NOMATCH);
2510
3.45k
        break;
2511
2512
113k
        case PT_PC:
2513
113k
        if ((Fecode[2] == prop->chartype) == notmatch)
2514
110k
          RRETURN(MATCH_NOMATCH);
2515
3.81k
        break;
2516
2517
0
        case PT_SC:
2518
0
        if ((Fecode[2] == prop->script) == notmatch)
2519
0
          RRETURN(MATCH_NOMATCH);
2520
0
        break;
2521
2522
0
        case PT_SCX:
2523
0
          {
2524
0
          BOOL ok = (Fecode[2] == prop->script ||
2525
0
                     MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Fecode[2]) != 0);
2526
0
          if (ok == notmatch) RRETURN(MATCH_NOMATCH);
2527
0
          }
2528
0
        break;
2529
2530
        /* These are specials */
2531
2532
0
        case PT_ALNUM:
2533
0
        chartype = prop->chartype;
2534
0
        if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
2535
0
             PRIV(ucp_gentype)[chartype] == ucp_N) == notmatch)
2536
0
          RRETURN(MATCH_NOMATCH);
2537
0
        break;
2538
2539
        /* Perl space used to exclude VT, but from Perl 5.18 it is included,
2540
        which means that Perl space and POSIX space are now identical. PCRE
2541
        was changed at release 8.34. */
2542
2543
30.7k
        case PT_SPACE:    /* Perl space */
2544
30.7k
        case PT_PXSPACE:  /* POSIX space */
2545
30.7k
        switch(fc)
2546
30.7k
          {
2547
40.2k
          HSPACE_CASES:
2548
40.2k
          VSPACE_CASES:
2549
35.9k
          if (notmatch) RRETURN(MATCH_NOMATCH);
2550
24
          break;
2551
2552
25.2k
          default:
2553
25.2k
          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == notmatch)
2554
24.5k
            RRETURN(MATCH_NOMATCH);
2555
24.5k
          break;
2556
30.7k
          }
2557
24.5k
        break;
2558
2559
70.8k
        case PT_WORD:
2560
70.8k
        chartype = prop->chartype;
2561
70.8k
        if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
2562
50.9k
             PRIV(ucp_gentype)[chartype] == ucp_N ||
2563
49.4k
             chartype == ucp_Mn ||
2564
49.4k
             chartype == ucp_Pc) == notmatch)
2565
49.7k
          RRETURN(MATCH_NOMATCH);
2566
21.0k
        break;
2567
2568
52.7k
        case PT_CLIST:
2569
#if PCRE2_CODE_UNIT_WIDTH == 32
2570
            if (fc > MAX_UTF_CODE_POINT)
2571
              {
2572
              if (notmatch) break;;
2573
              RRETURN(MATCH_NOMATCH);
2574
              }
2575
#endif
2576
52.7k
        cp = PRIV(ucd_caseless_sets) + Fecode[2];
2577
52.7k
        for (;;)
2578
69.7k
          {
2579
69.7k
          if (fc < *cp)
2580
50.6k
            { if (notmatch) break; else { RRETURN(MATCH_NOMATCH); } }
2581
19.1k
          if (fc == *cp++)
2582
2.12k
            { if (notmatch) { RRETURN(MATCH_NOMATCH); } else break; }
2583
19.1k
          }
2584
3.55k
        break;
2585
2586
3.55k
        case PT_UCNC:
2587
0
        if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
2588
0
             fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
2589
0
             fc >= 0xe000) == notmatch)
2590
0
          RRETURN(MATCH_NOMATCH);
2591
0
        break;
2592
2593
0
        case PT_BIDICL:
2594
0
        if ((UCD_BIDICLASS_PROP(prop) == Fecode[2]) == notmatch)
2595
0
          RRETURN(MATCH_NOMATCH);
2596
0
        break;
2597
2598
0
        case PT_BOOL:
2599
0
          {
2600
0
          BOOL ok = MAPBIT(PRIV(ucd_boolprop_sets) +
2601
0
            UCD_BPROPS_PROP(prop), Fecode[2]) != 0;
2602
0
          if (ok == notmatch) RRETURN(MATCH_NOMATCH);
2603
0
          }
2604
0
        break;
2605
2606
        /* This should never occur */
2607
2608
0
        default:
2609
0
        return PCRE2_ERROR_INTERNAL;
2610
272k
        }
2611
2612
56.4k
      Fecode += 3;
2613
56.4k
      }
2614
0
    break;
2615
2616
2617
    /* ===================================================================== */
2618
    /* Match an extended Unicode sequence. We will get here only if the support
2619
    is in the binary; otherwise a compile-time error occurs. */
2620
2621
34.4k
    case OP_EXTUNI:
2622
34.4k
    if (Feptr >= mb->end_subject)
2623
1.47k
      {
2624
1.47k
      SCHECK_PARTIAL();
2625
1.47k
      RRETURN(MATCH_NOMATCH);
2626
0
      }
2627
33.0k
    else
2628
33.0k
      {
2629
33.0k
      GETCHARINCTEST(fc, Feptr);
2630
33.0k
      Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject, utf,
2631
33.0k
        NULL);
2632
33.0k
      }
2633
33.0k
    CHECK_PARTIAL();
2634
33.0k
    Fecode++;
2635
33.0k
    break;
2636
2637
0
#endif  /* SUPPORT_UNICODE */
2638
2639
2640
    /* ===================================================================== */
2641
    /* Match a single character type repeatedly. Note that the property type
2642
    does not need to be in a stack frame as it is not used within an RMATCH()
2643
    loop. */
2644
2645
1.12G
#define Lstart_eptr  F->temp_sptr[0]
2646
612M
#define Lmin         F->temp_32[0]
2647
803M
#define Lmax         F->temp_32[1]
2648
1.19G
#define Lctype       F->temp_32[2]
2649
10.8M
#define Lpropvalue   F->temp_32[3]
2650
2651
0
    case OP_TYPEEXACT:
2652
0
    Lmin = Lmax = GET2(Fecode, 1);
2653
0
    Fecode += 1 + IMM2_SIZE;
2654
0
    goto REPEATTYPE;
2655
2656
0
    case OP_TYPEUPTO:
2657
0
    case OP_TYPEMINUPTO:
2658
0
    Lmin = 0;
2659
0
    Lmax = GET2(Fecode, 1);
2660
0
    reptype = (*Fecode == OP_TYPEMINUPTO)? REPTYPE_MIN : REPTYPE_MAX;
2661
0
    Fecode += 1 + IMM2_SIZE;
2662
0
    goto REPEATTYPE;
2663
2664
3.89M
    case OP_TYPEPOSSTAR:
2665
3.89M
    reptype = REPTYPE_POS;
2666
3.89M
    Lmin = 0;
2667
3.89M
    Lmax = UINT32_MAX;
2668
3.89M
    Fecode++;
2669
3.89M
    goto REPEATTYPE;
2670
2671
6.13M
    case OP_TYPEPOSPLUS:
2672
6.13M
    reptype = REPTYPE_POS;
2673
6.13M
    Lmin = 1;
2674
6.13M
    Lmax = UINT32_MAX;
2675
6.13M
    Fecode++;
2676
6.13M
    goto REPEATTYPE;
2677
2678
6.39M
    case OP_TYPEPOSQUERY:
2679
6.39M
    reptype = REPTYPE_POS;
2680
6.39M
    Lmin = 0;
2681
6.39M
    Lmax = 1;
2682
6.39M
    Fecode++;
2683
6.39M
    goto REPEATTYPE;
2684
2685
0
    case OP_TYPEPOSUPTO:
2686
0
    reptype = REPTYPE_POS;
2687
0
    Lmin = 0;
2688
0
    Lmax = GET2(Fecode, 1);
2689
0
    Fecode += 1 + IMM2_SIZE;
2690
0
    goto REPEATTYPE;
2691
2692
113k
    case OP_TYPESTAR:
2693
114k
    case OP_TYPEMINSTAR:
2694
6.79M
    case OP_TYPEPLUS:
2695
7.41M
    case OP_TYPEMINPLUS:
2696
119M
    case OP_TYPEQUERY:
2697
125M
    case OP_TYPEMINQUERY:
2698
125M
    fc = *Fecode++ - OP_TYPESTAR;
2699
125M
    Lmin = rep_min[fc];
2700
125M
    Lmax = rep_max[fc];
2701
125M
    reptype = rep_typ[fc];
2702
2703
    /* Common code for all repeated character type matches. */
2704
2705
141M
    REPEATTYPE:
2706
141M
    Lctype = *Fecode++;      /* Code for the character type */
2707
2708
141M
#ifdef SUPPORT_UNICODE
2709
141M
    if (Lctype == OP_PROP || Lctype == OP_NOTPROP)
2710
10.6M
      {
2711
10.6M
      proptype = *Fecode++;
2712
10.6M
      Lpropvalue = *Fecode++;
2713
10.6M
      }
2714
131M
    else proptype = -1;
2715
141M
#endif
2716
2717
    /* First, ensure the minimum number of matches are present. Use inline
2718
    code for maximizing the speed, and do the type test once at the start
2719
    (i.e. keep it out of the loops). As there are no calls to RMATCH in the
2720
    loops, we can use an ordinary variable for "notmatch". The code for UTF
2721
    mode is separated out for tidiness, except for Unicode property tests. */
2722
2723
141M
    if (Lmin > 0)
2724
13.4M
      {
2725
13.4M
#ifdef SUPPORT_UNICODE
2726
13.4M
      if (proptype >= 0)  /* Property tests in all modes */
2727
1.98M
        {
2728
1.98M
        BOOL notmatch = Lctype == OP_NOTPROP;
2729
1.98M
        switch(proptype)
2730
1.98M
          {
2731
0
          case PT_ANY:
2732
0
          if (notmatch) RRETURN(MATCH_NOMATCH);
2733
0
          for (i = 1; i <= Lmin; i++)
2734
0
            {
2735
0
            if (Feptr >= mb->end_subject)
2736
0
              {
2737
0
              SCHECK_PARTIAL();
2738
0
              RRETURN(MATCH_NOMATCH);
2739
0
              }
2740
0
            GETCHARINCTEST(fc, Feptr);
2741
0
            }
2742
0
          break;
2743
2744
0
          case PT_LAMP:
2745
0
          for (i = 1; i <= Lmin; i++)
2746
0
            {
2747
0
            int chartype;
2748
0
            if (Feptr >= mb->end_subject)
2749
0
              {
2750
0
              SCHECK_PARTIAL();
2751
0
              RRETURN(MATCH_NOMATCH);
2752
0
              }
2753
0
            GETCHARINCTEST(fc, Feptr);
2754
0
            chartype = UCD_CHARTYPE(fc);
2755
0
            if ((chartype == ucp_Lu ||
2756
0
                 chartype == ucp_Ll ||
2757
0
                 chartype == ucp_Lt) == notmatch)
2758
0
              RRETURN(MATCH_NOMATCH);
2759
0
            }
2760
0
          break;
2761
2762
588
          case PT_GC:
2763
1.05k
          for (i = 1; i <= Lmin; i++)
2764
588
            {
2765
588
            if (Feptr >= mb->end_subject)
2766
0
              {
2767
0
              SCHECK_PARTIAL();
2768
0
              RRETURN(MATCH_NOMATCH);
2769
0
              }
2770
588
            GETCHARINCTEST(fc, Feptr);
2771
588
            if ((UCD_CATEGORY(fc) == Lpropvalue) == notmatch)
2772
464
              RRETURN(MATCH_NOMATCH);
2773
464
            }
2774
464
          break;
2775
2776
89.4k
          case PT_PC:
2777
92.2k
          for (i = 1; i <= Lmin; i++)
2778
89.4k
            {
2779
89.4k
            if (Feptr >= mb->end_subject)
2780
0
              {
2781
0
              SCHECK_PARTIAL();
2782
0
              RRETURN(MATCH_NOMATCH);
2783
0
              }
2784
89.4k
            GETCHARINCTEST(fc, Feptr);
2785
89.4k
            if ((UCD_CHARTYPE(fc) == Lpropvalue) == notmatch)
2786
86.5k
              RRETURN(MATCH_NOMATCH);
2787
2.85k
            }
2788
2.85k
          break;
2789
2790
2.85k
          case PT_SC:
2791
0
          for (i = 1; i <= Lmin; i++)
2792
0
            {
2793
0
            if (Feptr >= mb->end_subject)
2794
0
              {
2795
0
              SCHECK_PARTIAL();
2796
0
              RRETURN(MATCH_NOMATCH);
2797
0
              }
2798
0
            GETCHARINCTEST(fc, Feptr);
2799
0
            if ((UCD_SCRIPT(fc) == Lpropvalue) == notmatch)
2800
0
              RRETURN(MATCH_NOMATCH);
2801
0
            }
2802
0
          break;
2803
2804
0
          case PT_SCX:
2805
0
          for (i = 1; i <= Lmin; i++)
2806
0
            {
2807
0
            BOOL ok;
2808
0
            const ucd_record *prop;
2809
0
            if (Feptr >= mb->end_subject)
2810
0
              {
2811
0
              SCHECK_PARTIAL();
2812
0
              RRETURN(MATCH_NOMATCH);
2813
0
              }
2814
0
            GETCHARINCTEST(fc, Feptr);
2815
0
            prop = GET_UCD(fc);
2816
0
            ok = (prop->script == Lpropvalue ||
2817
0
                  MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0);
2818
0
            if (ok == notmatch)
2819
0
              RRETURN(MATCH_NOMATCH);
2820
0
            }
2821
0
          break;
2822
2823
0
          case PT_ALNUM:
2824
0
          for (i = 1; i <= Lmin; i++)
2825
0
            {
2826
0
            int category;
2827
0
            if (Feptr >= mb->end_subject)
2828
0
              {
2829
0
              SCHECK_PARTIAL();
2830
0
              RRETURN(MATCH_NOMATCH);
2831
0
              }
2832
0
            GETCHARINCTEST(fc, Feptr);
2833
0
            category = UCD_CATEGORY(fc);
2834
0
            if ((category == ucp_L || category == ucp_N) == notmatch)
2835
0
              RRETURN(MATCH_NOMATCH);
2836
0
            }
2837
0
          break;
2838
2839
          /* Perl space used to exclude VT, but from Perl 5.18 it is included,
2840
          which means that Perl space and POSIX space are now identical. PCRE
2841
          was changed at release 8.34. */
2842
2843
1.85M
          case PT_SPACE:    /* Perl space */
2844
1.85M
          case PT_PXSPACE:  /* POSIX space */
2845
2.87M
          for (i = 1; i <= Lmin; i++)
2846
1.85M
            {
2847
1.85M
            if (Feptr >= mb->end_subject)
2848
15.6k
              {
2849
15.6k
              SCHECK_PARTIAL();
2850
15.6k
              RRETURN(MATCH_NOMATCH);
2851
0
              }
2852
1.84M
            GETCHARINCTEST(fc, Feptr);
2853
1.84M
            switch(fc)
2854
1.84M
              {
2855
6.20M
              HSPACE_CASES:
2856
6.20M
              VSPACE_CASES:
2857
2.85M
              if (notmatch) RRETURN(MATCH_NOMATCH);
2858
376k
              break;
2859
2860
1.43M
              default:
2861
1.43M
              if ((UCD_CATEGORY(fc) == ucp_Z) == notmatch)
2862
789k
                RRETURN(MATCH_NOMATCH);
2863
644k
              break;
2864
1.84M
              }
2865
1.84M
            }
2866
1.02M
          break;
2867
2868
1.02M
          case PT_WORD:
2869
62.0k
          for (i = 1; i <= Lmin; i++)
2870
39.3k
            {
2871
39.3k
            int chartype, category;
2872
39.3k
            if (Feptr >= mb->end_subject)
2873
827
              {
2874
827
              SCHECK_PARTIAL();
2875
827
              RRETURN(MATCH_NOMATCH);
2876
0
              }
2877
38.5k
            GETCHARINCTEST(fc, Feptr);
2878
38.5k
            chartype = UCD_CHARTYPE(fc);
2879
38.5k
            category = PRIV(ucp_gentype)[chartype];
2880
38.5k
            if ((category == ucp_L || category == ucp_N ||
2881
17.0k
                 chartype == ucp_Mn || chartype == ucp_Pc) == notmatch)
2882
22.6k
              RRETURN(MATCH_NOMATCH);
2883
22.6k
            }
2884
22.6k
          break;
2885
2886
22.6k
          case PT_CLIST:
2887
57
          for (i = 1; i <= Lmin; i++)
2888
29
            {
2889
29
            const uint32_t *cp;
2890
29
            if (Feptr >= mb->end_subject)
2891
0
              {
2892
0
              SCHECK_PARTIAL();
2893
0
              RRETURN(MATCH_NOMATCH);
2894
0
              }
2895
29
            GETCHARINCTEST(fc, Feptr);
2896
#if PCRE2_CODE_UNIT_WIDTH == 32
2897
            if (fc > MAX_UTF_CODE_POINT)
2898
              {
2899
              if (notmatch) continue;
2900
              RRETURN(MATCH_NOMATCH);
2901
              }
2902
#endif
2903
29
            cp = PRIV(ucd_caseless_sets) + Lpropvalue;
2904
29
            for (;;)
2905
32
              {
2906
32
              if (fc < *cp)
2907
29
                {
2908
29
                if (notmatch) break;
2909
29
                RRETURN(MATCH_NOMATCH);
2910
0
                }
2911
3
              if (fc == *cp++)
2912
0
                {
2913
0
                if (notmatch) RRETURN(MATCH_NOMATCH);
2914
0
                break;
2915
0
                }
2916
3
              }
2917
29
            }
2918
28
          break;
2919
2920
28
          case PT_UCNC:
2921
0
          for (i = 1; i <= Lmin; i++)
2922
0
            {
2923
0
            if (Feptr >= mb->end_subject)
2924
0
              {
2925
0
              SCHECK_PARTIAL();
2926
0
              RRETURN(MATCH_NOMATCH);
2927
0
              }
2928
0
            GETCHARINCTEST(fc, Feptr);
2929
0
            if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
2930
0
                 fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
2931
0
                 fc >= 0xe000) == notmatch)
2932
0
              RRETURN(MATCH_NOMATCH);
2933
0
            }
2934
0
          break;
2935
2936
0
          case PT_BIDICL:
2937
0
          for (i = 1; i <= Lmin; i++)
2938
0
            {
2939
0
            if (Feptr >= mb->end_subject)
2940
0
              {
2941
0
              SCHECK_PARTIAL();
2942
0
              RRETURN(MATCH_NOMATCH);
2943
0
              }
2944
0
            GETCHARINCTEST(fc, Feptr);
2945
0
            if ((UCD_BIDICLASS(fc) == Lpropvalue) == notmatch)
2946
0
              RRETURN(MATCH_NOMATCH);
2947
0
            }
2948
0
          break;
2949
2950
0
          case PT_BOOL:
2951
0
          for (i = 1; i <= Lmin; i++)
2952
0
            {
2953
0
            BOOL ok;
2954
0
            const ucd_record *prop;
2955
0
            if (Feptr >= mb->end_subject)
2956
0
              {
2957
0
              SCHECK_PARTIAL();
2958
0
              RRETURN(MATCH_NOMATCH);
2959
0
              }
2960
0
            GETCHARINCTEST(fc, Feptr);
2961
0
            prop = GET_UCD(fc);
2962
0
            ok = MAPBIT(PRIV(ucd_boolprop_sets) +
2963
0
              UCD_BPROPS_PROP(prop), Lpropvalue) != 0;
2964
0
            if (ok == notmatch)
2965
0
              RRETURN(MATCH_NOMATCH);
2966
0
            }
2967
0
          break;
2968
2969
          /* This should not occur */
2970
2971
0
          default:
2972
0
          return PCRE2_ERROR_INTERNAL;
2973
1.98M
          }
2974
1.98M
        }
2975
2976
      /* Match extended Unicode sequences. We will get here only if the
2977
      support is in the binary; otherwise a compile-time error occurs. */
2978
2979
11.4M
      else if (Lctype == OP_EXTUNI)
2980
81.5k
        {
2981
162k
        for (i = 1; i <= Lmin; i++)
2982
81.5k
          {
2983
81.5k
          if (Feptr >= mb->end_subject)
2984
434
            {
2985
434
            SCHECK_PARTIAL();
2986
434
            RRETURN(MATCH_NOMATCH);
2987
0
            }
2988
81.1k
          else
2989
81.1k
            {
2990
81.1k
            GETCHARINCTEST(fc, Feptr);
2991
81.1k
            Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject,
2992
81.1k
              mb->end_subject, utf, NULL);
2993
81.1k
            }
2994
81.1k
          CHECK_PARTIAL();
2995
81.1k
          }
2996
81.5k
        }
2997
11.3M
      else
2998
11.3M
#endif     /* SUPPORT_UNICODE */
2999
3000
/* Handle all other cases in UTF mode */
3001
3002
11.3M
#ifdef SUPPORT_UNICODE
3003
11.3M
      if (utf) switch(Lctype)
3004
3.43M
        {
3005
1.86k
        case OP_ANY:
3006
3.69k
        for (i = 1; i <= Lmin; i++)
3007
1.86k
          {
3008
1.86k
          if (Feptr >= mb->end_subject)
3009
7
            {
3010
7
            SCHECK_PARTIAL();
3011
7
            RRETURN(MATCH_NOMATCH);
3012
0
            }
3013
1.85k
          if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
3014
1.82k
          if (mb->partial != 0 &&
3015
0
              Feptr + 1 >= mb->end_subject &&
3016
0
              NLBLOCK->nltype == NLTYPE_FIXED &&
3017
0
              NLBLOCK->nllen == 2 &&
3018
0
              UCHAR21(Feptr) == NLBLOCK->nl[0])
3019
0
            {
3020
0
            mb->hitend = TRUE;
3021
0
            if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
3022
0
            }
3023
1.82k
          Feptr++;
3024
1.82k
          ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
3025
1.82k
          }
3026
1.82k
        break;
3027
3028
2.39M
        case OP_ALLANY:
3029
4.76M
        for (i = 1; i <= Lmin; i++)
3030
2.39M
          {
3031
2.39M
          if (Feptr >= mb->end_subject)
3032
29.9k
            {
3033
29.9k
            SCHECK_PARTIAL();
3034
29.9k
            RRETURN(MATCH_NOMATCH);
3035
0
            }
3036
2.36M
          Feptr++;
3037
2.36M
          ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
3038
2.36M
          }
3039
2.36M
        break;
3040
3041
2.36M
        case OP_ANYBYTE:
3042
14.3k
        if (Feptr > mb->end_subject - Lmin) RRETURN(MATCH_NOMATCH);
3043
14.3k
        Feptr += Lmin;
3044
14.3k
        break;
3045
3046
1.00M
        case OP_ANYNL:
3047
1.04M
        for (i = 1; i <= Lmin; i++)
3048
1.00M
          {
3049
1.00M
          if (Feptr >= mb->end_subject)
3050
29.8k
            {
3051
29.8k
            SCHECK_PARTIAL();
3052
29.8k
            RRETURN(MATCH_NOMATCH);
3053
0
            }
3054
972k
          GETCHARINC(fc, Feptr);
3055
972k
          switch(fc)
3056
972k
            {
3057
929k
            default: RRETURN(MATCH_NOMATCH);
3058
3059
16
            case CHAR_CR:
3060
16
            if (Feptr < mb->end_subject && UCHAR21(Feptr) == CHAR_LF) Feptr++;
3061
16
            break;
3062
3063
20.7k
            case CHAR_LF:
3064
20.7k
            break;
3065
3066
22.4k
            case CHAR_VT:
3067
22.4k
            case CHAR_FF:
3068
22.4k
            case CHAR_NEL:
3069
22.4k
#ifndef EBCDIC
3070
22.4k
            case 0x2028:
3071
22.4k
            case 0x2029:
3072
22.4k
#endif  /* Not EBCDIC */
3073
22.4k
            if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
3074
22.4k
            break;
3075
972k
            }
3076
972k
          }
3077
43.1k
        break;
3078
3079
43.1k
        case OP_NOT_HSPACE:
3080
18.4k
        for (i = 1; i <= Lmin; i++)
3081
9.92k
          {
3082
9.92k
          if (Feptr >= mb->end_subject)
3083
2
            {
3084
2
            SCHECK_PARTIAL();
3085
2
            RRETURN(MATCH_NOMATCH);
3086
0
            }
3087
9.91k
          GETCHARINC(fc, Feptr);
3088
9.91k
          switch(fc)
3089
9.91k
            {
3090
1.37k
            HSPACE_CASES: RRETURN(MATCH_NOMATCH);
3091
8.54k
            default: break;
3092
9.91k
            }
3093
9.91k
          }
3094
8.54k
        break;
3095
3096
8.54k
        case OP_HSPACE:
3097
8.82k
        for (i = 1; i <= Lmin; i++)
3098
7.93k
          {
3099
7.93k
          if (Feptr >= mb->end_subject)
3100
268
            {
3101
268
            SCHECK_PARTIAL();
3102
268
            RRETURN(MATCH_NOMATCH);
3103
0
            }
3104
7.66k
          GETCHARINC(fc, Feptr);
3105
7.66k
          switch(fc)
3106
7.66k
            {
3107
891
            HSPACE_CASES: break;
3108
6.77k
            default: RRETURN(MATCH_NOMATCH);
3109
7.66k
            }
3110
7.66k
          }
3111
891
        break;
3112
3113
3.81k
        case OP_NOT_VSPACE:
3114
7.16k
        for (i = 1; i <= Lmin; i++)
3115
3.81k
          {
3116
3.81k
          if (Feptr >= mb->end_subject)
3117
45
            {
3118
45
            SCHECK_PARTIAL();
3119
45
            RRETURN(MATCH_NOMATCH);
3120
0
            }
3121
3.76k
          GETCHARINC(fc, Feptr);
3122
3.76k
          switch(fc)
3123
3.76k
            {
3124
419
            VSPACE_CASES: RRETURN(MATCH_NOMATCH);
3125
3.34k
            default: break;
3126
3.76k
            }
3127
3.76k
          }
3128
3.34k
        break;
3129
3130
3.34k
        case OP_VSPACE:
3131
2.28k
        for (i = 1; i <= Lmin; i++)
3132
2.10k
          {
3133
2.10k
          if (Feptr >= mb->end_subject)
3134
0
            {
3135
0
            SCHECK_PARTIAL();
3136
0
            RRETURN(MATCH_NOMATCH);
3137
0
            }
3138
2.10k
          GETCHARINC(fc, Feptr);
3139
2.10k
          switch(fc)
3140
2.10k
            {
3141
181
            VSPACE_CASES: break;
3142
1.92k
            default: RRETURN(MATCH_NOMATCH);
3143
2.10k
            }
3144
2.10k
          }
3145
181
        break;
3146
3147
181
        case OP_NOT_DIGIT:
3148
0
        for (i = 1; i <= Lmin; i++)
3149
0
          {
3150
0
          if (Feptr >= mb->end_subject)
3151
0
            {
3152
0
            SCHECK_PARTIAL();
3153
0
            RRETURN(MATCH_NOMATCH);
3154
0
            }
3155
0
          GETCHARINC(fc, Feptr);
3156
0
          if (fc < 128 && (mb->ctypes[fc] & ctype_digit) != 0)
3157
0
            RRETURN(MATCH_NOMATCH);
3158
0
          }
3159
0
        break;
3160
3161
0
        case OP_DIGIT:
3162
0
        for (i = 1; i <= Lmin; i++)
3163
0
          {
3164
0
          uint32_t cc;
3165
0
          if (Feptr >= mb->end_subject)
3166
0
            {
3167
0
            SCHECK_PARTIAL();
3168
0
            RRETURN(MATCH_NOMATCH);
3169
0
            }
3170
0
          cc = UCHAR21(Feptr);
3171
0
          if (cc >= 128 || (mb->ctypes[cc] & ctype_digit) == 0)
3172
0
            RRETURN(MATCH_NOMATCH);
3173
0
          Feptr++;
3174
          /* No need to skip more code units - we know it has only one. */
3175
0
          }
3176
0
        break;
3177
3178
0
        case OP_NOT_WHITESPACE:
3179
0
        for (i = 1; i <= Lmin; i++)
3180
0
          {
3181
0
          uint32_t cc;
3182
0
          if (Feptr >= mb->end_subject)
3183
0
            {
3184
0
            SCHECK_PARTIAL();
3185
0
            RRETURN(MATCH_NOMATCH);
3186
0
            }
3187
0
          cc = UCHAR21(Feptr);
3188
0
          if (cc < 128 && (mb->ctypes[cc] & ctype_space) != 0)
3189
0
            RRETURN(MATCH_NOMATCH);
3190
0
          Feptr++;
3191
0
          ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
3192
0
          }
3193
0
        break;
3194
3195
0
        case OP_WHITESPACE:
3196
0
        for (i = 1; i <= Lmin; i++)
3197
0
          {
3198
0
          uint32_t cc;
3199
0
          if (Feptr >= mb->end_subject)
3200
0
            {
3201
0
            SCHECK_PARTIAL();
3202
0
            RRETURN(MATCH_NOMATCH);
3203
0
            }
3204
0
          cc = UCHAR21(Feptr);
3205
0
          if (cc >= 128 || (mb->ctypes[cc] & ctype_space) == 0)
3206
0
            RRETURN(MATCH_NOMATCH);
3207
0
          Feptr++;
3208
          /* No need to skip more code units - we know it has only one. */
3209
0
          }
3210
0
        break;
3211
3212
0
        case OP_NOT_WORDCHAR:
3213
0
        for (i = 1; i <= Lmin; i++)
3214
0
          {
3215
0
          uint32_t cc;
3216
0
          if (Feptr >= mb->end_subject)
3217
0
            {
3218
0
            SCHECK_PARTIAL();
3219
0
            RRETURN(MATCH_NOMATCH);
3220
0
            }
3221
0
          cc = UCHAR21(Feptr);
3222
0
          if (cc < 128 && (mb->ctypes[cc] & ctype_word) != 0)
3223
0
            RRETURN(MATCH_NOMATCH);
3224
0
          Feptr++;
3225
0
          ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
3226
0
          }
3227
0
        break;
3228
3229
0
        case OP_WORDCHAR:
3230
0
        for (i = 1; i <= Lmin; i++)
3231
0
          {
3232
0
          uint32_t cc;
3233
0
          if (Feptr >= mb->end_subject)
3234
0
            {
3235
0
            SCHECK_PARTIAL();
3236
0
            RRETURN(MATCH_NOMATCH);
3237
0
            }
3238
0
          cc = UCHAR21(Feptr);
3239
0
          if (cc >= 128 || (mb->ctypes[cc] & ctype_word) == 0)
3240
0
            RRETURN(MATCH_NOMATCH);
3241
0
          Feptr++;
3242
          /* No need to skip more code units - we know it has only one. */
3243
0
          }
3244
0
        break;
3245
3246
0
        default:
3247
0
        return PCRE2_ERROR_INTERNAL;
3248
3.43M
        }  /* End switch(Lctype) */
3249
3250
7.92M
      else
3251
7.92M
#endif     /* SUPPORT_UNICODE */
3252
3253
      /* Code for the non-UTF case for minimum matching of operators other
3254
      than OP_PROP and OP_NOTPROP. */
3255
3256
7.92M
      switch(Lctype)
3257
7.92M
        {
3258
457k
        case OP_ANY:
3259
904k
        for (i = 1; i <= Lmin; i++)
3260
457k
          {
3261
457k
          if (Feptr >= mb->end_subject)
3262
8.02k
            {
3263
8.02k
            SCHECK_PARTIAL();
3264
8.02k
            RRETURN(MATCH_NOMATCH);
3265
0
            }
3266
449k
          if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
3267
446k
          if (mb->partial != 0 &&
3268
0
              Feptr + 1 >= mb->end_subject &&
3269
0
              NLBLOCK->nltype == NLTYPE_FIXED &&
3270
0
              NLBLOCK->nllen == 2 &&
3271
0
              *Feptr == NLBLOCK->nl[0])
3272
0
            {
3273
0
            mb->hitend = TRUE;
3274
0
            if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
3275
0
            }
3276
446k
          Feptr++;
3277
446k
          }
3278
446k
        break;
3279
3280
446k
        case OP_ALLANY:
3281
45.8k
        if (Feptr > mb->end_subject - Lmin)
3282
207
          {
3283
207
          SCHECK_PARTIAL();
3284
207
          RRETURN(MATCH_NOMATCH);
3285
0
          }
3286
45.5k
        Feptr += Lmin;
3287
45.5k
        break;
3288
3289
        /* This OP_ANYBYTE case will never be reached because \C gets turned
3290
        into OP_ALLANY in non-UTF mode. Cut out the code so that coverage
3291
        reports don't complain about its never being used. */
3292
3293
/*        case OP_ANYBYTE:
3294
*        if (Feptr > mb->end_subject - Lmin)
3295
*          {
3296
*          SCHECK_PARTIAL();
3297
*          RRETURN(MATCH_NOMATCH);
3298
*          }
3299
*        Feptr += Lmin;
3300
*        break;
3301
*/
3302
4.03M
        case OP_ANYNL:
3303
4.08M
        for (i = 1; i <= Lmin; i++)
3304
4.03M
          {
3305
4.03M
          if (Feptr >= mb->end_subject)
3306
6.91k
            {
3307
6.91k
            SCHECK_PARTIAL();
3308
6.91k
            RRETURN(MATCH_NOMATCH);
3309
0
            }
3310
4.02M
          switch(*Feptr++)
3311
4.02M
            {
3312
3.97M
            default: RRETURN(MATCH_NOMATCH);
3313
3314
14.7k
            case CHAR_CR:
3315
14.7k
            if (Feptr < mb->end_subject && *Feptr == CHAR_LF) Feptr++;
3316
14.7k
            break;
3317
3318
27.4k
            case CHAR_LF:
3319
27.4k
            break;
3320
3321
3.14k
            case CHAR_VT:
3322
11.2k
            case CHAR_FF:
3323
11.8k
            case CHAR_NEL:
3324
#if PCRE2_CODE_UNIT_WIDTH != 8
3325
            case 0x2028:
3326
            case 0x2029:
3327
#endif
3328
11.8k
            if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
3329
11.8k
            break;
3330
4.02M
            }
3331
4.02M
          }
3332
53.9k
        break;
3333
3334
53.9k
        case OP_NOT_HSPACE:
3335
34.4k
        for (i = 1; i <= Lmin; i++)
3336
17.5k
          {
3337
17.5k
          if (Feptr >= mb->end_subject)
3338
112
            {
3339
112
            SCHECK_PARTIAL();
3340
112
            RRETURN(MATCH_NOMATCH);
3341
0
            }
3342
17.3k
          switch(*Feptr++)
3343
17.3k
            {
3344
16.9k
            default: break;
3345
16.9k
            HSPACE_BYTE_CASES:
3346
#if PCRE2_CODE_UNIT_WIDTH != 8
3347
            HSPACE_MULTIBYTE_CASES:
3348
#endif
3349
467
            RRETURN(MATCH_NOMATCH);
3350
17.3k
            }
3351
17.3k
          }
3352
16.9k
        break;
3353
3354
16.9k
        case OP_HSPACE:
3355
7.46k
        for (i = 1; i <= Lmin; i++)
3356
4.65k
          {
3357
4.65k
          if (Feptr >= mb->end_subject)
3358
16
            {
3359
16
            SCHECK_PARTIAL();
3360
16
            RRETURN(MATCH_NOMATCH);
3361
0
            }
3362
4.64k
          switch(*Feptr++)
3363
4.64k
            {
3364
1.83k
            default: RRETURN(MATCH_NOMATCH);
3365
8.10k
            HSPACE_BYTE_CASES:
3366
#if PCRE2_CODE_UNIT_WIDTH != 8
3367
            HSPACE_MULTIBYTE_CASES:
3368
#endif
3369
8.10k
            break;
3370
4.64k
            }
3371
4.64k
          }
3372
2.81k
        break;
3373
3374
2.01M
        case OP_NOT_VSPACE:
3375
3.99M
        for (i = 1; i <= Lmin; i++)
3376
2.01M
          {
3377
2.01M
          if (Feptr >= mb->end_subject)
3378
4.94k
            {
3379
4.94k
            SCHECK_PARTIAL();
3380
4.94k
            RRETURN(MATCH_NOMATCH);
3381
0
            }
3382
2.01M
          switch(*Feptr++)
3383
2.01M
            {
3384
33.6k
            VSPACE_BYTE_CASES:
3385
#if PCRE2_CODE_UNIT_WIDTH != 8
3386
            VSPACE_MULTIBYTE_CASES:
3387
#endif
3388
33.6k
            RRETURN(MATCH_NOMATCH);
3389
1.97M
            default: break;
3390
2.01M
            }
3391
2.01M
          }
3392
1.97M
        break;
3393
3394
1.97M
        case OP_VSPACE:
3395
5.39k
        for (i = 1; i <= Lmin; i++)
3396
5.18k
          {
3397
5.18k
          if (Feptr >= mb->end_subject)
3398
98
            {
3399
98
            SCHECK_PARTIAL();
3400
98
            RRETURN(MATCH_NOMATCH);
3401
0
            }
3402
5.08k
          switch(*Feptr++)
3403
5.08k
            {
3404
4.87k
            default: RRETURN(MATCH_NOMATCH);
3405
731
            VSPACE_BYTE_CASES:
3406
#if PCRE2_CODE_UNIT_WIDTH != 8
3407
            VSPACE_MULTIBYTE_CASES:
3408
#endif
3409
731
            break;
3410
5.08k
            }
3411
5.08k
          }
3412
215
        break;
3413
3414
48.4k
        case OP_NOT_DIGIT:
3415
78.7k
        for (i = 1; i <= Lmin; i++)
3416
48.4k
          {
3417
48.4k
          if (Feptr >= mb->end_subject)
3418
1.17k
            {
3419
1.17k
            SCHECK_PARTIAL();
3420
1.17k
            RRETURN(MATCH_NOMATCH);
3421
0
            }
3422
47.2k
          if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_digit) != 0)
3423
30.2k
            RRETURN(MATCH_NOMATCH);
3424
30.2k
          Feptr++;
3425
30.2k
          }
3426
30.2k
        break;
3427
3428
962k
        case OP_DIGIT:
3429
971k
        for (i = 1; i <= Lmin; i++)
3430
962k
          {
3431
962k
          if (Feptr >= mb->end_subject)
3432
22.5k
            {
3433
22.5k
            SCHECK_PARTIAL();
3434
22.5k
            RRETURN(MATCH_NOMATCH);
3435
0
            }
3436
939k
          if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_digit) == 0)
3437
931k
            RRETURN(MATCH_NOMATCH);
3438
8.67k
          Feptr++;
3439
8.67k
          }
3440
8.67k
        break;
3441
3442
211k
        case OP_NOT_WHITESPACE:
3443
416k
        for (i = 1; i <= Lmin; i++)
3444
211k
          {
3445
211k
          if (Feptr >= mb->end_subject)
3446
1.43k
            {
3447
1.43k
            SCHECK_PARTIAL();
3448
1.43k
            RRETURN(MATCH_NOMATCH);
3449
0
            }
3450
210k
          if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_space) != 0)
3451
204k
            RRETURN(MATCH_NOMATCH);
3452
204k
          Feptr++;
3453
204k
          }
3454
204k
        break;
3455
3456
204k
        case OP_WHITESPACE:
3457
459
        for (i = 1; i <= Lmin; i++)
3458
325
          {
3459
325
          if (Feptr >= mb->end_subject)
3460
2
            {
3461
2
            SCHECK_PARTIAL();
3462
2
            RRETURN(MATCH_NOMATCH);
3463
0
            }
3464
323
          if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_space) == 0)
3465
189
            RRETURN(MATCH_NOMATCH);
3466
134
          Feptr++;
3467
134
          }
3468
134
        break;
3469
3470
112k
        case OP_NOT_WORDCHAR:
3471
212k
        for (i = 1; i <= Lmin; i++)
3472
112k
          {
3473
112k
          if (Feptr >= mb->end_subject)
3474
234
            {
3475
234
            SCHECK_PARTIAL();
3476
234
            RRETURN(MATCH_NOMATCH);
3477
0
            }
3478
112k
          if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_word) != 0)
3479
99.5k
            RRETURN(MATCH_NOMATCH);
3480
99.5k
          Feptr++;
3481
99.5k
          }
3482
99.5k
        break;
3483
3484
99.5k
        case OP_WORDCHAR:
3485
16.0k
        for (i = 1; i <= Lmin; i++)
3486
10.1k
          {
3487
10.1k
          if (Feptr >= mb->end_subject)
3488
6
            {
3489
6
            SCHECK_PARTIAL();
3490
6
            RRETURN(MATCH_NOMATCH);
3491
0
            }
3492
10.1k
          if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_word) == 0)
3493
5.84k
            RRETURN(MATCH_NOMATCH);
3494
5.84k
          Feptr++;
3495
5.84k
          }
3496
5.84k
        break;
3497
3498
5.84k
        default:
3499
0
        return PCRE2_ERROR_INTERNAL;
3500
7.92M
        }
3501
13.4M
      }
3502
3503
    /* If Lmin = Lmax we are done. Continue with the main loop. */
3504
3505
134M
    if (Lmin == Lmax) continue;
3506
3507
    /* If minimizing, we have to test the rest of the pattern before each
3508
    subsequent match. This means we cannot use a local "notmatch" variable as
3509
    in the other cases. As all 4 temporary 32-bit values in the frame are
3510
    already in use, just test the type each time. */
3511
3512
134M
    if (reptype == REPTYPE_MIN)
3513
5.86M
      {
3514
5.86M
#ifdef SUPPORT_UNICODE
3515
5.86M
      if (proptype >= 0)
3516
707k
        {
3517
707k
        switch(proptype)
3518
707k
          {
3519
0
          case PT_ANY:
3520
0
          for (;;)
3521
0
            {
3522
0
            RMATCH(Fecode, RM208);
3523
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3524
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3525
0
            if (Feptr >= mb->end_subject)
3526
0
              {
3527
0
              SCHECK_PARTIAL();
3528
0
              RRETURN(MATCH_NOMATCH);
3529
0
              }
3530
0
            GETCHARINCTEST(fc, Feptr);
3531
0
            if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
3532
0
            }
3533
          /* Control never gets here */
3534
3535
0
          case PT_LAMP:
3536
0
          for (;;)
3537
0
            {
3538
0
            int chartype;
3539
0
            RMATCH(Fecode, RM209);
3540
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3541
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3542
0
            if (Feptr >= mb->end_subject)
3543
0
              {
3544
0
              SCHECK_PARTIAL();
3545
0
              RRETURN(MATCH_NOMATCH);
3546
0
              }
3547
0
            GETCHARINCTEST(fc, Feptr);
3548
0
            chartype = UCD_CHARTYPE(fc);
3549
0
            if ((chartype == ucp_Lu ||
3550
0
                 chartype == ucp_Ll ||
3551
0
                 chartype == ucp_Lt) == (Lctype == OP_NOTPROP))
3552
0
              RRETURN(MATCH_NOMATCH);
3553
0
            }
3554
          /* Control never gets here */
3555
3556
975
          case PT_GC:
3557
975
          for (;;)
3558
59.4k
            {
3559
59.4k
            RMATCH(Fecode, RM210);
3560
59.4k
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3561
59.4k
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3562
59.3k
            if (Feptr >= mb->end_subject)
3563
6
              {
3564
6
              SCHECK_PARTIAL();
3565
6
              RRETURN(MATCH_NOMATCH);
3566
0
              }
3567
59.3k
            GETCHARINCTEST(fc, Feptr);
3568
59.3k
            if ((UCD_CATEGORY(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3569
58.4k
              RRETURN(MATCH_NOMATCH);
3570
58.4k
            }
3571
          /* Control never gets here */
3572
3573
36
          case PT_PC:
3574
36
          for (;;)
3575
158
            {
3576
158
            RMATCH(Fecode, RM211);
3577
158
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3578
158
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3579
158
            if (Feptr >= mb->end_subject)
3580
18
              {
3581
18
              SCHECK_PARTIAL();
3582
18
              RRETURN(MATCH_NOMATCH);
3583
0
              }
3584
140
            GETCHARINCTEST(fc, Feptr);
3585
140
            if ((UCD_CHARTYPE(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3586
122
              RRETURN(MATCH_NOMATCH);
3587
122
            }
3588
          /* Control never gets here */
3589
3590
0
          case PT_SC:
3591
0
          for (;;)
3592
0
            {
3593
0
            RMATCH(Fecode, RM212);
3594
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3595
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3596
0
            if (Feptr >= mb->end_subject)
3597
0
              {
3598
0
              SCHECK_PARTIAL();
3599
0
              RRETURN(MATCH_NOMATCH);
3600
0
              }
3601
0
            GETCHARINCTEST(fc, Feptr);
3602
0
            if ((UCD_SCRIPT(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3603
0
              RRETURN(MATCH_NOMATCH);
3604
0
            }
3605
          /* Control never gets here */
3606
3607
0
          case PT_SCX:
3608
0
          for (;;)
3609
0
            {
3610
0
            BOOL ok;
3611
0
            const ucd_record *prop;
3612
0
            RMATCH(Fecode, RM225);
3613
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3614
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3615
0
            if (Feptr >= mb->end_subject)
3616
0
              {
3617
0
              SCHECK_PARTIAL();
3618
0
              RRETURN(MATCH_NOMATCH);
3619
0
              }
3620
0
            GETCHARINCTEST(fc, Feptr);
3621
0
            prop = GET_UCD(fc);
3622
0
            ok = (prop->script == Lpropvalue
3623
0
                  || MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0);
3624
0
            if (ok == (Lctype == OP_NOTPROP))
3625
0
              RRETURN(MATCH_NOMATCH);
3626
0
            }
3627
          /* Control never gets here */
3628
3629
0
          case PT_ALNUM:
3630
0
          for (;;)
3631
0
            {
3632
0
            int category;
3633
0
            RMATCH(Fecode, RM213);
3634
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3635
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3636
0
            if (Feptr >= mb->end_subject)
3637
0
              {
3638
0
              SCHECK_PARTIAL();
3639
0
              RRETURN(MATCH_NOMATCH);
3640
0
              }
3641
0
            GETCHARINCTEST(fc, Feptr);
3642
0
            category = UCD_CATEGORY(fc);
3643
0
            if ((category == ucp_L || category == ucp_N) == (Lctype == OP_NOTPROP))
3644
0
              RRETURN(MATCH_NOMATCH);
3645
0
            }
3646
          /* Control never gets here */
3647
3648
          /* Perl space used to exclude VT, but from Perl 5.18 it is included,
3649
          which means that Perl space and POSIX space are now identical. PCRE
3650
          was changed at release 8.34. */
3651
3652
673k
          case PT_SPACE:    /* Perl space */
3653
673k
          case PT_PXSPACE:  /* POSIX space */
3654
673k
          for (;;)
3655
1.32M
            {
3656
1.32M
            RMATCH(Fecode, RM214);
3657
1.32M
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3658
1.32M
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3659
688k
            if (Feptr >= mb->end_subject)
3660
12.8k
              {
3661
12.8k
              SCHECK_PARTIAL();
3662
12.8k
              RRETURN(MATCH_NOMATCH);
3663
0
              }
3664
675k
            GETCHARINCTEST(fc, Feptr);
3665
675k
            switch(fc)
3666
675k
              {
3667
166k
              HSPACE_CASES:
3668
166k
              VSPACE_CASES:
3669
137k
              if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
3670
1
              break;
3671
3672
654k
              default:
3673
654k
              if ((UCD_CATEGORY(fc) == ucp_Z) == (Lctype == OP_NOTPROP))
3674
654k
                RRETURN(MATCH_NOMATCH);
3675
654k
              break;
3676
675k
              }
3677
675k
            }
3678
          /* Control never gets here */
3679
3680
27.3k
          case PT_WORD:
3681
27.3k
          for (;;)
3682
51.0k
            {
3683
51.0k
            int chartype, category;
3684
51.0k
            RMATCH(Fecode, RM215);
3685
51.0k
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3686
51.0k
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3687
28.8k
            if (Feptr >= mb->end_subject)
3688
270
              {
3689
270
              SCHECK_PARTIAL();
3690
270
              RRETURN(MATCH_NOMATCH);
3691
0
              }
3692
28.6k
            GETCHARINCTEST(fc, Feptr);
3693
28.6k
            chartype = UCD_CHARTYPE(fc);
3694
28.6k
            category = PRIV(ucp_gentype)[chartype];
3695
28.6k
            if ((category == ucp_L ||
3696
25.1k
                 category == ucp_N ||
3697
23.4k
                 chartype == ucp_Mn ||
3698
28.6k
                 chartype == ucp_Pc) == (Lctype == OP_NOTPROP))
3699
23.7k
              RRETURN(MATCH_NOMATCH);
3700
23.7k
            }
3701
          /* Control never gets here */
3702
3703
5.51k
          case PT_CLIST:
3704
5.51k
          for (;;)
3705
10.3k
            {
3706
10.3k
            const uint32_t *cp;
3707
10.3k
            RMATCH(Fecode, RM216);
3708
10.3k
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3709
10.3k
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3710
5.51k
            if (Feptr >= mb->end_subject)
3711
58
              {
3712
58
              SCHECK_PARTIAL();
3713
58
              RRETURN(MATCH_NOMATCH);
3714
0
              }
3715
5.45k
            GETCHARINCTEST(fc, Feptr);
3716
#if PCRE2_CODE_UNIT_WIDTH == 32
3717
            if (fc > MAX_UTF_CODE_POINT)
3718
              {
3719
              if (Lctype == OP_NOTPROP) continue;
3720
              RRETURN(MATCH_NOMATCH);
3721
              }
3722
#endif
3723
5.45k
            cp = PRIV(ucd_caseless_sets) + Lpropvalue;
3724
5.45k
            for (;;)
3725
9.55k
              {
3726
9.55k
              if (fc < *cp)
3727
5.45k
                {
3728
5.45k
                if (Lctype == OP_NOTPROP) break;
3729
5.45k
                RRETURN(MATCH_NOMATCH);
3730
0
                }
3731
4.10k
              if (fc == *cp++)
3732
0
                {
3733
0
                if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
3734
0
                break;
3735
0
                }
3736
4.10k
              }
3737
5.45k
            }
3738
          /* Control never gets here */
3739
3740
0
          case PT_UCNC:
3741
0
          for (;;)
3742
0
            {
3743
0
            RMATCH(Fecode, RM217);
3744
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3745
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3746
0
            if (Feptr >= mb->end_subject)
3747
0
              {
3748
0
              SCHECK_PARTIAL();
3749
0
              RRETURN(MATCH_NOMATCH);
3750
0
              }
3751
0
            GETCHARINCTEST(fc, Feptr);
3752
0
            if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
3753
0
                 fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
3754
0
                 fc >= 0xe000) == (Lctype == OP_NOTPROP))
3755
0
              RRETURN(MATCH_NOMATCH);
3756
0
            }
3757
          /* Control never gets here */
3758
3759
0
          case PT_BIDICL:
3760
0
          for (;;)
3761
0
            {
3762
0
            RMATCH(Fecode, RM224);
3763
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3764
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3765
0
            if (Feptr >= mb->end_subject)
3766
0
              {
3767
0
              SCHECK_PARTIAL();
3768
0
              RRETURN(MATCH_NOMATCH);
3769
0
              }
3770
0
            GETCHARINCTEST(fc, Feptr);
3771
0
            if ((UCD_BIDICLASS(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3772
0
              RRETURN(MATCH_NOMATCH);
3773
0
            }
3774
          /* Control never gets here */
3775
3776
0
          case PT_BOOL:
3777
0
          for (;;)
3778
0
            {
3779
0
            BOOL ok;
3780
0
            const ucd_record *prop;
3781
0
            RMATCH(Fecode, RM223);
3782
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3783
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3784
0
            if (Feptr >= mb->end_subject)
3785
0
              {
3786
0
              SCHECK_PARTIAL();
3787
0
              RRETURN(MATCH_NOMATCH);
3788
0
              }
3789
0
            GETCHARINCTEST(fc, Feptr);
3790
0
            prop = GET_UCD(fc);
3791
0
            ok = MAPBIT(PRIV(ucd_boolprop_sets) +
3792
0
              UCD_BPROPS_PROP(prop), Lpropvalue) != 0;
3793
0
            if (ok == (Lctype == OP_NOTPROP))
3794
0
              RRETURN(MATCH_NOMATCH);
3795
0
            }
3796
          /* Control never gets here */
3797
3798
          /* This should never occur */
3799
0
          default:
3800
0
          return PCRE2_ERROR_INTERNAL;
3801
707k
          }
3802
707k
        }
3803
3804
      /* Match extended Unicode sequences. We will get here only if the
3805
      support is in the binary; otherwise a compile-time error occurs. */
3806
3807
5.15M
      else if (Lctype == OP_EXTUNI)
3808
19.4k
        {
3809
19.4k
        for (;;)
3810
3.69M
          {
3811
3.69M
          RMATCH(Fecode, RM218);
3812
3.69M
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3813
3.69M
          if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3814
3.69M
          if (Feptr >= mb->end_subject)
3815
19.3k
            {
3816
19.3k
            SCHECK_PARTIAL();
3817
19.3k
            RRETURN(MATCH_NOMATCH);
3818
0
            }
3819
3.67M
          else
3820
3.67M
            {
3821
3.67M
            GETCHARINCTEST(fc, Feptr);
3822
3.67M
            Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject,
3823
3.67M
              utf, NULL);
3824
3.67M
            }
3825
3.67M
          CHECK_PARTIAL();
3826
3.67M
          }
3827
19.4k
        }
3828
5.13M
      else
3829
5.13M
#endif     /* SUPPORT_UNICODE */
3830
3831
      /* UTF mode for non-property testing character types. */
3832
3833
5.13M
#ifdef SUPPORT_UNICODE
3834
5.13M
      if (utf)
3835
4.60M
        {
3836
4.60M
        for (;;)
3837
13.0M
          {
3838
13.0M
          RMATCH(Fecode, RM219);
3839
13.0M
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3840
13.0M
          if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3841
8.65M
          if (Feptr >= mb->end_subject)
3842
149k
            {
3843
149k
            SCHECK_PARTIAL();
3844
149k
            RRETURN(MATCH_NOMATCH);
3845
0
            }
3846
8.50M
          if (Lctype == OP_ANY && IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
3847
8.50M
          GETCHARINC(fc, Feptr);
3848
8.50M
          switch(Lctype)
3849
8.50M
            {
3850
46.2k
            case OP_ANY:               /* This is the non-NL case */
3851
46.2k
            if (mb->partial != 0 &&    /* Take care with CRLF partial */
3852
0
                Feptr >= mb->end_subject &&
3853
0
                NLBLOCK->nltype == NLTYPE_FIXED &&
3854
0
                NLBLOCK->nllen == 2 &&
3855
0
                fc == NLBLOCK->nl[0])
3856
0
              {
3857
0
              mb->hitend = TRUE;
3858
0
              if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
3859
0
              }
3860
46.2k
            break;
3861
3862
1.31M
            case OP_ALLANY:
3863
3.91M
            case OP_ANYBYTE:
3864
3.91M
            break;
3865
3866
2.79k
            case OP_ANYNL:
3867
2.79k
            switch(fc)
3868
2.79k
              {
3869
2.62k
              default: RRETURN(MATCH_NOMATCH);
3870
3871
26
              case CHAR_CR:
3872
26
              if (Feptr < mb->end_subject && UCHAR21(Feptr) == CHAR_LF) Feptr++;
3873
26
              break;
3874
3875
111
              case CHAR_LF:
3876
111
              break;
3877
3878
25
              case CHAR_VT:
3879
29
              case CHAR_FF:
3880
29
              case CHAR_NEL:
3881
29
#ifndef EBCDIC
3882
29
              case 0x2028:
3883
29
              case 0x2029:
3884
29
#endif  /* Not EBCDIC */
3885
29
              if (mb->bsr_convention == PCRE2_BSR_ANYCRLF)
3886
29
                RRETURN(MATCH_NOMATCH);
3887
29
              break;
3888
2.79k
              }
3889
166
            break;
3890
3891
99.4k
            case OP_NOT_HSPACE:
3892
99.4k
            switch(fc)
3893
99.4k
              {
3894
5.63k
              HSPACE_CASES: RRETURN(MATCH_NOMATCH);
3895
93.7k
              default: break;
3896
99.4k
              }
3897
93.7k
            break;
3898
3899
93.7k
            case OP_HSPACE:
3900
0
            switch(fc)
3901
0
              {
3902
0
              HSPACE_CASES: break;
3903
0
              default: RRETURN(MATCH_NOMATCH);
3904
0
              }
3905
0
            break;
3906
3907
4.44M
            case OP_NOT_VSPACE:
3908
4.44M
            switch(fc)
3909
4.44M
              {
3910
37.6k
              VSPACE_CASES: RRETURN(MATCH_NOMATCH);
3911
4.40M
              default: break;
3912
4.44M
              }
3913
4.40M
            break;
3914
3915
4.40M
            case OP_VSPACE:
3916
40
            switch(fc)
3917
40
              {
3918
7
              VSPACE_CASES: break;
3919
33
              default: RRETURN(MATCH_NOMATCH);
3920
40
              }
3921
7
            break;
3922
3923
7
            case OP_NOT_DIGIT:
3924
0
            if (fc < 256 && (mb->ctypes[fc] & ctype_digit) != 0)
3925
0
              RRETURN(MATCH_NOMATCH);
3926
0
            break;
3927
3928
0
            case OP_DIGIT:
3929
0
            if (fc >= 256 || (mb->ctypes[fc] & ctype_digit) == 0)
3930
0
              RRETURN(MATCH_NOMATCH);
3931
0
            break;
3932
3933
0
            case OP_NOT_WHITESPACE:
3934
0
            if (fc < 256 && (mb->ctypes[fc] & ctype_space) != 0)
3935
0
              RRETURN(MATCH_NOMATCH);
3936
0
            break;
3937
3938
0
            case OP_WHITESPACE:
3939
0
            if (fc >= 256 || (mb->ctypes[fc] & ctype_space) == 0)
3940
0
              RRETURN(MATCH_NOMATCH);
3941
0
            break;
3942
3943
0
            case OP_NOT_WORDCHAR:
3944
0
            if (fc < 256 && (mb->ctypes[fc] & ctype_word) != 0)
3945
0
              RRETURN(MATCH_NOMATCH);
3946
0
            break;
3947
3948
0
            case OP_WORDCHAR:
3949
0
            if (fc >= 256 || (mb->ctypes[fc] & ctype_word) == 0)
3950
0
              RRETURN(MATCH_NOMATCH);
3951
0
            break;
3952
3953
0
            default:
3954
0
            return PCRE2_ERROR_INTERNAL;
3955
8.50M
            }
3956
8.50M
          }
3957
4.60M
        }
3958
533k
      else
3959
533k
#endif  /* SUPPORT_UNICODE */
3960
3961
      /* Not UTF mode */
3962
533k
        {
3963
533k
        for (;;)
3964
29.8M
          {
3965
29.8M
          RMATCH(Fecode, RM33);
3966
29.8M
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3967
29.8M
          if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3968
29.8M
          if (Feptr >= mb->end_subject)
3969
35.3k
            {
3970
35.3k
            SCHECK_PARTIAL();
3971
35.3k
            RRETURN(MATCH_NOMATCH);
3972
0
            }
3973
29.7M
          if (Lctype == OP_ANY && IS_NEWLINE(Feptr))
3974
29.7M
            RRETURN(MATCH_NOMATCH);
3975
29.7M
          fc = *Feptr++;
3976
29.7M
          switch(Lctype)
3977
29.7M
            {
3978
217k
            case OP_ANY:               /* This is the non-NL case */
3979
217k
            if (mb->partial != 0 &&    /* Take care with CRLF partial */
3980
0
                Feptr >= mb->end_subject &&
3981
0
                NLBLOCK->nltype == NLTYPE_FIXED &&
3982
0
                NLBLOCK->nllen == 2 &&
3983
0
                fc == NLBLOCK->nl[0])
3984
0
              {
3985
0
              mb->hitend = TRUE;
3986
0
              if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
3987
0
              }
3988
217k
            break;
3989
3990
1.70M
            case OP_ALLANY:
3991
1.70M
            case OP_ANYBYTE:
3992
1.70M
            break;
3993
3994
884
            case OP_ANYNL:
3995
884
            switch(fc)
3996
884
              {
3997
828
              default: RRETURN(MATCH_NOMATCH);
3998
3999
21
              case CHAR_CR:
4000
21
              if (Feptr < mb->end_subject && *Feptr == CHAR_LF) Feptr++;
4001
21
              break;
4002
4003
7
              case CHAR_LF:
4004
7
              break;
4005
4006
19
              case CHAR_VT:
4007
19
              case CHAR_FF:
4008
28
              case CHAR_NEL:
4009
#if PCRE2_CODE_UNIT_WIDTH != 8
4010
              case 0x2028:
4011
              case 0x2029:
4012
#endif
4013
28
              if (mb->bsr_convention == PCRE2_BSR_ANYCRLF)
4014
28
                RRETURN(MATCH_NOMATCH);
4015
28
              break;
4016
884
              }
4017
56
            break;
4018
4019
45.8k
            case OP_NOT_HSPACE:
4020
45.8k
            switch(fc)
4021
45.8k
              {
4022
44.2k
              default: break;
4023
44.2k
              HSPACE_BYTE_CASES:
4024
#if PCRE2_CODE_UNIT_WIDTH != 8
4025
              HSPACE_MULTIBYTE_CASES:
4026
#endif
4027
1.59k
              RRETURN(MATCH_NOMATCH);
4028
45.8k
              }
4029
44.2k
            break;
4030
4031
44.2k
            case OP_HSPACE:
4032
0
            switch(fc)
4033
0
              {
4034
0
              default: RRETURN(MATCH_NOMATCH);
4035
0
              HSPACE_BYTE_CASES:
4036
#if PCRE2_CODE_UNIT_WIDTH != 8
4037
              HSPACE_MULTIBYTE_CASES:
4038
#endif
4039
0
              break;
4040
0
              }
4041
0
            break;
4042
4043
24.3M
            case OP_NOT_VSPACE:
4044
24.3M
            switch(fc)
4045
24.3M
              {
4046
23.9M
              default: break;
4047
23.9M
              VSPACE_BYTE_CASES:
4048
#if PCRE2_CODE_UNIT_WIDTH != 8
4049
              VSPACE_MULTIBYTE_CASES:
4050
#endif
4051
376k
              RRETURN(MATCH_NOMATCH);
4052
24.3M
              }
4053
23.9M
            break;
4054
4055
23.9M
            case OP_VSPACE:
4056
83
            switch(fc)
4057
83
              {
4058
16
              default: RRETURN(MATCH_NOMATCH);
4059
230
              VSPACE_BYTE_CASES:
4060
#if PCRE2_CODE_UNIT_WIDTH != 8
4061
              VSPACE_MULTIBYTE_CASES:
4062
#endif
4063
230
              break;
4064
83
              }
4065
67
            break;
4066
4067
953k
            case OP_NOT_DIGIT:
4068
953k
            if (MAX_255(fc) && (mb->ctypes[fc] & ctype_digit) != 0)
4069
942k
              RRETURN(MATCH_NOMATCH);
4070
942k
            break;
4071
4072
168
            case OP_DIGIT:
4073
168
            if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_digit) == 0)
4074
108
              RRETURN(MATCH_NOMATCH);
4075
108
            break;
4076
4077
426k
            case OP_NOT_WHITESPACE:
4078
426k
            if (MAX_255(fc) && (mb->ctypes[fc] & ctype_space) != 0)
4079
420k
              RRETURN(MATCH_NOMATCH);
4080
420k
            break;
4081
4082
52
            case OP_WHITESPACE:
4083
52
            if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_space) == 0)
4084
35
              RRETURN(MATCH_NOMATCH);
4085
35
            break;
4086
4087
2.02M
            case OP_NOT_WORDCHAR:
4088
2.02M
            if (MAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0)
4089
1.93M
              RRETURN(MATCH_NOMATCH);
4090
1.93M
            break;
4091
4092
74.4k
            case OP_WORDCHAR:
4093
74.4k
            if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_word) == 0)
4094
72.0k
              RRETURN(MATCH_NOMATCH);
4095
72.0k
            break;
4096
4097
0
            default:
4098
0
            return PCRE2_ERROR_INTERNAL;
4099
29.7M
            }
4100
29.7M
          }
4101
533k
        }
4102
      /* Control never gets here */
4103
5.86M
      }
4104
4105
    /* If maximizing, it is worth using inline code for speed, doing the type
4106
    test once at the start (i.e. keep it out of the loops). Once again,
4107
    "notmatch" can be an ordinary local variable because the loops do not call
4108
    RMATCH. */
4109
4110
128M
    else
4111
128M
      {
4112
128M
      Lstart_eptr = Feptr;  /* Remember where we started */
4113
4114
128M
#ifdef SUPPORT_UNICODE
4115
128M
      if (proptype >= 0)
4116
8.97M
        {
4117
8.97M
        BOOL notmatch = Lctype == OP_NOTPROP;
4118
8.97M
        switch(proptype)
4119
8.97M
          {
4120
0
          case PT_ANY:
4121
0
          for (i = Lmin; i < Lmax; i++)
4122
0
            {
4123
0
            int len = 1;
4124
0
            if (Feptr >= mb->end_subject)
4125
0
              {
4126
0
              SCHECK_PARTIAL();
4127
0
              break;
4128
0
              }
4129
0
            GETCHARLENTEST(fc, Feptr, len);
4130
0
            if (notmatch) break;
4131
0
            Feptr+= len;
4132
0
            }
4133
0
          break;
4134
4135
0
          case PT_LAMP:
4136
0
          for (i = Lmin; i < Lmax; i++)
4137
0
            {
4138
0
            int chartype;
4139
0
            int len = 1;
4140
0
            if (Feptr >= mb->end_subject)
4141
0
              {
4142
0
              SCHECK_PARTIAL();
4143
0
              break;
4144
0
              }
4145
0
            GETCHARLENTEST(fc, Feptr, len);
4146
0
            chartype = UCD_CHARTYPE(fc);
4147
0
            if ((chartype == ucp_Lu ||
4148
0
                 chartype == ucp_Ll ||
4149
0
                 chartype == ucp_Lt) == notmatch)
4150
0
              break;
4151
0
            Feptr+= len;
4152
0
            }
4153
0
          break;
4154
4155
1.09k
          case PT_GC:
4156
21.4k
          for (i = Lmin; i < Lmax; i++)
4157
21.3k
            {
4158
21.3k
            int len = 1;
4159
21.3k
            if (Feptr >= mb->end_subject)
4160
12
              {
4161
12
              SCHECK_PARTIAL();
4162
12
              break;
4163
12
              }
4164
21.3k
            GETCHARLENTEST(fc, Feptr, len);
4165
21.3k
            if ((UCD_CATEGORY(fc) == Lpropvalue) == notmatch) break;
4166
20.3k
            Feptr+= len;
4167
20.3k
            }
4168
1.09k
          break;
4169
4170
2.86k
          case PT_PC:
4171
47.7k
          for (i = Lmin; i < Lmax; i++)
4172
47.7k
            {
4173
47.7k
            int len = 1;
4174
47.7k
            if (Feptr >= mb->end_subject)
4175
177
              {
4176
177
              SCHECK_PARTIAL();
4177
177
              break;
4178
177
              }
4179
47.5k
            GETCHARLENTEST(fc, Feptr, len);
4180
47.5k
            if ((UCD_CHARTYPE(fc) == Lpropvalue) == notmatch) break;
4181
44.8k
            Feptr+= len;
4182
44.8k
            }
4183
2.86k
          break;
4184
4185
2.86k
          case PT_SC:
4186
0
          for (i = Lmin; i < Lmax; i++)
4187
0
            {
4188
0
            int len = 1;
4189
0
            if (Feptr >= mb->end_subject)
4190
0
              {
4191
0
              SCHECK_PARTIAL();
4192
0
              break;
4193
0
              }
4194
0
            GETCHARLENTEST(fc, Feptr, len);
4195
0
            if ((UCD_SCRIPT(fc) == Lpropvalue) == notmatch) break;
4196
0
            Feptr+= len;
4197
0
            }
4198
0
          break;
4199
4200
0
          case PT_SCX:
4201
0
          for (i = Lmin; i < Lmax; i++)
4202
0
            {
4203
0
            BOOL ok;
4204
0
            const ucd_record *prop;
4205
0
            int len = 1;
4206
0
            if (Feptr >= mb->end_subject)
4207
0
              {
4208
0
              SCHECK_PARTIAL();
4209
0
              break;
4210
0
              }
4211
0
            GETCHARLENTEST(fc, Feptr, len);
4212
0
            prop = GET_UCD(fc);
4213
0
            ok = (prop->script == Lpropvalue ||
4214
0
                  MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0);
4215
0
            if (ok == notmatch) break;
4216
0
            Feptr+= len;
4217
0
            }
4218
0
          break;
4219
4220
0
          case PT_ALNUM:
4221
0
          for (i = Lmin; i < Lmax; i++)
4222
0
            {
4223
0
            int category;
4224
0
            int len = 1;
4225
0
            if (Feptr >= mb->end_subject)
4226
0
              {
4227
0
              SCHECK_PARTIAL();
4228
0
              break;
4229
0
              }
4230
0
            GETCHARLENTEST(fc, Feptr, len);
4231
0
            category = UCD_CATEGORY(fc);
4232
0
            if ((category == ucp_L || category == ucp_N) == notmatch)
4233
0
              break;
4234
0
            Feptr+= len;
4235
0
            }
4236
0
          break;
4237
4238
          /* Perl space used to exclude VT, but from Perl 5.18 it is included,
4239
          which means that Perl space and POSIX space are now identical. PCRE
4240
          was changed at release 8.34. */
4241
4242
1.02M
          case PT_SPACE:    /* Perl space */
4243
1.02M
          case PT_PXSPACE:  /* POSIX space */
4244
13.7M
          for (i = Lmin; i < Lmax; i++)
4245
13.7M
            {
4246
13.7M
            int len = 1;
4247
13.7M
            if (Feptr >= mb->end_subject)
4248
139k
              {
4249
139k
              SCHECK_PARTIAL();
4250
139k
              break;
4251
139k
              }
4252
13.6M
            GETCHARLENTEST(fc, Feptr, len);
4253
13.6M
            switch(fc)
4254
13.6M
              {
4255
20.6M
              HSPACE_CASES:
4256
20.6M
              VSPACE_CASES:
4257
9.95M
              if (notmatch) goto ENDLOOP99;  /* Break the loop */
4258
954k
              break;
4259
4260
12.1M
              default:
4261
12.1M
              if ((UCD_CATEGORY(fc) == ucp_Z) == notmatch)
4262
376k
                goto ENDLOOP99;   /* Break the loop */
4263
11.7M
              break;
4264
13.6M
              }
4265
12.7M
            Feptr+= len;
4266
12.7M
            }
4267
1.02M
          ENDLOOP99:
4268
1.02M
          break;
4269
4270
7.94M
          case PT_WORD:
4271
56.7M
          for (i = Lmin; i < Lmax; i++)
4272
53.4M
            {
4273
53.4M
            int chartype, category;
4274
53.4M
            int len = 1;
4275
53.4M
            if (Feptr >= mb->end_subject)
4276
357k
              {
4277
357k
              SCHECK_PARTIAL();
4278
357k
              break;
4279
357k
              }
4280
53.0M
            GETCHARLENTEST(fc, Feptr, len);
4281
53.0M
            chartype = UCD_CHARTYPE(fc);
4282
53.0M
            category = PRIV(ucp_gentype)[chartype];
4283
53.0M
            if ((category == ucp_L ||
4284
48.8M
                 category == ucp_N ||
4285
48.7M
                 chartype == ucp_Mn ||
4286
48.7M
                 chartype == ucp_Pc) == notmatch)
4287
4.27M
              break;
4288
48.7M
            Feptr+= len;
4289
48.7M
            }
4290
7.94M
          break;
4291
4292
7.94M
          case PT_CLIST:
4293
1.15k
          for (i = Lmin; i < Lmax; i++)
4294
1.14k
            {
4295
1.14k
            const uint32_t *cp;
4296
1.14k
            int len = 1;
4297
1.14k
            if (Feptr >= mb->end_subject)
4298
28
              {
4299
28
              SCHECK_PARTIAL();
4300
28
              break;
4301
28
              }
4302
1.11k
            GETCHARLENTEST(fc, Feptr, len);
4303
#if PCRE2_CODE_UNIT_WIDTH == 32
4304
            if (fc > MAX_UTF_CODE_POINT)
4305
              {
4306
              if (!notmatch) goto GOT_MAX;
4307
              }
4308
            else
4309
#endif
4310
1.11k
              {
4311
1.11k
              cp = PRIV(ucd_caseless_sets) + Lpropvalue;
4312
1.11k
              for (;;)
4313
1.47k
                {
4314
1.47k
                if (fc < *cp)
4315
1.10k
                  { if (notmatch) break; else goto GOT_MAX; }
4316
371
                if (fc == *cp++)
4317
12
                  { if (notmatch) goto GOT_MAX; else break; }
4318
371
                }
4319
1.11k
              }
4320
4321
761
            Feptr += len;
4322
761
            }
4323
393
          GOT_MAX:
4324
393
          break;
4325
4326
40
          case PT_UCNC:
4327
0
          for (i = Lmin; i < Lmax; i++)
4328
0
            {
4329
0
            int len = 1;
4330
0
            if (Feptr >= mb->end_subject)
4331
0
              {
4332
0
              SCHECK_PARTIAL();
4333
0
              break;
4334
0
              }
4335
0
            GETCHARLENTEST(fc, Feptr, len);
4336
0
            if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
4337
0
                 fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
4338
0
                 fc >= 0xe000) == notmatch)
4339
0
              break;
4340
0
            Feptr += len;
4341
0
            }
4342
0
          break;
4343
4344
0
          case PT_BIDICL:
4345
0
          for (i = Lmin; i < Lmax; i++)
4346
0
            {
4347
0
            int len = 1;
4348
0
            if (Feptr >= mb->end_subject)
4349
0
              {
4350
0
              SCHECK_PARTIAL();
4351
0
              break;
4352
0
              }
4353
0
            GETCHARLENTEST(fc, Feptr, len);
4354
0
            if ((UCD_BIDICLASS(fc) == Lpropvalue) == notmatch) break;
4355
0
            Feptr+= len;
4356
0
            }
4357
0
          break;
4358
4359
0
          case PT_BOOL:
4360
0
          for (i = Lmin; i < Lmax; i++)
4361
0
            {
4362
0
            BOOL ok;
4363
0
            const ucd_record *prop;
4364
0
            int len = 1;
4365
0
            if (Feptr >= mb->end_subject)
4366
0
              {
4367
0
              SCHECK_PARTIAL();
4368
0
              break;
4369
0
              }
4370
0
            GETCHARLENTEST(fc, Feptr, len);
4371
0
            prop = GET_UCD(fc);
4372
0
            ok = MAPBIT(PRIV(ucd_boolprop_sets) +
4373
0
              UCD_BPROPS_PROP(prop), Lpropvalue) != 0;
4374
0
            if (ok == notmatch) break;
4375
0
            Feptr+= len;
4376
0
            }
4377
0
          break;
4378
4379
0
          default:
4380
0
          return PCRE2_ERROR_INTERNAL;
4381
8.97M
          }
4382
4383
        /* Feptr is now past the end of the maximum run */
4384
4385
8.97M
        if (reptype == REPTYPE_POS) continue;    /* No backtracking */
4386
4387
        /* After \C in UTF mode, Lstart_eptr might be in the middle of a
4388
        Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
4389
        go too far. */
4390
4391
5.07M
        for(;;)
4392
21.2M
          {
4393
21.2M
          if (Feptr <= Lstart_eptr) break;
4394
16.1M
          RMATCH(Fecode, RM222);
4395
16.1M
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4396
16.1M
          Feptr--;
4397
16.1M
          if (utf) BACKCHAR(Feptr);
4398
16.1M
          }
4399
5.07M
        }
4400
4401
      /* Match extended Unicode grapheme clusters. We will get here only if the
4402
      support is in the binary; otherwise a compile-time error occurs. */
4403
4404
119M
      else if (Lctype == OP_EXTUNI)
4405
84.5k
        {
4406
14.7M
        for (i = Lmin; i < Lmax; i++)
4407
14.7M
          {
4408
14.7M
          if (Feptr >= mb->end_subject)
4409
84.3k
            {
4410
84.3k
            SCHECK_PARTIAL();
4411
84.3k
            break;
4412
84.3k
            }
4413
14.6M
          else
4414
14.6M
            {
4415
14.6M
            GETCHARINCTEST(fc, Feptr);
4416
14.6M
            Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject,
4417
14.6M
              utf, NULL);
4418
14.6M
            }
4419
14.6M
          CHECK_PARTIAL();
4420
14.6M
          }
4421
4422
        /* Feptr is now past the end of the maximum run */
4423
4424
84.5k
        if (reptype == REPTYPE_POS) continue;    /* No backtracking */
4425
4426
        /* We use <= Lstart_eptr rather than == Lstart_eptr to detect the start
4427
        of the run while backtracking because the use of \C in UTF mode can
4428
        cause BACKCHAR to move back past Lstart_eptr. This is just palliative;
4429
        the use of \C in UTF mode is fraught with danger. */
4430
4431
84.5k
        for(;;)
4432
14.7M
          {
4433
14.7M
          int lgb, rgb;
4434
14.7M
          PCRE2_SPTR fptr;
4435
4436
14.7M
          if (Feptr <= Lstart_eptr) break;   /* At start of char run */
4437
14.6M
          RMATCH(Fecode, RM220);
4438
14.6M
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4439
4440
          /* Backtracking over an extended grapheme cluster involves inspecting
4441
          the previous two characters (if present) to see if a break is
4442
          permitted between them. */
4443
4444
14.6M
          Feptr--;
4445
14.6M
          if (!utf) fc = *Feptr; else
4446
9.89M
            {
4447
9.89M
            BACKCHAR(Feptr);
4448
9.89M
            GETCHAR(fc, Feptr);
4449
9.89M
            }
4450
14.6M
          rgb = UCD_GRAPHBREAK(fc);
4451
4452
14.6M
          for (;;)
4453
14.6M
            {
4454
14.6M
            if (Feptr <= Lstart_eptr) break;   /* At start of char run */
4455
14.5M
            fptr = Feptr - 1;
4456
14.5M
            if (!utf) fc = *fptr; else
4457
9.88M
              {
4458
9.88M
              BACKCHAR(fptr);
4459
9.88M
              GETCHAR(fc, fptr);
4460
9.88M
              }
4461
14.5M
            lgb = UCD_GRAPHBREAK(fc);
4462
14.5M
            if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
4463
28.6k
            Feptr = fptr;
4464
28.6k
            rgb = lgb;
4465
28.6k
            }
4466
14.6M
          }
4467
84.5k
        }
4468
4469
119M
      else
4470
119M
#endif   /* SUPPORT_UNICODE */
4471
4472
119M
#ifdef SUPPORT_UNICODE
4473
119M
      if (utf)
4474
8.69M
        {
4475
8.69M
        switch(Lctype)
4476
8.69M
          {
4477
13.4k
          case OP_ANY:
4478
54.5k
          for (i = Lmin; i < Lmax; i++)
4479
41.8k
            {
4480
41.8k
            if (Feptr >= mb->end_subject)
4481
596
              {
4482
596
              SCHECK_PARTIAL();
4483
596
              break;
4484
596
              }
4485
41.2k
            if (IS_NEWLINE(Feptr)) break;
4486
41.1k
            if (mb->partial != 0 &&    /* Take care with CRLF partial */
4487
0
                Feptr + 1 >= mb->end_subject &&
4488
0
                NLBLOCK->nltype == NLTYPE_FIXED &&
4489
0
                NLBLOCK->nllen == 2 &&
4490
0
                UCHAR21(Feptr) == NLBLOCK->nl[0])
4491
0
              {
4492
0
              mb->hitend = TRUE;
4493
0
              if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
4494
0
              }
4495
41.1k
            Feptr++;
4496
41.1k
            ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
4497
41.1k
            }
4498
13.4k
          break;
4499
4500
2.36M
          case OP_ALLANY:
4501
2.36M
          if (Lmax < UINT32_MAX)
4502
1.24k
            {
4503
2.46k
            for (i = Lmin; i < Lmax; i++)
4504
1.24k
              {
4505
1.24k
              if (Feptr >= mb->end_subject)
4506
30
                {
4507
30
                SCHECK_PARTIAL();
4508
30
                break;
4509
30
                }
4510
1.21k
              Feptr++;
4511
1.21k
              ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
4512
1.21k
              }
4513
1.24k
            }
4514
2.35M
          else
4515
2.35M
            {
4516
2.35M
            Feptr = mb->end_subject;   /* Unlimited UTF-8 repeat */
4517
2.35M
            SCHECK_PARTIAL();
4518
2.35M
            }
4519
2.36M
          break;
4520
4521
          /* The "byte" (i.e. "code unit") case is the same as non-UTF */
4522
4523
2.36M
          case OP_ANYBYTE:
4524
281
          fc = Lmax - Lmin;
4525
281
          if (fc > (uint32_t)(mb->end_subject - Feptr))
4526
281
            {
4527
281
            Feptr = mb->end_subject;
4528
281
            SCHECK_PARTIAL();
4529
281
            }
4530
0
          else Feptr += fc;
4531
281
          break;
4532
4533
6.29M
          case OP_ANYNL:
4534
6.53M
          for (i = Lmin; i < Lmax; i++)
4535
6.29M
            {
4536
6.29M
            int len = 1;
4537
6.29M
            if (Feptr >= mb->end_subject)
4538
138k
              {
4539
138k
              SCHECK_PARTIAL();
4540
138k
              break;
4541
138k
              }
4542
6.15M
            GETCHARLEN(fc, Feptr, len);
4543
6.15M
            if (fc == CHAR_CR)
4544
125
              {
4545
125
              if (++Feptr >= mb->end_subject) break;
4546
125
              if (UCHAR21(Feptr) == CHAR_LF) Feptr++;
4547
125
              }
4548
6.15M
            else
4549
6.15M
              {
4550
6.15M
              if (fc != CHAR_LF &&
4551
6.01M
                  (mb->bsr_convention == PCRE2_BSR_ANYCRLF ||
4552
6.01M
                   (fc != CHAR_VT && fc != CHAR_FF && fc != CHAR_NEL
4553
5.92M
#ifndef EBCDIC
4554
5.92M
                    && fc != 0x2028 && fc != 0x2029
4555
6.01M
#endif  /* Not EBCDIC */
4556
6.01M
                    )))
4557
5.92M
                break;
4558
232k
              Feptr += len;
4559
232k
              }
4560
6.15M
            }
4561
6.29M
          break;
4562
4563
6.29M
          case OP_NOT_HSPACE:
4564
10.5k
          case OP_HSPACE:
4565
138k
          for (i = Lmin; i < Lmax; i++)
4566
131k
            {
4567
131k
            BOOL gotspace;
4568
131k
            int len = 1;
4569
131k
            if (Feptr >= mb->end_subject)
4570
581
              {
4571
581
              SCHECK_PARTIAL();
4572
581
              break;
4573
581
              }
4574
131k
            GETCHARLEN(fc, Feptr, len);
4575
131k
            switch(fc)
4576
131k
              {
4577
4.20k
              HSPACE_CASES: gotspace = TRUE; break;
4578
126k
              default: gotspace = FALSE; break;
4579
131k
              }
4580
131k
            if (gotspace == (Lctype == OP_NOT_HSPACE)) break;
4581
127k
            Feptr += len;
4582
127k
            }
4583
10.5k
          break;
4584
4585
10.5k
          case OP_NOT_VSPACE:
4586
14.6k
          case OP_VSPACE:
4587
79.1k
          for (i = Lmin; i < Lmax; i++)
4588
78.7k
            {
4589
78.7k
            BOOL gotspace;
4590
78.7k
            int len = 1;
4591
78.7k
            if (Feptr >= mb->end_subject)
4592
196
              {
4593
196
              SCHECK_PARTIAL();
4594
196
              break;
4595
196
              }
4596
78.5k
            GETCHARLEN(fc, Feptr, len);
4597
78.5k
            switch(fc)
4598
78.5k
              {
4599
3.34k
              VSPACE_CASES: gotspace = TRUE; break;
4600
75.1k
              default: gotspace = FALSE; break;
4601
78.5k
              }
4602
78.5k
            if (gotspace == (Lctype == OP_NOT_VSPACE)) break;
4603
64.5k
            Feptr += len;
4604
64.5k
            }
4605
14.6k
          break;
4606
4607
14.6k
          case OP_NOT_DIGIT:
4608
0
          for (i = Lmin; i < Lmax; i++)
4609
0
            {
4610
0
            int len = 1;
4611
0
            if (Feptr >= mb->end_subject)
4612
0
              {
4613
0
              SCHECK_PARTIAL();
4614
0
              break;
4615
0
              }
4616
0
            GETCHARLEN(fc, Feptr, len);
4617
0
            if (fc < 256 && (mb->ctypes[fc] & ctype_digit) != 0) break;
4618
0
            Feptr+= len;
4619
0
            }
4620
0
          break;
4621
4622
0
          case OP_DIGIT:
4623
0
          for (i = Lmin; i < Lmax; i++)
4624
0
            {
4625
0
            int len = 1;
4626
0
            if (Feptr >= mb->end_subject)
4627
0
              {
4628
0
              SCHECK_PARTIAL();
4629
0
              break;
4630
0
              }
4631
0
            GETCHARLEN(fc, Feptr, len);
4632
0
            if (fc >= 256 ||(mb->ctypes[fc] & ctype_digit) == 0) break;
4633
0
            Feptr+= len;
4634
0
            }
4635
0
          break;
4636
4637
0
          case OP_NOT_WHITESPACE:
4638
0
          for (i = Lmin; i < Lmax; i++)
4639
0
            {
4640
0
            int len = 1;
4641
0
            if (Feptr >= mb->end_subject)
4642
0
              {
4643
0
              SCHECK_PARTIAL();
4644
0
              break;
4645
0
              }
4646
0
            GETCHARLEN(fc, Feptr, len);
4647
0
            if (fc < 256 && (mb->ctypes[fc] & ctype_space) != 0) break;
4648
0
            Feptr+= len;
4649
0
            }
4650
0
          break;
4651
4652
0
          case OP_WHITESPACE:
4653
0
          for (i = Lmin; i < Lmax; i++)
4654
0
            {
4655
0
            int len = 1;
4656
0
            if (Feptr >= mb->end_subject)
4657
0
              {
4658
0
              SCHECK_PARTIAL();
4659
0
              break;
4660
0
              }
4661
0
            GETCHARLEN(fc, Feptr, len);
4662
0
            if (fc >= 256 ||(mb->ctypes[fc] & ctype_space) == 0) break;
4663
0
            Feptr+= len;
4664
0
            }
4665
0
          break;
4666
4667
0
          case OP_NOT_WORDCHAR:
4668
0
          for (i = Lmin; i < Lmax; i++)
4669
0
            {
4670
0
            int len = 1;
4671
0
            if (Feptr >= mb->end_subject)
4672
0
              {
4673
0
              SCHECK_PARTIAL();
4674
0
              break;
4675
0
              }
4676
0
            GETCHARLEN(fc, Feptr, len);
4677
0
            if (fc < 256 && (mb->ctypes[fc] & ctype_word) != 0) break;
4678
0
            Feptr+= len;
4679
0
            }
4680
0
          break;
4681
4682
0
          case OP_WORDCHAR:
4683
0
          for (i = Lmin; i < Lmax; i++)
4684
0
            {
4685
0
            int len = 1;
4686
0
            if (Feptr >= mb->end_subject)
4687
0
              {
4688
0
              SCHECK_PARTIAL();
4689
0
              break;
4690
0
              }
4691
0
            GETCHARLEN(fc, Feptr, len);
4692
0
            if (fc >= 256 || (mb->ctypes[fc] & ctype_word) == 0) break;
4693
0
            Feptr+= len;
4694
0
            }
4695
0
          break;
4696
4697
0
          default:
4698
0
          return PCRE2_ERROR_INTERNAL;
4699
8.69M
          }
4700
4701
8.69M
        if (reptype == REPTYPE_POS) continue;    /* No backtracking */
4702
4703
        /* After \C in UTF mode, Lstart_eptr might be in the middle of a
4704
        Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't go
4705
        too far. */
4706
4707
2.39M
        for(;;)
4708
137M
          {
4709
137M
          if (Feptr <= Lstart_eptr) break;
4710
134M
          RMATCH(Fecode, RM221);
4711
134M
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4712
134M
          Feptr--;
4713
134M
          BACKCHAR(Feptr);
4714
134M
          if (Lctype == OP_ANYNL && Feptr > Lstart_eptr &&
4715
31
              UCHAR21(Feptr) == CHAR_NL && UCHAR21(Feptr - 1) == CHAR_CR)
4716
0
            Feptr--;
4717
134M
          }
4718
2.39M
        }
4719
111M
      else
4720
111M
#endif  /* SUPPORT_UNICODE */
4721
4722
      /* Not UTF mode */
4723
111M
        {
4724
111M
        switch(Lctype)
4725
111M
          {
4726
103M
          case OP_ANY:
4727
225M
          for (i = Lmin; i < Lmax; i++)
4728
122M
            {
4729
122M
            if (Feptr >= mb->end_subject)
4730
377k
              {
4731
377k
              SCHECK_PARTIAL();
4732
377k
              break;
4733
377k
              }
4734
122M
            if (IS_NEWLINE(Feptr)) break;
4735
122M
            if (mb->partial != 0 &&    /* Take care with CRLF partial */
4736
0
                Feptr + 1 >= mb->end_subject &&
4737
0
                NLBLOCK->nltype == NLTYPE_FIXED &&
4738
0
                NLBLOCK->nllen == 2 &&
4739
0
                *Feptr == NLBLOCK->nl[0])
4740
0
              {
4741
0
              mb->hitend = TRUE;
4742
0
              if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
4743
0
              }
4744
122M
            Feptr++;
4745
122M
            }
4746
103M
          break;
4747
4748
103M
          case OP_ALLANY:
4749
5.11M
          case OP_ANYBYTE:
4750
5.11M
          fc = Lmax - Lmin;
4751
5.11M
          if (fc > (uint32_t)(mb->end_subject - Feptr))
4752
68.8k
            {
4753
68.8k
            Feptr = mb->end_subject;
4754
68.8k
            SCHECK_PARTIAL();
4755
68.8k
            }
4756
5.04M
          else Feptr += fc;
4757
5.11M
          break;
4758
4759
5.11M
          case OP_ANYNL:
4760
96.0k
          for (i = Lmin; i < Lmax; i++)
4761
94.6k
            {
4762
94.6k
            if (Feptr >= mb->end_subject)
4763
7.25k
              {
4764
7.25k
              SCHECK_PARTIAL();
4765
7.25k
              break;
4766
7.25k
              }
4767
87.3k
            fc = *Feptr;
4768
87.3k
            if (fc == CHAR_CR)
4769
8.33k
              {
4770
8.33k
              if (++Feptr >= mb->end_subject) break;
4771
1.32k
              if (*Feptr == CHAR_LF) Feptr++;
4772
1.32k
              }
4773
79.0k
            else
4774
79.0k
              {
4775
79.0k
              if (fc != CHAR_LF && (mb->bsr_convention == PCRE2_BSR_ANYCRLF ||
4776
73.7k
                 (fc != CHAR_VT && fc != CHAR_FF && fc != CHAR_NEL
4777
#if PCRE2_CODE_UNIT_WIDTH != 8
4778
                 && fc != 0x2028 && fc != 0x2029
4779
#endif
4780
73.7k
                 ))) break;
4781
21.4k
              Feptr++;
4782
21.4k
              }
4783
87.3k
            }
4784
73.2k
          break;
4785
4786
73.2k
          case OP_NOT_HSPACE:
4787
607k
          for (i = Lmin; i < Lmax; i++)
4788
607k
            {
4789
607k
            if (Feptr >= mb->end_subject)
4790
6.05k
              {
4791
6.05k
              SCHECK_PARTIAL();
4792
6.05k
              break;
4793
6.05k
              }
4794
601k
            switch(*Feptr)
4795
601k
              {
4796
592k
              default: Feptr++; break;
4797
19.9k
              HSPACE_BYTE_CASES:
4798
#if PCRE2_CODE_UNIT_WIDTH != 8
4799
              HSPACE_MULTIBYTE_CASES:
4800
#endif
4801
19.9k
              goto ENDLOOP00;
4802
601k
              }
4803
601k
            }
4804
15.1k
          ENDLOOP00:
4805
15.1k
          break;
4806
4807
636k
          case OP_HSPACE:
4808
668k
          for (i = Lmin; i < Lmax; i++)
4809
650k
            {
4810
650k
            if (Feptr >= mb->end_subject)
4811
2.96k
              {
4812
2.96k
              SCHECK_PARTIAL();
4813
2.96k
              break;
4814
2.96k
              }
4815
647k
            switch(*Feptr)
4816
647k
              {
4817
615k
              default: goto ENDLOOP01;
4818
615k
              HSPACE_BYTE_CASES:
4819
#if PCRE2_CODE_UNIT_WIDTH != 8
4820
              HSPACE_MULTIBYTE_CASES:
4821
#endif
4822
73.3k
              Feptr++; break;
4823
647k
              }
4824
647k
            }
4825
636k
          ENDLOOP01:
4826
636k
          break;
4827
4828
1.67M
          case OP_NOT_VSPACE:
4829
146M
          for (i = Lmin; i < Lmax; i++)
4830
146M
            {
4831
146M
            if (Feptr >= mb->end_subject)
4832
10.5k
              {
4833
10.5k
              SCHECK_PARTIAL();
4834
10.5k
              break;
4835
10.5k
              }
4836
146M
            switch(*Feptr)
4837
146M
              {
4838
145M
              default: Feptr++; break;
4839
5.55M
              VSPACE_BYTE_CASES:
4840
#if PCRE2_CODE_UNIT_WIDTH != 8
4841
              VSPACE_MULTIBYTE_CASES:
4842
#endif
4843
5.55M
              goto ENDLOOP02;
4844
146M
              }
4845
146M
            }
4846
1.67M
          ENDLOOP02:
4847
1.67M
          break;
4848
4849
11.0k
          case OP_VSPACE:
4850
504
          for (i = Lmin; i < Lmax; i++)
4851
498
            {
4852
498
            if (Feptr >= mb->end_subject)
4853
0
              {
4854
0
              SCHECK_PARTIAL();
4855
0
              break;
4856
0
              }
4857
498
            switch(*Feptr)
4858
498
              {
4859
349
              default: goto ENDLOOP03;
4860
641
              VSPACE_BYTE_CASES:
4861
#if PCRE2_CODE_UNIT_WIDTH != 8
4862
              VSPACE_MULTIBYTE_CASES:
4863
#endif
4864
641
              Feptr++; break;
4865
498
              }
4866
498
            }
4867
355
          ENDLOOP03:
4868
355
          break;
4869
4870
4.74k
          case OP_NOT_DIGIT:
4871
29.1k
          for (i = Lmin; i < Lmax; i++)
4872
29.1k
            {
4873
29.1k
            if (Feptr >= mb->end_subject)
4874
2.93k
              {
4875
2.93k
              SCHECK_PARTIAL();
4876
2.93k
              break;
4877
2.93k
              }
4878
26.2k
            if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_digit) != 0)
4879
1.81k
              break;
4880
24.4k
            Feptr++;
4881
24.4k
            }
4882
4.74k
          break;
4883
4884
46.6k
          case OP_DIGIT:
4885
75.1k
          for (i = Lmin; i < Lmax; i++)
4886
73.8k
            {
4887
73.8k
            if (Feptr >= mb->end_subject)
4888
4.87k
              {
4889
4.87k
              SCHECK_PARTIAL();
4890
4.87k
              break;
4891
4.87k
              }
4892
68.9k
            if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_digit) == 0)
4893
40.4k
              break;
4894
28.5k
            Feptr++;
4895
28.5k
            }
4896
46.6k
          break;
4897
4898
199k
          case OP_NOT_WHITESPACE:
4899
5.17M
          for (i = Lmin; i < Lmax; i++)
4900
5.17M
            {
4901
5.17M
            if (Feptr >= mb->end_subject)
4902
38.5k
              {
4903
38.5k
              SCHECK_PARTIAL();
4904
38.5k
              break;
4905
38.5k
              }
4906
5.13M
            if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_space) != 0)
4907
159k
              break;
4908
4.97M
            Feptr++;
4909
4.97M
            }
4910
199k
          break;
4911
4912
199k
          case OP_WHITESPACE:
4913
13.8k
          for (i = Lmin; i < Lmax; i++)
4914
12.1k
            {
4915
12.1k
            if (Feptr >= mb->end_subject)
4916
11
              {
4917
11
              SCHECK_PARTIAL();
4918
11
              break;
4919
11
              }
4920
12.1k
            if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_space) == 0)
4921
10.2k
              break;
4922
1.88k
            Feptr++;
4923
1.88k
            }
4924
11.9k
          break;
4925
4926
11.9k
          case OP_NOT_WORDCHAR:
4927
144k
          for (i = Lmin; i < Lmax; i++)
4928
144k
            {
4929
144k
            if (Feptr >= mb->end_subject)
4930
1.31k
              {
4931
1.31k
              SCHECK_PARTIAL();
4932
1.31k
              break;
4933
1.31k
              }
4934
142k
            if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_word) != 0)
4935
4.26k
              break;
4936
138k
            Feptr++;
4937
138k
            }
4938
5.66k
          break;
4939
4940
79.9k
          case OP_WORDCHAR:
4941
155k
          for (i = Lmin; i < Lmax; i++)
4942
125k
            {
4943
125k
            if (Feptr >= mb->end_subject)
4944
64
              {
4945
64
              SCHECK_PARTIAL();
4946
64
              break;
4947
64
              }
4948
125k
            if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_word) == 0)
4949
49.6k
              break;
4950
76.0k
            Feptr++;
4951
76.0k
            }
4952
79.9k
          break;
4953
4954
79.9k
          default:
4955
0
          return PCRE2_ERROR_INTERNAL;
4956
111M
          }
4957
4958
111M
        if (reptype == REPTYPE_POS) continue;    /* No backtracking */
4959
4960
110M
        for (;;)
4961
390M
          {
4962
390M
          if (Feptr == Lstart_eptr) break;
4963
280M
          RMATCH(Fecode, RM34);
4964
280M
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4965
280M
          Feptr--;
4966
280M
          if (Lctype == OP_ANYNL && Feptr > Lstart_eptr && *Feptr == CHAR_LF &&
4967
48
              Feptr[-1] == CHAR_CR) Feptr--;
4968
280M
          }
4969
110M
        }
4970
128M
      }
4971
118M
    break;  /* End of repeat character type processing */
4972
4973
118M
#undef Lstart_eptr
4974
118M
#undef Lmin
4975
118M
#undef Lmax
4976
118M
#undef Lctype
4977
118M
#undef Lpropvalue
4978
4979
4980
    /* ===================================================================== */
4981
    /* Match a back reference, possibly repeatedly. Look past the end of the
4982
    item to see if there is repeat information following. The OP_REF and
4983
    OP_REFI opcodes are used for a reference to a numbered group or to a
4984
    non-duplicated named group. For a duplicated named group, OP_DNREF and
4985
    OP_DNREFI are used. In this case we must scan the list of groups to which
4986
    the name refers, and use the first one that is set. */
4987
4988
118M
#define Lmin      F->temp_32[0]
4989
118M
#define Lmax      F->temp_32[1]
4990
118M
#define Lcaseless F->temp_32[2]
4991
118M
#define Lstart    F->temp_sptr[0]
4992
118M
#define Loffset   F->temp_size
4993
4994
118M
    case OP_DNREF:
4995
0
    case OP_DNREFI:
4996
0
    Lcaseless = (Fop == OP_DNREFI);
4997
0
      {
4998
0
      int count = GET2(Fecode, 1+IMM2_SIZE);
4999
0
      PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size;
5000
0
      Fecode += 1 + 2*IMM2_SIZE;
5001
5002
0
      while (count-- > 0)
5003
0
        {
5004
0
        Loffset = (GET2(slot, 0) << 1) - 2;
5005
0
        if (Loffset < Foffset_top && Fovector[Loffset] != PCRE2_UNSET) break;
5006
0
        slot += mb->name_entry_size;
5007
0
        }
5008
0
      }
5009
0
    goto REF_REPEAT;
5010
5011
23.2k
    case OP_REF:
5012
23.2k
    case OP_REFI:
5013
23.2k
    Lcaseless = (Fop == OP_REFI);
5014
23.2k
    Loffset = (GET2(Fecode, 1) << 1) - 2;
5015
23.2k
    Fecode += 1 + IMM2_SIZE;
5016
5017
    /* Set up for repetition, or handle the non-repeated case. The maximum and
5018
    minimum must be in the heap frame, but as they are short-term values, we
5019
    use temporary fields. */
5020
5021
23.2k
    REF_REPEAT:
5022
23.2k
    switch (*Fecode)
5023
23.2k
      {
5024
0
      case OP_CRSTAR:
5025
0
      case OP_CRMINSTAR:
5026
0
      case OP_CRPLUS:
5027
0
      case OP_CRMINPLUS:
5028
0
      case OP_CRQUERY:
5029
0
      case OP_CRMINQUERY:
5030
0
      fc = *Fecode++ - OP_CRSTAR;
5031
0
      Lmin = rep_min[fc];
5032
0
      Lmax = rep_max[fc];
5033
0
      reptype = rep_typ[fc];
5034
0
      break;
5035
5036
0
      case OP_CRRANGE:
5037
0
      case OP_CRMINRANGE:
5038
0
      Lmin = GET2(Fecode, 1);
5039
0
      Lmax = GET2(Fecode, 1 + IMM2_SIZE);
5040
0
      reptype = rep_typ[*Fecode - OP_CRSTAR];
5041
0
      if (Lmax == 0) Lmax = UINT32_MAX;  /* Max 0 => infinity */
5042
0
      Fecode += 1 + 2 * IMM2_SIZE;
5043
0
      break;
5044
5045
23.2k
      default:                  /* No repeat follows */
5046
23.2k
        {
5047
23.2k
        rrc = match_ref(Loffset, Lcaseless, F, mb, &length);
5048
23.2k
        if (rrc != 0)
5049
23.2k
          {
5050
23.2k
          if (rrc > 0) Feptr = mb->end_subject;   /* Partial match */
5051
23.2k
          CHECK_PARTIAL();
5052
23.2k
          RRETURN(MATCH_NOMATCH);
5053
0
          }
5054
23.2k
        }
5055
4
      Feptr += length;
5056
4
      continue;              /* With the main loop */
5057
23.2k
      }
5058
5059
    /* Handle repeated back references. If a set group has length zero, just
5060
    continue with the main loop, because it matches however many times. For an
5061
    unset reference, if the minimum is zero, we can also just continue. We can
5062
    also continue if PCRE2_MATCH_UNSET_BACKREF is set, because this makes unset
5063
    groups behave as zero-length groups. For any other unset cases, carrying
5064
    on will result in NOMATCH. */
5065
5066
0
    if (Loffset < Foffset_top && Fovector[Loffset] != PCRE2_UNSET)
5067
0
      {
5068
0
      if (Fovector[Loffset] == Fovector[Loffset + 1]) continue;
5069
0
      }
5070
0
    else  /* Group is not set */
5071
0
      {
5072
0
      if (Lmin == 0 || (mb->poptions & PCRE2_MATCH_UNSET_BACKREF) != 0)
5073
0
        continue;
5074
0
      }
5075
5076
    /* First, ensure the minimum number of matches are present. */
5077
5078
0
    for (i = 1; i <= Lmin; i++)
5079
0
      {
5080
0
      PCRE2_SIZE slength;
5081
0
      rrc = match_ref(Loffset, Lcaseless, F, mb, &slength);
5082
0
      if (rrc != 0)
5083
0
        {
5084
0
        if (rrc > 0) Feptr = mb->end_subject;   /* Partial match */
5085
0
        CHECK_PARTIAL();
5086
0
        RRETURN(MATCH_NOMATCH);
5087
0
        }
5088
0
      Feptr += slength;
5089
0
      }
5090
5091
    /* If min = max, we are done. They are not both allowed to be zero. */
5092
5093
0
    if (Lmin == Lmax) continue;
5094
5095
    /* If minimizing, keep trying and advancing the pointer. */
5096
5097
0
    if (reptype == REPTYPE_MIN)
5098
0
      {
5099
0
      for (;;)
5100
0
        {
5101
0
        PCRE2_SIZE slength;
5102
0
        RMATCH(Fecode, RM20);
5103
0
        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5104
0
        if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
5105
0
        rrc = match_ref(Loffset, Lcaseless, F, mb, &slength);
5106
0
        if (rrc != 0)
5107
0
          {
5108
0
          if (rrc > 0) Feptr = mb->end_subject;   /* Partial match */
5109
0
          CHECK_PARTIAL();
5110
0
          RRETURN(MATCH_NOMATCH);
5111
0
          }
5112
0
        Feptr += slength;
5113
0
        }
5114
      /* Control never gets here */
5115
0
      }
5116
5117
    /* If maximizing, find the longest string and work backwards, as long as
5118
    the matched lengths for each iteration are the same. */
5119
5120
0
    else
5121
0
      {
5122
0
      BOOL samelengths = TRUE;
5123
0
      Lstart = Feptr;     /* Starting position */
5124
0
      Flength = Fovector[Loffset+1] - Fovector[Loffset];
5125
5126
0
      for (i = Lmin; i < Lmax; i++)
5127
0
        {
5128
0
        PCRE2_SIZE slength;
5129
0
        rrc = match_ref(Loffset, Lcaseless, F, mb, &slength);
5130
0
        if (rrc != 0)
5131
0
          {
5132
          /* Can't use CHECK_PARTIAL because we don't want to update Feptr in
5133
          the soft partial matching case. */
5134
5135
0
          if (rrc > 0 && mb->partial != 0 &&
5136
0
              mb->end_subject > mb->start_used_ptr)
5137
0
            {
5138
0
            mb->hitend = TRUE;
5139
0
            if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
5140
0
            }
5141
0
          break;
5142
0
          }
5143
5144
0
        if (slength != Flength) samelengths = FALSE;
5145
0
        Feptr += slength;
5146
0
        }
5147
5148
      /* If the length matched for each repetition is the same as the length of
5149
      the captured group, we can easily work backwards. This is the normal
5150
      case. In caseless UTF-8 mode, though, there are pairs of case-equivalent
5151
      characters whose lengths (in terms of code units) differ. However, this
5152
      is very rare, so we handle it by re-matching fewer and fewer times. */
5153
5154
0
      if (samelengths)
5155
0
        {
5156
0
        while (Feptr >= Lstart)
5157
0
          {
5158
0
          RMATCH(Fecode, RM21);
5159
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5160
0
          Feptr -= Flength;
5161
0
          }
5162
0
        }
5163
5164
      /* The rare case of non-matching lengths. Re-scan the repetition for each
5165
      iteration. We know that match_ref() will succeed every time. */
5166
5167
0
      else
5168
0
        {
5169
0
        Lmax = i;
5170
0
        for (;;)
5171
0
          {
5172
0
          RMATCH(Fecode, RM22);
5173
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5174
0
          if (Feptr == Lstart) break; /* Failed after minimal repetition */
5175
0
          Feptr = Lstart;
5176
0
          Lmax--;
5177
0
          for (i = Lmin; i < Lmax; i++)
5178
0
            {
5179
0
            PCRE2_SIZE slength;
5180
0
            (void)match_ref(Loffset, Lcaseless, F, mb, &slength);
5181
0
            Feptr += slength;
5182
0
            }
5183
0
          }
5184
0
        }
5185
5186
0
      RRETURN(MATCH_NOMATCH);
5187
0
      }
5188
    /* Control never gets here */
5189
5190
0
#undef Lcaseless
5191
0
#undef Lmin
5192
0
#undef Lmax
5193
0
#undef Lstart
5194
0
#undef Loffset
5195
5196
5197
5198
/* ========================================================================= */
5199
/*           Opcodes for the start of various parenthesized items            */
5200
/* ========================================================================= */
5201
5202
    /* In all cases, if the result of RMATCH() is MATCH_THEN, check whether the
5203
    (*THEN) is within the current branch by comparing the address of OP_THEN
5204
    that is passed back with the end of the branch. If (*THEN) is within the
5205
    current branch, and the branch is one of two or more alternatives (it
5206
    either starts or ends with OP_ALT), we have reached the limit of THEN's
5207
    action, so convert the return code to NOMATCH, which will cause normal
5208
    backtracking to happen from now on. Otherwise, THEN is passed back to an
5209
    outer alternative. This implements Perl's treatment of parenthesized
5210
    groups, where a group not containing | does not affect the current
5211
    alternative, that is, (X) is NOT the same as (X|(*F)). */
5212
5213
5214
    /* ===================================================================== */
5215
    /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a non-possessive
5216
    bracket group, indicating that it may occur zero times. It may repeat
5217
    infinitely, or not at all - i.e. it could be ()* or ()? or even (){0} in
5218
    the pattern. Brackets with fixed upper repeat limits are compiled as a
5219
    number of copies, with the optional ones preceded by BRAZERO or BRAMINZERO.
5220
    Possessive groups with possible zero repeats are preceded by BRAPOSZERO. */
5221
5222
2.13k
#define Lnext_ecode F->temp_sptr[0]
5223
5224
432
    case OP_BRAZERO:
5225
432
    Lnext_ecode = Fecode + 1;
5226
432
    RMATCH(Lnext_ecode, RM9);
5227
432
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5228
633
    do Lnext_ecode += GET(Lnext_ecode, 1); while (*Lnext_ecode == OP_ALT);
5229
432
    Fecode = Lnext_ecode + 1 + LINK_SIZE;
5230
432
    break;
5231
5232
0
    case OP_BRAMINZERO:
5233
0
    Lnext_ecode = Fecode + 1;
5234
0
    do Lnext_ecode += GET(Lnext_ecode, 1); while (*Lnext_ecode == OP_ALT);
5235
0
    RMATCH(Lnext_ecode + 1 + LINK_SIZE, RM10);
5236
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5237
0
    Fecode++;
5238
0
    break;
5239
5240
0
#undef Lnext_ecode
5241
5242
0
    case OP_SKIPZERO:
5243
0
    Fecode++;
5244
0
    do Fecode += GET(Fecode,1); while (*Fecode == OP_ALT);
5245
0
    Fecode += 1 + LINK_SIZE;
5246
0
    break;
5247
5248
5249
    /* ===================================================================== */
5250
    /* Handle possessive brackets with an unlimited repeat. The end of these
5251
    brackets will always be OP_KETRPOS, which returns MATCH_KETRPOS without
5252
    going further in the pattern. */
5253
5254
96.9k
#define Lframe_type    F->temp_32[0]
5255
170k
#define Lmatched_once  F->temp_32[1]
5256
71.8k
#define Lzero_allowed  F->temp_32[2]
5257
73.4k
#define Lstart_eptr    F->temp_sptr[0]
5258
48.4k
#define Lstart_group   F->temp_sptr[1]
5259
5260
0
    case OP_BRAPOSZERO:
5261
0
    Lzero_allowed = TRUE;                /* Zero repeat is allowed */
5262
0
    Fecode += 1;
5263
0
    if (*Fecode == OP_CBRAPOS || *Fecode == OP_SCBRAPOS)
5264
0
      goto POSSESSIVE_CAPTURE;
5265
0
    goto POSSESSIVE_NON_CAPTURE;
5266
5267
0
    case OP_BRAPOS:
5268
0
    case OP_SBRAPOS:
5269
0
    Lzero_allowed = FALSE;               /* Zero repeat not allowed */
5270
5271
0
    POSSESSIVE_NON_CAPTURE:
5272
0
    Lframe_type = GF_NOCAPTURE;          /* Remembered frame type */
5273
0
    goto POSSESSIVE_GROUP;
5274
5275
23.4k
    case OP_CBRAPOS:
5276
48.3k
    case OP_SCBRAPOS:
5277
48.3k
    Lzero_allowed = FALSE;               /* Zero repeat not allowed */
5278
5279
48.3k
    POSSESSIVE_CAPTURE:
5280
48.3k
    number = GET2(Fecode, 1+LINK_SIZE);
5281
48.3k
    Lframe_type = GF_CAPTURE | number;   /* Remembered frame type */
5282
5283
48.3k
    POSSESSIVE_GROUP:
5284
48.3k
    Lmatched_once = FALSE;               /* Never matched */
5285
48.3k
    Lstart_group = Fecode;               /* Start of this group */
5286
5287
48.3k
    for (;;)
5288
48.5k
      {
5289
48.5k
      Lstart_eptr = Feptr;               /* Position at group start */
5290
48.5k
      group_frame_type = Lframe_type;
5291
48.5k
      RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM8);
5292
48.5k
      if (rrc == MATCH_KETRPOS)
5293
24.9k
        {
5294
24.9k
        Lmatched_once = TRUE;            /* Matched at least once */
5295
24.9k
        if (Feptr == Lstart_eptr)        /* Empty match; skip to end */
5296
24.9k
          {
5297
24.9k
          do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
5298
24.9k
          break;
5299
24.9k
          }
5300
5301
48
        Fecode = Lstart_group;
5302
48
        continue;
5303
24.9k
        }
5304
5305
      /* See comment above about handling THEN. */
5306
5307
23.5k
      if (rrc == MATCH_THEN)
5308
0
        {
5309
0
        PCRE2_SPTR next_ecode = Fecode + GET(Fecode,1);
5310
0
        if (mb->verb_ecode_ptr < next_ecode &&
5311
0
            (*Fecode == OP_ALT || *next_ecode == OP_ALT))
5312
0
          rrc = MATCH_NOMATCH;
5313
0
        }
5314
5315
23.5k
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5316
23.5k
      Fecode += GET(Fecode, 1);
5317
23.5k
      if (*Fecode != OP_ALT) break;
5318
23.5k
      }
5319
5320
    /* Success if matched something or zero repeat allowed */
5321
5322
48.3k
    if (Lmatched_once || Lzero_allowed)
5323
24.9k
      {
5324
24.9k
      Fecode += 1 + LINK_SIZE;
5325
24.9k
      break;
5326
24.9k
      }
5327
5328
48.3k
    RRETURN(MATCH_NOMATCH);
5329
5330
0
#undef Lmatched_once
5331
0
#undef Lzero_allowed
5332
0
#undef Lframe_type
5333
0
#undef Lstart_eptr
5334
0
#undef Lstart_group
5335
5336
5337
    /* ===================================================================== */
5338
    /* Handle non-capturing brackets that cannot match an empty string. When we
5339
    get to the final alternative within the brackets, as long as there are no
5340
    THEN's in the pattern, we can optimize by not recording a new backtracking
5341
    point. (Ideally we should test for a THEN within this group, but we don't
5342
    have that information.) Don't do this if we are at the very top level,
5343
    however, because that would make handling assertions and once-only brackets
5344
    messier when there is nothing to go back to. */
5345
5346
1.63M
#define Lframe_type F->temp_32[0]     /* Set for all that use GROUPLOOP */
5347
3.77k
#define Lnext_branch F->temp_sptr[0]  /* Used only in OP_BRA handling */
5348
5349
329k
    case OP_BRA:
5350
329k
    if (mb->hasthen || Frdepth == 0)
5351
329k
      {
5352
329k
      Lframe_type = 0;
5353
329k
      goto GROUPLOOP;
5354
329k
      }
5355
5356
473
    for (;;)
5357
1.41k
      {
5358
1.41k
      Lnext_branch = Fecode + GET(Fecode, 1);
5359
1.41k
      if (*Lnext_branch != OP_ALT) break;
5360
5361
      /* This is never the final branch. We do not need to test for MATCH_THEN
5362
      here because this code is not used when there is a THEN in the pattern. */
5363
5364
942
      RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM1);
5365
942
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5366
942
      Fecode = Lnext_branch;
5367
942
      }
5368
5369
    /* Hit the start of the final branch. Continue at this level. */
5370
5371
473
    Fecode += PRIV(OP_lengths)[*Fecode];
5372
473
    break;
5373
5374
0
#undef Lnext_branch
5375
5376
5377
    /* ===================================================================== */
5378
    /* Handle a capturing bracket, other than those that are possessive with an
5379
    unlimited repeat. */
5380
5381
54.3k
    case OP_CBRA:
5382
80.6k
    case OP_SCBRA:
5383
80.6k
    Lframe_type = GF_CAPTURE | GET2(Fecode, 1+LINK_SIZE);
5384
80.6k
    goto GROUPLOOP;
5385
5386
5387
    /* ===================================================================== */
5388
    /* Atomic groups and non-capturing brackets that can match an empty string
5389
    must record a backtracking point and also set up a chained frame. */
5390
5391
36
    case OP_ONCE:
5392
36
    case OP_SCRIPT_RUN:
5393
56
    case OP_SBRA:
5394
56
    Lframe_type = GF_NOCAPTURE | Fop;
5395
5396
410k
    GROUPLOOP:
5397
410k
    for (;;)
5398
1.22M
      {
5399
1.22M
      group_frame_type = Lframe_type;
5400
1.22M
      RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM2);
5401
1.22M
      if (rrc == MATCH_THEN)
5402
0
        {
5403
0
        PCRE2_SPTR next_ecode = Fecode + GET(Fecode,1);
5404
0
        if (mb->verb_ecode_ptr < next_ecode &&
5405
0
            (*Fecode == OP_ALT || *next_ecode == OP_ALT))
5406
0
          rrc = MATCH_NOMATCH;
5407
0
        }
5408
1.22M
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5409
1.22M
      Fecode += GET(Fecode, 1);
5410
1.22M
      if (*Fecode != OP_ALT) RRETURN(MATCH_NOMATCH);
5411
818k
      }
5412
    /* Control never reaches here. */
5413
5414
0
#undef Lframe_type
5415
5416
5417
    /* ===================================================================== */
5418
    /* Pattern recursion either matches the current regex, or some
5419
    subexpression. The offset data is the offset to the starting bracket from
5420
    the start of the whole pattern. This is so that it works from duplicated
5421
    subpatterns. For a whole-pattern recursion, we have to infer the number
5422
    zero. */
5423
5424
789
#define Lframe_type F->temp_32[0]
5425
2.06k
#define Lstart_branch F->temp_sptr[0]
5426
5427
149
    case OP_RECURSE:
5428
149
    bracode = mb->start_code + GET(Fecode, 1);
5429
149
    number = (bracode == mb->start_code)? 0 : GET2(bracode, 1 + LINK_SIZE);
5430
5431
    /* If we are already in a pattern recursion, check for repeating the same
5432
    one without changing the subject pointer or the last referenced character
5433
    in the subject. This should catch convoluted mutual recursions; some
5434
    simple cases are caught at compile time. However, there are rare cases when
5435
    this check needs to be turned off. In this case, actual recursion loops
5436
    will be caught by the match or heap limits. */
5437
5438
149
    if (Fcurrent_recurse != RECURSE_UNSET)
5439
4
      {
5440
4
      offset = Flast_group_offset;
5441
4
      while (offset != PCRE2_UNSET)
5442
4
        {
5443
4
        N = (heapframe *)((char *)match_data->heapframes + offset);
5444
4
        P = (heapframe *)((char *)N - frame_size);
5445
4
        if (N->group_frame_type == (GF_RECURSE | number))
5446
4
          {
5447
4
          if (Feptr == P->eptr && mb->last_used_ptr == P->recurse_last_used &&
5448
4
               (mb->moptions & PCRE2_DISABLE_RECURSELOOP_CHECK) == 0)
5449
4
            return PCRE2_ERROR_RECURSELOOP;
5450
0
          break;
5451
4
          }
5452
0
        offset = P->last_group_offset;
5453
0
        }
5454
4
      }
5455
5456
    /* Remember the current last referenced character and then run the
5457
    recursion branch by branch. */
5458
5459
145
    F->recurse_last_used = mb->last_used_ptr;
5460
145
    Lstart_branch = bracode;
5461
145
    Lframe_type = GF_RECURSE | number;
5462
5463
145
    for (;;)
5464
644
      {
5465
644
      PCRE2_SPTR next_ecode;
5466
5467
644
      group_frame_type = Lframe_type;
5468
644
      RMATCH(Lstart_branch + PRIV(OP_lengths)[*Lstart_branch], RM11);
5469
640
      next_ecode = Lstart_branch + GET(Lstart_branch,1);
5470
5471
      /* Handle backtracking verbs, which are defined in a range that can
5472
      easily be tested for. PCRE does not allow THEN, SKIP, PRUNE or COMMIT to
5473
      escape beyond a recursion; they cause a NOMATCH for the entire recursion.
5474
5475
      When one of these verbs triggers, the current recursion group number is
5476
      recorded. If it matches the recursion we are processing, the verb
5477
      happened within the recursion and we must deal with it. Otherwise it must
5478
      have happened after the recursion completed, and so has to be passed
5479
      back. See comment above about handling THEN. */
5480
5481
640
      if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX &&
5482
0
          mb->verb_current_recurse == (Lframe_type ^ GF_RECURSE))
5483
0
        {
5484
0
        if (rrc == MATCH_THEN && mb->verb_ecode_ptr < next_ecode &&
5485
0
            (*Lstart_branch == OP_ALT || *next_ecode == OP_ALT))
5486
0
          rrc = MATCH_NOMATCH;
5487
0
        else RRETURN(MATCH_NOMATCH);
5488
0
        }
5489
5490
      /* Note that carrying on after (*ACCEPT) in a recursion is handled in the
5491
      OP_ACCEPT code. Nothing needs to be done here. */
5492
5493
640
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5494
640
      Lstart_branch = next_ecode;
5495
640
      if (*Lstart_branch != OP_ALT) RRETURN(MATCH_NOMATCH);
5496
499
      }
5497
    /* Control never reaches here. */
5498
5499
0
#undef Lframe_type
5500
0
#undef Lstart_branch
5501
5502
5503
    /* ===================================================================== */
5504
    /* Positive assertions are like other groups except that PCRE doesn't allow
5505
    the effect of (*THEN) to escape beyond an assertion; it is therefore
5506
    treated as NOMATCH. (*ACCEPT) is treated as successful assertion, with its
5507
    captures and mark retained. Any other return is an error. */
5508
5509
2.36k
#define Lframe_type  F->temp_32[0]
5510
5511
30
    case OP_ASSERT:
5512
36
    case OP_ASSERTBACK:
5513
354
    case OP_ASSERT_NA:
5514
552
    case OP_ASSERTBACK_NA:
5515
552
    Lframe_type = GF_NOCAPTURE | Fop;
5516
552
    for (;;)
5517
1.81k
      {
5518
1.81k
      group_frame_type = Lframe_type;
5519
1.81k
      RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM3);
5520
1.81k
      if (rrc == MATCH_ACCEPT)
5521
0
        {
5522
0
        memcpy(Fovector,
5523
0
              (char *)assert_accept_frame + offsetof(heapframe, ovector),
5524
0
              assert_accept_frame->offset_top * sizeof(PCRE2_SIZE));
5525
0
        Foffset_top = assert_accept_frame->offset_top;
5526
0
        Fmark = assert_accept_frame->mark;
5527
0
        break;
5528
0
        }
5529
1.81k
      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
5530
1.81k
      Fecode += GET(Fecode, 1);
5531
1.81k
      if (*Fecode != OP_ALT) RRETURN(MATCH_NOMATCH);
5532
1.26k
      }
5533
5534
0
    do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
5535
0
    Fecode += 1 + LINK_SIZE;
5536
0
    break;
5537
5538
0
#undef Lframe_type
5539
5540
5541
    /* ===================================================================== */
5542
    /* Handle negative assertions. Loop for each non-matching branch as for
5543
    positive assertions. */
5544
5545
110
#define Lframe_type  F->temp_32[0]
5546
5547
30
    case OP_ASSERT_NOT:
5548
30
    case OP_ASSERTBACK_NOT:
5549
30
    Lframe_type  = GF_NOCAPTURE | Fop;
5550
5551
30
    for (;;)
5552
80
      {
5553
80
      group_frame_type = Lframe_type;
5554
80
      RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM4);
5555
80
      switch(rrc)
5556
80
        {
5557
0
        case MATCH_ACCEPT:   /* Assertion matched, therefore it fails. */
5558
30
        case MATCH_MATCH:
5559
30
        RRETURN (MATCH_NOMATCH);
5560
5561
50
        case MATCH_NOMATCH:  /* Branch failed, try next if present. */
5562
50
        case MATCH_THEN:
5563
50
        Fecode += GET(Fecode, 1);
5564
50
        if (*Fecode != OP_ALT) goto ASSERT_NOT_FAILED;
5565
50
        break;
5566
5567
50
        case MATCH_COMMIT:   /* Assertion forced to fail, therefore continue. */
5568
0
        case MATCH_SKIP:
5569
0
        case MATCH_PRUNE:
5570
0
        do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
5571
0
        goto ASSERT_NOT_FAILED;
5572
5573
0
        default:             /* Pass back any other return */
5574
0
        RRETURN(rrc);
5575
80
        }
5576
80
      }
5577
5578
    /* None of the branches have matched or there was a backtrack to (*COMMIT),
5579
    (*SKIP), (*PRUNE), or (*THEN) in the last branch. This is success for a
5580
    negative assertion, so carry on. */
5581
5582
0
    ASSERT_NOT_FAILED:
5583
0
    Fecode += 1 + LINK_SIZE;
5584
0
    break;
5585
5586
0
#undef Lframe_type
5587
5588
5589
    /* ===================================================================== */
5590
    /* The callout item calls an external function, if one is provided, passing
5591
    details of the match so far. This is mainly for debugging, though the
5592
    function is able to force a failure. */
5593
5594
0
    case OP_CALLOUT:
5595
0
    case OP_CALLOUT_STR:
5596
0
    rrc = do_callout(F, mb, &length);
5597
0
    if (rrc > 0) RRETURN(MATCH_NOMATCH);
5598
0
    if (rrc < 0) RRETURN(rrc);
5599
0
    Fecode += length;
5600
0
    break;
5601
5602
5603
    /* ===================================================================== */
5604
    /* Conditional group: compilation checked that there are no more than two
5605
    branches. If the condition is false, skipping the first branch takes us
5606
    past the end of the item if there is only one branch, but that's exactly
5607
    what we want. */
5608
5609
0
    case OP_COND:
5610
0
    case OP_SCOND:
5611
5612
    /* The variable Flength will be added to Fecode when the condition is
5613
    false, to get to the second branch. Setting it to the offset to the ALT or
5614
    KET, then incrementing Fecode achieves this effect. However, if the second
5615
    branch is non-existent, we must point to the KET so that the end of the
5616
    group is correctly processed. We now have Fecode pointing to the condition
5617
    or callout. */
5618
5619
0
    Flength = GET(Fecode, 1);    /* Offset to the second branch */
5620
0
    if (Fecode[Flength] != OP_ALT) Flength -= 1 + LINK_SIZE;
5621
0
    Fecode += 1 + LINK_SIZE;     /* From this opcode */
5622
5623
    /* Because of the way auto-callout works during compile, a callout item is
5624
    inserted between OP_COND and an assertion condition. Such a callout can
5625
    also be inserted manually. */
5626
5627
0
    if (*Fecode == OP_CALLOUT || *Fecode == OP_CALLOUT_STR)
5628
0
      {
5629
0
      rrc = do_callout(F, mb, &length);
5630
0
      if (rrc > 0) RRETURN(MATCH_NOMATCH);
5631
0
      if (rrc < 0) RRETURN(rrc);
5632
5633
      /* Advance Fecode past the callout, so it now points to the condition. We
5634
      must adjust Flength so that the value of Fecode+Flength is unchanged. */
5635
5636
0
      Fecode += length;
5637
0
      Flength -= length;
5638
0
      }
5639
5640
    /* Test the various possible conditions */
5641
5642
0
    condition = FALSE;
5643
0
    switch(*Fecode)
5644
0
      {
5645
0
      case OP_RREF:                  /* Group recursion test */
5646
0
      if (Fcurrent_recurse != RECURSE_UNSET)
5647
0
        {
5648
0
        number = GET2(Fecode, 1);
5649
0
        condition = (number == RREF_ANY || number == Fcurrent_recurse);
5650
0
        }
5651
0
      break;
5652
5653
0
      case OP_DNRREF:       /* Duplicate named group recursion test */
5654
0
      if (Fcurrent_recurse != RECURSE_UNSET)
5655
0
        {
5656
0
        int count = GET2(Fecode, 1 + IMM2_SIZE);
5657
0
        PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size;
5658
0
        while (count-- > 0)
5659
0
          {
5660
0
          number = GET2(slot, 0);
5661
0
          condition = number == Fcurrent_recurse;
5662
0
          if (condition) break;
5663
0
          slot += mb->name_entry_size;
5664
0
          }
5665
0
        }
5666
0
      break;
5667
5668
0
      case OP_CREF:                         /* Numbered group used test */
5669
0
      offset = (GET2(Fecode, 1) << 1) - 2;  /* Doubled ref number */
5670
0
      condition = offset < Foffset_top && Fovector[offset] != PCRE2_UNSET;
5671
0
      break;
5672
5673
0
      case OP_DNCREF:      /* Duplicate named group used test */
5674
0
        {
5675
0
        int count = GET2(Fecode, 1 + IMM2_SIZE);
5676
0
        PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size;
5677
0
        while (count-- > 0)
5678
0
          {
5679
0
          offset = (GET2(slot, 0) << 1) - 2;
5680
0
          condition = offset < Foffset_top && Fovector[offset] != PCRE2_UNSET;
5681
0
          if (condition) break;
5682
0
          slot += mb->name_entry_size;
5683
0
          }
5684
0
        }
5685
0
      break;
5686
5687
0
      case OP_FALSE:
5688
0
      case OP_FAIL:   /* The assertion (?!) becomes OP_FAIL */
5689
0
      break;
5690
5691
0
      case OP_TRUE:
5692
0
      condition = TRUE;
5693
0
      break;
5694
5695
      /* The condition is an assertion. Run code similar to the assertion code
5696
      above. */
5697
5698
0
#define Lpositive      F->temp_32[0]
5699
0
#define Lstart_branch  F->temp_sptr[0]
5700
5701
0
      default:
5702
0
      Lpositive = (*Fecode == OP_ASSERT || *Fecode == OP_ASSERTBACK);
5703
0
      Lstart_branch = Fecode;
5704
5705
0
      for (;;)
5706
0
        {
5707
0
        group_frame_type = GF_CONDASSERT | *Fecode;
5708
0
        RMATCH(Lstart_branch + PRIV(OP_lengths)[*Lstart_branch], RM5);
5709
5710
0
        switch(rrc)
5711
0
          {
5712
0
          case MATCH_ACCEPT:  /* Save captures */
5713
0
          memcpy(Fovector,
5714
0
                (char *)assert_accept_frame + offsetof(heapframe, ovector),
5715
0
                assert_accept_frame->offset_top * sizeof(PCRE2_SIZE));
5716
0
          Foffset_top = assert_accept_frame->offset_top;
5717
5718
          /* Fall through */
5719
          /* In the case of a match, the captures have already been put into
5720
          the current frame. */
5721
5722
0
          case MATCH_MATCH:
5723
0
          condition = Lpositive;   /* TRUE for positive assertion */
5724
0
          break;
5725
5726
          /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
5727
          assertion; it is therefore always treated as NOMATCH. */
5728
5729
0
          case MATCH_NOMATCH:
5730
0
          case MATCH_THEN:
5731
0
          Lstart_branch += GET(Lstart_branch, 1);
5732
0
          if (*Lstart_branch == OP_ALT) continue;  /* Try next branch */
5733
0
          condition = !Lpositive;  /* TRUE for negative assertion */
5734
0
          break;
5735
5736
          /* These force no match without checking other branches. */
5737
5738
0
          case MATCH_COMMIT:
5739
0
          case MATCH_SKIP:
5740
0
          case MATCH_PRUNE:
5741
0
          condition = !Lpositive;
5742
0
          break;
5743
5744
0
          default:
5745
0
          RRETURN(rrc);
5746
0
          }
5747
0
        break;  /* Out of the branch loop */
5748
0
        }
5749
5750
      /* If the condition is true, find the end of the assertion so that
5751
      advancing past it gets us to the start of the first branch. */
5752
5753
0
      if (condition)
5754
0
        {
5755
0
        do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
5756
0
        }
5757
0
      break;  /* End of assertion condition */
5758
0
      }
5759
5760
0
#undef Lpositive
5761
0
#undef Lstart_branch
5762
5763
    /* Choose branch according to the condition. */
5764
5765
0
    Fecode += condition? PRIV(OP_lengths)[*Fecode] : Flength;
5766
5767
    /* If the opcode is OP_SCOND it means we are at a repeated conditional
5768
    group that might match an empty string. We must therefore descend a level
5769
    so that the start is remembered for checking. For OP_COND we can just
5770
    continue at this level. */
5771
5772
0
    if (Fop == OP_SCOND)
5773
0
      {
5774
0
      group_frame_type  = GF_NOCAPTURE | Fop;
5775
0
      RMATCH(Fecode, RM35);
5776
0
      RRETURN(rrc);
5777
0
      }
5778
0
    break;
5779
5780
5781
5782
/* ========================================================================= */
5783
/*                  End of start of parenthesis opcodes                      */
5784
/* ========================================================================= */
5785
5786
5787
    /* ===================================================================== */
5788
    /* Move the subject pointer back by one fixed amount. This occurs at the
5789
    start of each branch that has a fixed length in a lookbehind assertion. If
5790
    we are too close to the start to move back, fail. When working with UTF-8
5791
    we move back a number of characters, not bytes. */
5792
5793
218
    case OP_REVERSE:
5794
218
    number = GET2(Fecode, 1);
5795
218
#ifdef SUPPORT_UNICODE
5796
218
    if (utf)
5797
0
      {
5798
0
      while (number-- > 0)
5799
0
        {
5800
0
        if (Feptr <= mb->check_subject) RRETURN(MATCH_NOMATCH);
5801
0
        Feptr--;
5802
0
        BACKCHAR(Feptr);
5803
0
        }
5804
0
      }
5805
218
    else
5806
218
#endif
5807
5808
    /* No UTF support, or not in UTF mode: count is code unit count */
5809
5810
218
      {
5811
218
      if ((ptrdiff_t)number > Feptr - mb->start_subject) RRETURN(MATCH_NOMATCH);
5812
143
      Feptr -= number;
5813
143
      }
5814
5815
    /* Save the earliest consulted character, then skip to next opcode */
5816
5817
143
    if (Feptr < mb->start_used_ptr) mb->start_used_ptr = Feptr;
5818
143
    Fecode += 1 + IMM2_SIZE;
5819
143
    break;
5820
5821
5822
    /* ===================================================================== */
5823
    /* Move the subject pointer back by a variable amount. This occurs at the
5824
    start of each branch of a lookbehind assertion when the branch has a
5825
    variable, but limited, length. A loop is needed to try matching the branch
5826
    after moving back different numbers of characters. If we are too close to
5827
    the start to move back even the minimum amount, fail. When working with
5828
    UTF-8 we move back a number of characters, not bytes. */
5829
5830
2.49k
#define Lmin F->temp_32[0]
5831
2.74k
#define Lmax F->temp_32[1]
5832
144
#define Leptr F->temp_sptr[0]
5833
5834
144
    case OP_VREVERSE:
5835
144
    Lmin = GET2(Fecode, 1);
5836
144
    Lmax = GET2(Fecode, 1 + IMM2_SIZE);
5837
144
    Leptr = Feptr;
5838
5839
    /* Move back by the maximum branch length and then work forwards. This
5840
    ensures that items such as \d{3,5} get the maximum length, which is
5841
    relevant for captures, and makes for Perl compatibility. */
5842
5843
144
#ifdef SUPPORT_UNICODE
5844
144
    if (utf)
5845
0
      {
5846
0
      for (i = 0; i < Lmax; i++)
5847
0
        {
5848
0
        if (Feptr == mb->start_subject)
5849
0
          {
5850
0
          if (i < Lmin) RRETURN(MATCH_NOMATCH);
5851
0
          Lmax = i;
5852
0
          break;
5853
0
          }
5854
0
        Feptr--;
5855
0
        BACKCHAR(Feptr);
5856
0
        }
5857
0
      }
5858
144
    else
5859
144
#endif
5860
5861
    /* No UTF support or not in UTF mode */
5862
5863
144
      {
5864
144
      ptrdiff_t diff = Feptr - mb->start_subject;
5865
144
      uint32_t available = (diff > 65535)? 65535 : ((diff > 0)? (int)diff : 0);
5866
144
      if (Lmin > available) RRETURN(MATCH_NOMATCH);
5867
132
      if (Lmax > available) Lmax = available;
5868
132
      Feptr -= Lmax;
5869
132
      }
5870
5871
    /* Now try matching, moving forward one character on failure, until we
5872
    reach the minimum back length. */
5873
5874
132
    for (;;)
5875
2.20k
      {
5876
2.20k
      RMATCH(Fecode + 1 + 2 * IMM2_SIZE, RM37);
5877
2.20k
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5878
2.20k
      if (Lmax-- <= Lmin) RRETURN(MATCH_NOMATCH);
5879
2.07k
      Feptr++;
5880
2.07k
#ifdef SUPPORT_UNICODE
5881
2.07k
      if (utf) { FORWARDCHARTEST(Feptr, mb->end_subject); }
5882
2.07k
#endif
5883
2.07k
      }
5884
    /* Control never reaches here */
5885
5886
0
#undef Lmin
5887
0
#undef Lmax
5888
0
#undef Leptr
5889
5890
    /* ===================================================================== */
5891
    /* An alternation is the end of a branch; scan along to find the end of the
5892
    bracketed group. */
5893
5894
30.6k
    case OP_ALT:
5895
30.6k
    branch_end = Fecode;
5896
9.10M
    do Fecode += GET(Fecode,1); while (*Fecode == OP_ALT);
5897
30.6k
    break;
5898
5899
5900
    /* ===================================================================== */
5901
    /* The end of a parenthesized group. For all but OP_BRA and OP_COND, the
5902
    starting frame was added to the chained frames in order to remember the
5903
    starting subject position for the group. (Not true for OP_BRA when it's a
5904
    whole pattern recursion, but that is handled separately below.) */
5905
5906
37.4k
    case OP_KET:
5907
37.4k
    case OP_KETRMIN:
5908
63.9k
    case OP_KETRMAX:
5909
88.8k
    case OP_KETRPOS:
5910
5911
88.8k
    bracode = Fecode - GET(Fecode, 1);
5912
5913
88.8k
    if (branch_end == NULL) branch_end = Fecode;
5914
88.8k
    branch_start = bracode;
5915
9.10M
    while (branch_start + GET(branch_start, 1) != branch_end)
5916
9.02M
      branch_start += GET(branch_start, 1);
5917
88.8k
    branch_end = NULL;
5918
5919
    /* Point N to the frame at the start of the most recent group, and P to its
5920
    predecessor. Remember the subject pointer at the start of the group. */
5921
5922
88.8k
    if (*bracode != OP_BRA && *bracode != OP_COND)
5923
82.6k
      {
5924
82.6k
      N = (heapframe *)((char *)match_data->heapframes + Flast_group_offset);
5925
82.6k
      P = (heapframe *)((char *)N - frame_size);
5926
82.6k
      Flast_group_offset = P->last_group_offset;
5927
5928
#ifdef DEBUG_SHOW_RMATCH
5929
      fprintf(stderr, "++ KET for frame=%d type=%x prev char offset=%lu\n",
5930
        N->rdepth, N->group_frame_type,
5931
        (char *)P->eptr - (char *)mb->start_subject);
5932
#endif
5933
5934
      /* If we are at the end of an assertion that is a condition, return a
5935
      match, discarding any intermediate backtracking points. Copy back the
5936
      mark setting and the captures into the frame before N so that they are
5937
      set on return. Doing this for all assertions, both positive and negative,
5938
      seems to match what Perl does. */
5939
5940
82.6k
      if (GF_IDMASK(N->group_frame_type) == GF_CONDASSERT)
5941
0
        {
5942
0
        memcpy((char *)P + offsetof(heapframe, ovector), Fovector,
5943
0
          Foffset_top * sizeof(PCRE2_SIZE));
5944
0
        P->offset_top = Foffset_top;
5945
0
        P->mark = Fmark;
5946
0
        Fback_frame = (char *)F - (char *)P;
5947
0
        RRETURN(MATCH_MATCH);
5948
0
        }
5949
82.6k
      }
5950
6.21k
    else P = NULL;   /* Indicates starting frame not recorded */
5951
5952
    /* The group was not a conditional assertion. */
5953
5954
88.8k
    switch (*bracode)
5955
88.8k
      {
5956
      /* Whole pattern recursion is handled as a recursion into group 0, but
5957
      the entire pattern is wrapped in OP_BRA/OP_KET rather than a capturing
5958
      group - a design mistake: it should perhaps have been capture group 0.
5959
      Anyway, that means the end of such recursion must be handled here. It is
5960
      detected by checking for an immediately following OP_END when we are
5961
      recursing in group 0. If this is not the end of a whole-pattern
5962
      recursion, there is nothing to be done. */
5963
5964
6.21k
      case OP_BRA:
5965
6.21k
      if (Fcurrent_recurse != 0 || Fecode[1+LINK_SIZE] != OP_END) break;
5966
5967
      /* It is the end of whole-pattern recursion. */
5968
5969
0
      offset = Flast_group_offset;
5970
0
      if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL;
5971
0
      N = (heapframe *)((char *)match_data->heapframes + offset);
5972
0
      P = (heapframe *)((char *)N - frame_size);
5973
0
      Flast_group_offset = P->last_group_offset;
5974
5975
      /* Reinstate the previous set of captures and then carry on after the
5976
      recursion call. */
5977
5978
0
      memcpy((char *)F + offsetof(heapframe, ovector), P->ovector,
5979
0
        Foffset_top * sizeof(PCRE2_SIZE));
5980
0
      Foffset_top = P->offset_top;
5981
0
      Fcapture_last = P->capture_last;
5982
0
      Fcurrent_recurse = P->current_recurse;
5983
0
      Fecode = P->ecode + 1 + LINK_SIZE;
5984
0
      continue;  /* With next opcode */
5985
5986
0
      case OP_COND:     /* No need to do anything for these */
5987
0
      case OP_SCOND:
5988
0
      break;
5989
5990
      /* Non-atomic positive assertions are like OP_BRA, except that the
5991
      subject pointer must be put back to where it was at the start of the
5992
      assertion. For a variable lookbehind, check its end point. */
5993
5994
1
      case OP_ASSERTBACK_NA:
5995
1
      if (branch_start[1 + LINK_SIZE] == OP_VREVERSE && Feptr != P->eptr)
5996
1
        RRETURN(MATCH_NOMATCH);
5997
      /* Fall through */
5998
5999
1.03k
      case OP_ASSERT_NA:
6000
1.03k
      if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
6001
1.03k
      Feptr = P->eptr;
6002
1.03k
      break;
6003
6004
      /* Atomic positive assertions are like OP_ONCE, except that in addition
6005
      the subject pointer must be put back to where it was at the start of the
6006
      assertion. For a variable lookbehind, check its end point. */
6007
6008
0
      case OP_ASSERTBACK:
6009
0
      if (branch_start[1 + LINK_SIZE] == OP_VREVERSE && Feptr != P->eptr)
6010
0
        RRETURN(MATCH_NOMATCH);
6011
      /* Fall through */
6012
6013
30
      case OP_ASSERT:
6014
30
      if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
6015
30
      Feptr = P->eptr;
6016
      /* Fall through */
6017
6018
      /* For an atomic group, discard internal backtracking points. We must
6019
      also ensure that any remaining branches within the top-level of the group
6020
      are not tried. Do this by adjusting the code pointer within the backtrack
6021
      frame so that it points to the final branch. */
6022
6023
66
      case OP_ONCE:
6024
66
      Fback_frame = ((char *)F - (char *)P);
6025
66
      for (;;)
6026
918
        {
6027
918
        uint32_t y = GET(P->ecode,1);
6028
918
        if ((P->ecode)[y] != OP_ALT) break;
6029
852
        P->ecode += y;
6030
852
        }
6031
66
      break;
6032
6033
      /* A matching negative assertion returns MATCH, which is turned into
6034
      NOMATCH at the assertion level. For a variable lookbehind, check its end
6035
      point. */
6036
6037
0
      case OP_ASSERTBACK_NOT:
6038
0
      if (branch_start[1 + LINK_SIZE] == OP_VREVERSE && Feptr != P->eptr)
6039
0
        RRETURN(MATCH_NOMATCH);
6040
      /* Fall through */
6041
6042
30
      case OP_ASSERT_NOT:
6043
30
      RRETURN(MATCH_MATCH);
6044
6045
      /* At the end of a script run, apply the script-checking rules. This code
6046
      will never be exercised if Unicode support is not compiled, because in
6047
      that environment script runs cause an error at compile time. */
6048
6049
0
      case OP_SCRIPT_RUN:
6050
0
      if (!PRIV(script_run)(P->eptr, Feptr, utf)) RRETURN(MATCH_NOMATCH);
6051
0
      break;
6052
6053
      /* Whole-pattern recursion is coded as a recurse into group 0, and is
6054
      handled with OP_BRA above. Other recursion is handled here. */
6055
6056
30.1k
      case OP_CBRA:
6057
30.2k
      case OP_CBRAPOS:
6058
56.4k
      case OP_SCBRA:
6059
81.3k
      case OP_SCBRAPOS:
6060
81.3k
      number = GET2(bracode, 1+LINK_SIZE);
6061
6062
      /* Handle a recursively called group. We reinstate the previous set of
6063
      captures and then carry on after the recursion call. */
6064
6065
81.3k
      if (Fcurrent_recurse == number)
6066
351
        {
6067
351
        P = (heapframe *)((char *)N - frame_size);
6068
351
        memcpy((char *)F + offsetof(heapframe, ovector), P->ovector,
6069
351
          Foffset_top * sizeof(PCRE2_SIZE));
6070
351
        Foffset_top = P->offset_top;
6071
351
        Fcapture_last = P->capture_last;
6072
351
        Fcurrent_recurse = P->current_recurse;
6073
351
        Fecode = P->ecode + 1 + LINK_SIZE;
6074
351
        continue;  /* With next opcode */
6075
351
        }
6076
6077
      /* Deal with actual capturing. */
6078
6079
81.0k
      offset = (number << 1) - 2;
6080
81.0k
      Fcapture_last = number;
6081
81.0k
      Fovector[offset] = P->eptr - mb->start_subject;
6082
81.0k
      Fovector[offset+1] = Feptr - mb->start_subject;
6083
81.0k
      if (offset >= Foffset_top) Foffset_top = offset + 2;
6084
81.0k
      break;
6085
88.8k
      }  /* End actions relating to the starting opcode */
6086
6087
    /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
6088
    and return the MATCH_KETRPOS. This makes it possible to do the repeats one
6089
    at a time from the outer level. This must precede the empty string test -
6090
    in this case that test is done at the outer level. */
6091
6092
88.5k
    if (*Fecode == OP_KETRPOS)
6093
24.9k
      {
6094
24.9k
      memcpy((char *)P + offsetof(heapframe, eptr),
6095
24.9k
             (char *)F + offsetof(heapframe, eptr),
6096
24.9k
             frame_copy_size);
6097
24.9k
      RRETURN(MATCH_KETRPOS);
6098
0
      }
6099
6100
    /* Handle the different kinds of closing brackets. A non-repeating ket
6101
    needs no special action, just continuing at this level. This also happens
6102
    for the repeating kets if the group matched no characters, in order to
6103
    forcibly break infinite loops. Otherwise, the repeating kets try the rest
6104
    of the pattern or restart from the preceding bracket, in the appropriate
6105
    order. */
6106
6107
63.5k
    if (Fop != OP_KET && (P == NULL || Feptr != P->eptr))
6108
32
      {
6109
32
      if (Fop == OP_KETRMIN)
6110
0
        {
6111
0
        RMATCH(Fecode + 1 + LINK_SIZE, RM6);
6112
0
        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6113
0
        Fecode -= GET(Fecode, 1);
6114
0
        break;   /* End of ket processing */
6115
0
        }
6116
6117
      /* Repeat the maximum number of times (KETRMAX) */
6118
6119
32
      RMATCH(bracode, RM7);
6120
32
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6121
32
      }
6122
6123
    /* Carry on at this level for a non-repeating ket, or after matching an
6124
    empty string, or after repeating for a maximum number of times. */
6125
6126
63.5k
    Fecode += 1 + LINK_SIZE;
6127
63.5k
    break;
6128
6129
6130
    /* ===================================================================== */
6131
    /* Start and end of line assertions, not multiline mode. */
6132
6133
719k
    case OP_CIRC:   /* Start of line, unless PCRE2_NOTBOL is set. */
6134
719k
    if (Feptr != mb->start_subject || (mb->moptions & PCRE2_NOTBOL) != 0)
6135
719k
      RRETURN(MATCH_NOMATCH);
6136
315
    Fecode++;
6137
315
    break;
6138
6139
80.8k
    case OP_SOD:    /* Unconditional start of subject */
6140
80.8k
    if (Feptr != mb->start_subject) RRETURN(MATCH_NOMATCH);
6141
21
    Fecode++;
6142
21
    break;
6143
6144
    /* When PCRE2_NOTEOL is unset, assert before the subject end, or a
6145
    terminating newline unless PCRE2_DOLLAR_ENDONLY is set. */
6146
6147
186k
    case OP_DOLL:
6148
186k
    if ((mb->moptions & PCRE2_NOTEOL) != 0) RRETURN(MATCH_NOMATCH);
6149
186k
    if ((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0) goto ASSERT_NL_OR_EOS;
6150
6151
    /* Fall through */
6152
    /* Unconditional end of subject assertion (\z). */
6153
6154
634
    case OP_EOD:
6155
634
    if (Feptr < mb->true_end_subject) RRETURN(MATCH_NOMATCH);
6156
2
    if (mb->partial != 0)
6157
0
      {
6158
0
      mb->hitend = TRUE;
6159
0
      if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
6160
0
      }
6161
2
    Fecode++;
6162
2
    break;
6163
6164
    /* End of subject or ending \n assertion (\Z) */
6165
6166
6.20k
    case OP_EODN:
6167
192k
    ASSERT_NL_OR_EOS:
6168
192k
    if (Feptr < mb->end_subject &&
6169
192k
        (!IS_NEWLINE(Feptr) || Feptr != mb->end_subject - mb->nllen))
6170
192k
      {
6171
192k
      if (mb->partial != 0 &&
6172
0
          Feptr + 1 >= mb->end_subject &&
6173
0
          NLBLOCK->nltype == NLTYPE_FIXED &&
6174
0
          NLBLOCK->nllen == 2 &&
6175
0
          UCHAR21TEST(Feptr) == NLBLOCK->nl[0])
6176
0
        {
6177
0
        mb->hitend = TRUE;
6178
0
        if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
6179
0
        }
6180
192k
      RRETURN(MATCH_NOMATCH);
6181
0
      }
6182
6183
    /* Either at end of string or \n before end. */
6184
6185
212
    if (mb->partial != 0)
6186
0
      {
6187
0
      mb->hitend = TRUE;
6188
0
      if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
6189
0
      }
6190
212
    Fecode++;
6191
212
    break;
6192
6193
6194
    /* ===================================================================== */
6195
    /* Start and end of line assertions, multiline mode. */
6196
6197
    /* Start of subject unless notbol, or after any newline except for one at
6198
    the very end, unless PCRE2_ALT_CIRCUMFLEX is set. */
6199
6200
629k
    case OP_CIRCM:
6201
629k
    if ((mb->moptions & PCRE2_NOTBOL) != 0 && Feptr == mb->start_subject)
6202
629k
      RRETURN(MATCH_NOMATCH);
6203
629k
    if (Feptr != mb->start_subject &&
6204
629k
        ((Feptr == mb->end_subject &&
6205
3.02k
           (mb->poptions & PCRE2_ALT_CIRCUMFLEX) == 0) ||
6206
626k
         !WAS_NEWLINE(Feptr)))
6207
629k
      RRETURN(MATCH_NOMATCH);
6208
305
    Fecode++;
6209
305
    break;
6210
6211
    /* Assert before any newline, or before end of subject unless noteol is
6212
    set. */
6213
6214
33.2k
    case OP_DOLLM:
6215
33.2k
    if (Feptr < mb->end_subject)
6216
33.0k
      {
6217
33.0k
      if (!IS_NEWLINE(Feptr))
6218
32.5k
        {
6219
32.5k
        if (mb->partial != 0 &&
6220
0
            Feptr + 1 >= mb->end_subject &&
6221
0
            NLBLOCK->nltype == NLTYPE_FIXED &&
6222
0
            NLBLOCK->nllen == 2 &&
6223
0
            UCHAR21TEST(Feptr) == NLBLOCK->nl[0])
6224
0
          {
6225
0
          mb->hitend = TRUE;
6226
0
          if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
6227
0
          }
6228
32.5k
        RRETURN(MATCH_NOMATCH);
6229
0
        }
6230
33.0k
      }
6231
187
    else
6232
187
      {
6233
187
      if ((mb->moptions & PCRE2_NOTEOL) != 0) RRETURN(MATCH_NOMATCH);
6234
187
      SCHECK_PARTIAL();
6235
187
      }
6236
640
    Fecode++;
6237
640
    break;
6238
6239
6240
    /* ===================================================================== */
6241
    /* Start of match assertion */
6242
6243
2.40k
    case OP_SOM:
6244
2.40k
    if (Feptr != mb->start_subject + mb->start_offset) RRETURN(MATCH_NOMATCH);
6245
40
    Fecode++;
6246
40
    break;
6247
6248
6249
    /* ===================================================================== */
6250
    /* Reset the start of match point */
6251
6252
377
    case OP_SET_SOM:
6253
377
    Fstart_match = Feptr;
6254
377
    Fecode++;
6255
377
    break;
6256
6257
6258
    /* ===================================================================== */
6259
    /* Word boundary assertions. Find out if the previous and current
6260
    characters are "word" characters. It takes a bit more work in UTF mode.
6261
    Characters > 255 are assumed to be "non-word" characters when PCRE2_UCP is
6262
    not set. When it is set, use Unicode properties if available, even when not
6263
    in UTF mode. Remember the earliest and latest consulted characters. */
6264
6265
147k
    case OP_NOT_WORD_BOUNDARY:
6266
150k
    case OP_WORD_BOUNDARY:
6267
10.9M
    case OP_NOT_UCP_WORD_BOUNDARY:
6268
10.9M
    case OP_UCP_WORD_BOUNDARY:
6269
10.9M
    if (Feptr == mb->check_subject) prev_is_word = FALSE; else
6270
10.9M
      {
6271
10.9M
      PCRE2_SPTR lastptr = Feptr - 1;
6272
10.9M
#ifdef SUPPORT_UNICODE
6273
10.9M
      if (utf)
6274
10.7M
        {
6275
10.7M
        BACKCHAR(lastptr);
6276
10.7M
        GETCHAR(fc, lastptr);
6277
10.7M
        }
6278
150k
      else
6279
150k
#endif  /* SUPPORT_UNICODE */
6280
150k
      fc = *lastptr;
6281
10.9M
      if (lastptr < mb->start_used_ptr) mb->start_used_ptr = lastptr;
6282
10.9M
#ifdef SUPPORT_UNICODE
6283
10.9M
      if (Fop == OP_UCP_WORD_BOUNDARY || Fop == OP_NOT_UCP_WORD_BOUNDARY)
6284
10.7M
        {
6285
10.7M
        int chartype = UCD_CHARTYPE(fc);
6286
10.7M
        int category = PRIV(ucp_gentype)[chartype];
6287
10.7M
        prev_is_word = (category == ucp_L || category == ucp_N ||
6288
8.14M
          chartype == ucp_Mn || chartype == ucp_Pc);
6289
10.7M
        }
6290
150k
      else
6291
150k
#endif  /* SUPPORT_UNICODE */
6292
150k
      prev_is_word = CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0;
6293
10.9M
      }
6294
6295
    /* Get status of next character */
6296
6297
10.9M
    if (Feptr >= mb->end_subject)
6298
166k
      {
6299
166k
      SCHECK_PARTIAL();
6300
166k
      cur_is_word = FALSE;
6301
166k
      }
6302
10.7M
    else
6303
10.7M
      {
6304
10.7M
      PCRE2_SPTR nextptr = Feptr + 1;
6305
10.7M
#ifdef SUPPORT_UNICODE
6306
10.7M
      if (utf)
6307
10.6M
        {
6308
10.6M
        FORWARDCHARTEST(nextptr, mb->end_subject);
6309
10.6M
        GETCHAR(fc, Feptr);
6310
10.6M
        }
6311
150k
      else
6312
150k
#endif  /* SUPPORT_UNICODE */
6313
150k
      fc = *Feptr;
6314
10.7M
      if (nextptr > mb->last_used_ptr) mb->last_used_ptr = nextptr;
6315
10.7M
#ifdef SUPPORT_UNICODE
6316
10.7M
      if (Fop == OP_UCP_WORD_BOUNDARY || Fop == OP_NOT_UCP_WORD_BOUNDARY)
6317
10.6M
        {
6318
10.6M
        int chartype = UCD_CHARTYPE(fc);
6319
10.6M
        int category = PRIV(ucp_gentype)[chartype];
6320
10.6M
        cur_is_word = (category == ucp_L || category == ucp_N ||
6321
8.03M
          chartype == ucp_Mn || chartype == ucp_Pc);
6322
10.6M
        }
6323
150k
      else
6324
150k
#endif  /* SUPPORT_UNICODE */
6325
150k
      cur_is_word = CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0;
6326
10.7M
      }
6327
6328
    /* Now see if the situation is what we want */
6329
6330
10.9M
    if ((*Fecode++ == OP_WORD_BOUNDARY || Fop == OP_UCP_WORD_BOUNDARY)?
6331
10.9M
         cur_is_word == prev_is_word : cur_is_word != prev_is_word)
6332
9.82M
      RRETURN(MATCH_NOMATCH);
6333
9.82M
    break;
6334
6335
6336
    /* ===================================================================== */
6337
    /* Backtracking (*VERB)s, with and without arguments. Note that if the
6338
    pattern is successfully matched, we do not come back from RMATCH. */
6339
6340
25
    case OP_MARK:
6341
25
    Fmark = mb->nomatch_mark = Fecode + 2;
6342
25
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM12);
6343
6344
    /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
6345
    argument, and we must check whether that argument matches this MARK's
6346
    argument. It is passed back in mb->verb_skip_ptr. If it does match, we
6347
    return MATCH_SKIP with mb->verb_skip_ptr now pointing to the subject
6348
    position that corresponds to this mark. Otherwise, pass back the return
6349
    code unaltered. */
6350
6351
25
    if (rrc == MATCH_SKIP_ARG &&
6352
0
             PRIV(strcmp)(Fecode + 2, mb->verb_skip_ptr) == 0)
6353
0
      {
6354
0
      mb->verb_skip_ptr = Feptr;   /* Pass back current position */
6355
0
      RRETURN(MATCH_SKIP);
6356
0
      }
6357
25
    RRETURN(rrc);
6358
6359
0
    case OP_FAIL:
6360
0
    RRETURN(MATCH_NOMATCH);
6361
6362
    /* Record the current recursing group number in mb->verb_current_recurse
6363
    when a backtracking return such as MATCH_COMMIT is given. This enables the
6364
    recurse processing to catch verbs from within the recursion. */
6365
6366
0
    case OP_COMMIT:
6367
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM13);
6368
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6369
0
    mb->verb_current_recurse = Fcurrent_recurse;
6370
0
    RRETURN(MATCH_COMMIT);
6371
6372
0
    case OP_COMMIT_ARG:
6373
0
    Fmark = mb->nomatch_mark = Fecode + 2;
6374
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM36);
6375
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6376
0
    mb->verb_current_recurse = Fcurrent_recurse;
6377
0
    RRETURN(MATCH_COMMIT);
6378
6379
0
    case OP_PRUNE:
6380
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM14);
6381
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6382
0
    mb->verb_current_recurse = Fcurrent_recurse;
6383
0
    RRETURN(MATCH_PRUNE);
6384
6385
0
    case OP_PRUNE_ARG:
6386
0
    Fmark = mb->nomatch_mark = Fecode + 2;
6387
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM15);
6388
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6389
0
    mb->verb_current_recurse = Fcurrent_recurse;
6390
0
    RRETURN(MATCH_PRUNE);
6391
6392
0
    case OP_SKIP:
6393
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM16);
6394
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6395
0
    mb->verb_skip_ptr = Feptr;   /* Pass back current position */
6396
0
    mb->verb_current_recurse = Fcurrent_recurse;
6397
0
    RRETURN(MATCH_SKIP);
6398
6399
    /* Note that, for Perl compatibility, SKIP with an argument does NOT set
6400
    nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was
6401
    not a matching mark, we have to re-run the match, ignoring the SKIP_ARG
6402
    that failed and any that precede it (either they also failed, or were not
6403
    triggered). To do this, we maintain a count of executed SKIP_ARGs. If a
6404
    SKIP_ARG gets to top level, the match is re-run with mb->ignore_skip_arg
6405
    set to the count of the one that failed. */
6406
6407
0
    case OP_SKIP_ARG:
6408
0
    mb->skip_arg_count++;
6409
0
    if (mb->skip_arg_count <= mb->ignore_skip_arg)
6410
0
      {
6411
0
      Fecode += PRIV(OP_lengths)[*Fecode] + Fecode[1];
6412
0
      break;
6413
0
      }
6414
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM17);
6415
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6416
6417
    /* Pass back the current skip name and return the special MATCH_SKIP_ARG
6418
    return code. This will either be caught by a matching MARK, or get to the
6419
    top, where it causes a rematch with mb->ignore_skip_arg set to the value of
6420
    mb->skip_arg_count. */
6421
6422
0
    mb->verb_skip_ptr = Fecode + 2;
6423
0
    mb->verb_current_recurse = Fcurrent_recurse;
6424
0
    RRETURN(MATCH_SKIP_ARG);
6425
6426
    /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
6427
    the branch in which it occurs can be determined. */
6428
6429
0
    case OP_THEN:
6430
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM18);
6431
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6432
0
    mb->verb_ecode_ptr = Fecode;
6433
0
    mb->verb_current_recurse = Fcurrent_recurse;
6434
0
    RRETURN(MATCH_THEN);
6435
6436
0
    case OP_THEN_ARG:
6437
0
    Fmark = mb->nomatch_mark = Fecode + 2;
6438
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM19);
6439
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6440
0
    mb->verb_ecode_ptr = Fecode;
6441
0
    mb->verb_current_recurse = Fcurrent_recurse;
6442
0
    RRETURN(MATCH_THEN);
6443
6444
6445
    /* ===================================================================== */
6446
    /* There's been some horrible disaster. Arrival here can only mean there is
6447
    something seriously wrong in the code above or the OP_xxx definitions. */
6448
6449
0
    default:
6450
0
    return PCRE2_ERROR_INTERNAL;
6451
929M
    }
6452
6453
  /* Do not insert any code in here without much thought; it is assumed
6454
  that "continue" in the code above comes out to here to repeat the main
6455
  loop. */
6456
6457
929M
  }  /* End of main loop */
6458
/* Control never reaches here */
6459
6460
6461
/* ========================================================================= */
6462
/* The RRETURN() macro jumps here. The number that is saved in Freturn_id
6463
indicates which label we actually want to return to. The value in Frdepth is
6464
the index number of the frame in the vector. The return value has been placed
6465
in rrc. */
6466
6467
504M
#define LBL(val) case val: goto L_RM##val;
6468
6469
504M
RETURN_SWITCH:
6470
504M
if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
6471
504M
if (Frdepth == 0) return rrc;                     /* Exit from the top level */
6472
504M
F = (heapframe *)((char *)F - Fback_frame);       /* Backtrack */
6473
504M
mb->cb->callout_flags |= PCRE2_CALLOUT_BACKTRACK; /* Note for callouts */
6474
6475
#ifdef DEBUG_SHOW_RMATCH
6476
fprintf(stderr, "++ RETURN %d to RM%d\n", rrc, Freturn_id);
6477
#endif
6478
6479
504M
switch (Freturn_id)
6480
504M
  {
6481
1.22M
  LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
6482
640
  LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)
6483
288k
  LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)
6484
5.05M
  LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)
6485
280M
  LBL(33) LBL(34) LBL(35) LBL(36) LBL(37)
6486
6487
0
#ifdef SUPPORT_WIDE_CHARS
6488
291k
  LBL(100) LBL(101)
6489
0
#endif
6490
6491
0
#ifdef SUPPORT_UNICODE
6492
120k
  LBL(200) LBL(201) LBL(202) LBL(203) LBL(204) LBL(205) LBL(206)
6493
59.4k
  LBL(207) LBL(208) LBL(209) LBL(210) LBL(211) LBL(212) LBL(213)
6494
14.6M
  LBL(214) LBL(215) LBL(216) LBL(217) LBL(218) LBL(219) LBL(220)
6495
134M
  LBL(221) LBL(222) LBL(223) LBL(224) LBL(225)
6496
0
#endif
6497
6498
0
  default:
6499
0
  return PCRE2_ERROR_INTERNAL;
6500
504M
  }
6501
504M
#undef LBL
6502
504M
}
6503
6504
6505
/*************************************************
6506
*           Match a Regular Expression           *
6507
*************************************************/
6508
6509
/* This function applies a compiled pattern to a subject string and picks out
6510
portions of the string if it matches. Two elements in the vector are set for
6511
each substring: the offsets to the start and end of the substring.
6512
6513
Arguments:
6514
  code            points to the compiled expression
6515
  subject         points to the subject string
6516
  length          length of subject string (may contain binary zeros)
6517
  start_offset    where to start in the subject string
6518
  options         option bits
6519
  match_data      points to a match_data block
6520
  mcontext        points to a PCRE2 context
6521
6522
Returns:          > 0 => success; value is the number of ovector pairs filled
6523
                  = 0 => success, but ovector is not big enough
6524
                  = -1 => failed to match (PCRE2_ERROR_NOMATCH)
6525
                  = -2 => partial match (PCRE2_ERROR_PARTIAL)
6526
                  < -2 => some kind of unexpected problem
6527
*/
6528
6529
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
6530
pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
6531
  PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
6532
  pcre2_match_context *mcontext)
6533
5.97k
{
6534
5.97k
int rc;
6535
5.97k
int was_zero_terminated = 0;
6536
5.97k
const uint8_t *start_bits = NULL;
6537
5.97k
const pcre2_real_code *re = (const pcre2_real_code *)code;
6538
6539
5.97k
BOOL anchored;
6540
5.97k
BOOL firstline;
6541
5.97k
BOOL has_first_cu = FALSE;
6542
5.97k
BOOL has_req_cu = FALSE;
6543
5.97k
BOOL startline;
6544
6545
5.97k
#if PCRE2_CODE_UNIT_WIDTH == 8
6546
5.97k
PCRE2_SPTR memchr_found_first_cu;
6547
5.97k
PCRE2_SPTR memchr_found_first_cu2;
6548
5.97k
#endif
6549
6550
5.97k
PCRE2_UCHAR first_cu = 0;
6551
5.97k
PCRE2_UCHAR first_cu2 = 0;
6552
5.97k
PCRE2_UCHAR req_cu = 0;
6553
5.97k
PCRE2_UCHAR req_cu2 = 0;
6554
6555
5.97k
PCRE2_SPTR bumpalong_limit;
6556
5.97k
PCRE2_SPTR end_subject;
6557
5.97k
PCRE2_SPTR true_end_subject;
6558
5.97k
PCRE2_SPTR start_match;
6559
5.97k
PCRE2_SPTR req_cu_ptr;
6560
5.97k
PCRE2_SPTR start_partial;
6561
5.97k
PCRE2_SPTR match_partial;
6562
6563
#ifdef SUPPORT_JIT
6564
BOOL use_jit;
6565
#endif
6566
6567
/* This flag is needed even when Unicode is not supported for convenience
6568
(it is used by the IS_NEWLINE macro). */
6569
6570
5.97k
BOOL utf = FALSE;
6571
6572
5.97k
#ifdef SUPPORT_UNICODE
6573
5.97k
BOOL ucp = FALSE;
6574
5.97k
BOOL allow_invalid;
6575
5.97k
uint32_t fragment_options = 0;
6576
#ifdef SUPPORT_JIT
6577
BOOL jit_checked_utf = FALSE;
6578
#endif
6579
5.97k
#endif  /* SUPPORT_UNICODE */
6580
6581
5.97k
PCRE2_SIZE frame_size;
6582
5.97k
PCRE2_SIZE heapframes_size;
6583
6584
/* We need to have mb as a pointer to a match block, because the IS_NEWLINE
6585
macro is used below, and it expects NLBLOCK to be defined as a pointer. */
6586
6587
5.97k
pcre2_callout_block cb;
6588
5.97k
match_block actual_match_block;
6589
5.97k
match_block *mb = &actual_match_block;
6590
6591
/* Recognize NULL, length 0 as an empty string. */
6592
6593
5.97k
if (subject == NULL && length == 0) subject = (PCRE2_SPTR)"";
6594
6595
/* Plausibility checks */
6596
6597
5.97k
if ((options & ~PUBLIC_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION;
6598
5.97k
if (code == NULL || subject == NULL || match_data == NULL)
6599
0
  return PCRE2_ERROR_NULL;
6600
6601
5.97k
start_match = subject + start_offset;
6602
5.97k
req_cu_ptr = start_match - 1;
6603
5.97k
if (length == PCRE2_ZERO_TERMINATED)
6604
0
  {
6605
0
  length = PRIV(strlen)(subject);
6606
0
  was_zero_terminated = 1;
6607
0
  }
6608
5.97k
true_end_subject = end_subject = subject + length;
6609
6610
5.97k
if (start_offset > length) return PCRE2_ERROR_BADOFFSET;
6611
6612
/* Check that the first field in the block is the magic number. */
6613
6614
5.97k
if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
6615
6616
/* Check the code unit width. */
6617
6618
5.97k
if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8)
6619
0
  return PCRE2_ERROR_BADMODE;
6620
6621
/* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the
6622
options variable for this function. Users of PCRE2 who are not calling the
6623
function directly would like to have a way of setting these flags, in the same
6624
way that they can set pcre2_compile() flags like PCRE2_NO_AUTOPOSSESS with
6625
constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and
6626
(*NOTEMPTY_ATSTART) set bits in the pattern's "flags" field which we now
6627
transfer to the options for this function. The bits are guaranteed to be
6628
adjacent, but do not have the same values. This bit of Boolean trickery assumes
6629
that the match-time bits are not more significant than the flag bits. If by
6630
accident this is not the case, a compile-time division by zero error will
6631
occur. */
6632
6633
17.9k
#define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
6634
11.9k
#define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART)
6635
5.97k
options |= (re->flags & FF) / ((FF & (~FF+1)) / (OO & (~OO+1)));
6636
5.97k
#undef FF
6637
5.97k
#undef OO
6638
6639
/* If the pattern was successfully studied with JIT support, we will run the
6640
JIT executable instead of the rest of this function. Most options must be set
6641
at compile time for the JIT code to be usable. */
6642
6643
#ifdef SUPPORT_JIT
6644
use_jit = (re->executable_jit != NULL &&
6645
          (options & ~PUBLIC_JIT_MATCH_OPTIONS) == 0);
6646
#endif
6647
6648
/* Initialize UTF/UCP parameters. */
6649
6650
5.97k
#ifdef SUPPORT_UNICODE
6651
5.97k
utf = (re->overall_options & PCRE2_UTF) != 0;
6652
5.97k
allow_invalid = (re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0;
6653
5.97k
ucp = (re->overall_options & PCRE2_UCP) != 0;
6654
5.97k
#endif  /* SUPPORT_UNICODE */
6655
6656
/* Convert the partial matching flags into an integer. */
6657
6658
5.97k
mb->partial = ((options & PCRE2_PARTIAL_HARD) != 0)? 2 :
6659
5.97k
              ((options & PCRE2_PARTIAL_SOFT) != 0)? 1 : 0;
6660
6661
/* Partial matching and PCRE2_ENDANCHORED are currently not allowed at the same
6662
time. */
6663
6664
5.97k
if (mb->partial != 0 &&
6665
0
   ((re->overall_options | options) & PCRE2_ENDANCHORED) != 0)
6666
0
  return PCRE2_ERROR_BADOPTION;
6667
6668
/* It is an error to set an offset limit without setting the flag at compile
6669
time. */
6670
6671
5.97k
if (mcontext != NULL && mcontext->offset_limit != PCRE2_UNSET &&
6672
0
     (re->overall_options & PCRE2_USE_OFFSET_LIMIT) == 0)
6673
0
  return PCRE2_ERROR_BADOFFSETLIMIT;
6674
6675
/* If the match data block was previously used with PCRE2_COPY_MATCHED_SUBJECT,
6676
free the memory that was obtained. Set the field to NULL for no match cases. */
6677
6678
5.97k
if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)
6679
0
  {
6680
0
  match_data->memctl.free((void *)match_data->subject,
6681
0
    match_data->memctl.memory_data);
6682
0
  match_data->flags &= ~PCRE2_MD_COPIED_SUBJECT;
6683
0
  }
6684
5.97k
match_data->subject = NULL;
6685
6686
/* Zero the error offset in case the first code unit is invalid UTF. */
6687
6688
5.97k
match_data->startchar = 0;
6689
6690
6691
/* ============================= JIT matching ============================== */
6692
6693
/* Prepare for JIT matching. Check a UTF string for validity unless no check is
6694
requested or invalid UTF can be handled. We check only the portion of the
6695
subject that might be inspected during matching - from the offset minus the
6696
maximum lookbehind to the given length. This saves time when a small part of a
6697
large subject is being matched by the use of a starting offset. Note that the
6698
maximum lookbehind is a number of characters, not code units. */
6699
6700
#ifdef SUPPORT_JIT
6701
if (use_jit)
6702
  {
6703
#ifdef SUPPORT_UNICODE
6704
  if (utf && (options & PCRE2_NO_UTF_CHECK) == 0 && !allow_invalid)
6705
    {
6706
#if PCRE2_CODE_UNIT_WIDTH != 32
6707
    unsigned int i;
6708
#endif
6709
6710
    /* For 8-bit and 16-bit UTF, check that the first code unit is a valid
6711
    character start. */
6712
6713
#if PCRE2_CODE_UNIT_WIDTH != 32
6714
    if (start_match < end_subject && NOT_FIRSTCU(*start_match))
6715
      {
6716
      if (start_offset > 0) return PCRE2_ERROR_BADUTFOFFSET;
6717
#if PCRE2_CODE_UNIT_WIDTH == 8
6718
      return PCRE2_ERROR_UTF8_ERR20;  /* Isolated 0x80 byte */
6719
#else
6720
      return PCRE2_ERROR_UTF16_ERR3;  /* Isolated low surrogate */
6721
#endif
6722
      }
6723
#endif  /* WIDTH != 32 */
6724
6725
    /* Move back by the maximum lookbehind, just in case it happens at the very
6726
    start of matching. */
6727
6728
#if PCRE2_CODE_UNIT_WIDTH != 32
6729
    for (i = re->max_lookbehind; i > 0 && start_match > subject; i--)
6730
      {
6731
      start_match--;
6732
      while (start_match > subject &&
6733
#if PCRE2_CODE_UNIT_WIDTH == 8
6734
      (*start_match & 0xc0) == 0x80)
6735
#else  /* 16-bit */
6736
      (*start_match & 0xfc00) == 0xdc00)
6737
#endif
6738
        start_match--;
6739
      }
6740
#else  /* PCRE2_CODE_UNIT_WIDTH != 32 */
6741
6742
    /* In the 32-bit library, one code unit equals one character. However,
6743
    we cannot just subtract the lookbehind and then compare pointers, because
6744
    a very large lookbehind could create an invalid pointer. */
6745
6746
    if (start_offset >= re->max_lookbehind)
6747
      start_match -= re->max_lookbehind;
6748
    else
6749
      start_match = subject;
6750
#endif  /* PCRE2_CODE_UNIT_WIDTH != 32 */
6751
6752
    /* Validate the relevant portion of the subject. Adjust the offset of an
6753
    invalid code point to be an absolute offset in the whole string. */
6754
6755
    match_data->rc = PRIV(valid_utf)(start_match,
6756
      length - (start_match - subject), &(match_data->startchar));
6757
    if (match_data->rc != 0)
6758
      {
6759
      match_data->startchar += start_match - subject;
6760
      return match_data->rc;
6761
      }
6762
    jit_checked_utf = TRUE;
6763
    }
6764
#endif  /* SUPPORT_UNICODE */
6765
6766
  /* If JIT returns BADOPTION, which means that the selected complete or
6767
  partial matching mode was not compiled, fall through to the interpreter. */
6768
6769
  rc = pcre2_jit_match(code, subject, length, start_offset, options,
6770
    match_data, mcontext);
6771
  if (rc != PCRE2_ERROR_JIT_BADOPTION)
6772
    {
6773
    match_data->subject_length = length;
6774
    if (rc >= 0 && (options & PCRE2_COPY_MATCHED_SUBJECT) != 0)
6775
      {
6776
      length = CU2BYTES(length + was_zero_terminated);
6777
      match_data->subject = match_data->memctl.malloc(length,
6778
        match_data->memctl.memory_data);
6779
      if (match_data->subject == NULL) return PCRE2_ERROR_NOMEMORY;
6780
      memcpy((void *)match_data->subject, subject, length);
6781
      match_data->flags |= PCRE2_MD_COPIED_SUBJECT;
6782
      }
6783
    return rc;
6784
    }
6785
  }
6786
#endif  /* SUPPORT_JIT */
6787
6788
/* ========================= End of JIT matching ========================== */
6789
6790
6791
/* Proceed with non-JIT matching. The default is to allow lookbehinds to the
6792
start of the subject. A UTF check when there is a non-zero offset may change
6793
this. */
6794
6795
5.97k
mb->check_subject = subject;
6796
6797
/* If a UTF subject string was not checked for validity in the JIT code above,
6798
check it here, and handle support for invalid UTF strings. The check above
6799
happens only when invalid UTF is not supported and PCRE2_NO_UTF_CHECK is unset.
6800
If we get here in those circumstances, it means the subject string is valid,
6801
but for some reason JIT matching was not successful. There is no need to check
6802
the subject again.
6803
6804
We check only the portion of the subject that might be inspected during
6805
matching - from the offset minus the maximum lookbehind to the given length.
6806
This saves time when a small part of a large subject is being matched by the
6807
use of a starting offset. Note that the maximum lookbehind is a number of
6808
characters, not code units.
6809
6810
Note also that support for invalid UTF forces a check, overriding the setting
6811
of PCRE2_NO_UTF_CHECK. */
6812
6813
5.97k
#ifdef SUPPORT_UNICODE
6814
5.97k
if (utf &&
6815
#ifdef SUPPORT_JIT
6816
    !jit_checked_utf &&
6817
#endif
6818
1.24k
    ((options & PCRE2_NO_UTF_CHECK) == 0 || allow_invalid))
6819
1.21k
  {
6820
1.21k
#if PCRE2_CODE_UNIT_WIDTH != 32
6821
1.21k
  BOOL skipped_bad_start = FALSE;
6822
1.21k
#endif
6823
6824
  /* For 8-bit and 16-bit UTF, check that the first code unit is a valid
6825
  character start. If we are handling invalid UTF, just skip over such code
6826
  units. Otherwise, give an appropriate error. */
6827
6828
1.21k
#if PCRE2_CODE_UNIT_WIDTH != 32
6829
1.21k
  if (allow_invalid)
6830
0
    {
6831
0
    while (start_match < end_subject && NOT_FIRSTCU(*start_match))
6832
0
      {
6833
0
      start_match++;
6834
0
      skipped_bad_start = TRUE;
6835
0
      }
6836
0
    }
6837
1.21k
  else if (start_match < end_subject && NOT_FIRSTCU(*start_match))
6838
1
    {
6839
1
    if (start_offset > 0) return PCRE2_ERROR_BADUTFOFFSET;
6840
1
#if PCRE2_CODE_UNIT_WIDTH == 8
6841
1
    return PCRE2_ERROR_UTF8_ERR20;  /* Isolated 0x80 byte */
6842
#else
6843
    return PCRE2_ERROR_UTF16_ERR3;  /* Isolated low surrogate */
6844
#endif
6845
1
    }
6846
1.21k
#endif  /* WIDTH != 32 */
6847
6848
  /* The mb->check_subject field points to the start of UTF checking;
6849
  lookbehinds can go back no further than this. */
6850
6851
1.21k
  mb->check_subject = start_match;
6852
6853
  /* Move back by the maximum lookbehind, just in case it happens at the very
6854
  start of matching, but don't do this if we skipped bad 8-bit or 16-bit code
6855
  units above. */
6856
6857
1.21k
#if PCRE2_CODE_UNIT_WIDTH != 32
6858
1.21k
  if (!skipped_bad_start)
6859
1.21k
    {
6860
1.21k
    unsigned int i;
6861
1.21k
    for (i = re->max_lookbehind; i > 0 && mb->check_subject > subject; i--)
6862
0
      {
6863
0
      mb->check_subject--;
6864
0
      while (mb->check_subject > subject &&
6865
0
#if PCRE2_CODE_UNIT_WIDTH == 8
6866
0
      (*mb->check_subject & 0xc0) == 0x80)
6867
#else  /* 16-bit */
6868
      (*mb->check_subject & 0xfc00) == 0xdc00)
6869
#endif
6870
0
        mb->check_subject--;
6871
0
      }
6872
1.21k
    }
6873
#else  /* PCRE2_CODE_UNIT_WIDTH != 32 */
6874
6875
  /* In the 32-bit library, one code unit equals one character. However,
6876
  we cannot just subtract the lookbehind and then compare pointers, because
6877
  a very large lookbehind could create an invalid pointer. */
6878
6879
  if (start_offset >= re->max_lookbehind)
6880
    mb->check_subject -= re->max_lookbehind;
6881
  else
6882
    mb->check_subject = subject;
6883
#endif  /* PCRE2_CODE_UNIT_WIDTH != 32 */
6884
6885
  /* Validate the relevant portion of the subject. There's a loop in case we
6886
  encounter bad UTF in the characters preceding start_match which we are
6887
  scanning because of a lookbehind. */
6888
6889
1.21k
  for (;;)
6890
1.21k
    {
6891
1.21k
    match_data->rc = PRIV(valid_utf)(mb->check_subject,
6892
1.21k
      length - (mb->check_subject - subject), &(match_data->startchar));
6893
6894
1.21k
    if (match_data->rc == 0) break;   /* Valid UTF string */
6895
6896
    /* Invalid UTF string. Adjust the offset to be an absolute offset in the
6897
    whole string. If we are handling invalid UTF strings, set end_subject to
6898
    stop before the bad code unit, and set the options to "not end of line".
6899
    Otherwise return the error. */
6900
6901
163
    match_data->startchar += mb->check_subject - subject;
6902
163
    if (!allow_invalid || match_data->rc > 0) return match_data->rc;
6903
0
    end_subject = subject + match_data->startchar;
6904
6905
    /* If the end precedes start_match, it means there is invalid UTF in the
6906
    extra code units we reversed over because of a lookbehind. Advance past the
6907
    first bad code unit, and then skip invalid character starting code units in
6908
    8-bit and 16-bit modes, and try again with the original end point. */
6909
6910
0
    if (end_subject < start_match)
6911
0
      {
6912
0
      mb->check_subject = end_subject + 1;
6913
0
#if PCRE2_CODE_UNIT_WIDTH != 32
6914
0
      while (mb->check_subject < start_match && NOT_FIRSTCU(*mb->check_subject))
6915
0
        mb->check_subject++;
6916
0
#endif
6917
0
      end_subject = true_end_subject;
6918
0
      }
6919
6920
    /* Otherwise, set the not end of line option, and do the match. */
6921
6922
0
    else
6923
0
      {
6924
0
      fragment_options = PCRE2_NOTEOL;
6925
0
      break;
6926
0
      }
6927
0
    }
6928
1.21k
  }
6929
5.81k
#endif  /* SUPPORT_UNICODE */
6930
6931
/* A NULL match context means "use a default context", but we take the memory
6932
control functions from the pattern. */
6933
6934
5.81k
if (mcontext == NULL)
6935
0
  {
6936
0
  mcontext = (pcre2_match_context *)(&PRIV(default_match_context));
6937
0
  mb->memctl = re->memctl;
6938
0
  }
6939
5.81k
else mb->memctl = mcontext->memctl;
6940
6941
5.81k
anchored = ((re->overall_options | options) & PCRE2_ANCHORED) != 0;
6942
5.81k
firstline = !anchored && (re->overall_options & PCRE2_FIRSTLINE) != 0;
6943
5.81k
startline = (re->flags & PCRE2_STARTLINE) != 0;
6944
5.81k
bumpalong_limit = (mcontext->offset_limit == PCRE2_UNSET)?
6945
5.81k
  true_end_subject : subject + mcontext->offset_limit;
6946
6947
/* Initialize and set up the fixed fields in the callout block, with a pointer
6948
in the match block. */
6949
6950
5.81k
mb->cb = &cb;
6951
5.81k
cb.version = 2;
6952
5.81k
cb.subject = subject;
6953
5.81k
cb.subject_length = (PCRE2_SIZE)(end_subject - subject);
6954
5.81k
cb.callout_flags = 0;
6955
6956
/* Fill in the remaining fields in the match block, except for moptions, which
6957
gets set later. */
6958
6959
5.81k
mb->callout = mcontext->callout;
6960
5.81k
mb->callout_data = mcontext->callout_data;
6961
6962
5.81k
mb->start_subject = subject;
6963
5.81k
mb->start_offset = start_offset;
6964
5.81k
mb->end_subject = end_subject;
6965
5.81k
mb->true_end_subject = true_end_subject;
6966
5.81k
mb->hasthen = (re->flags & PCRE2_HASTHEN) != 0;
6967
5.81k
mb->allowemptypartial = (re->max_lookbehind > 0) ||
6968
4.98k
    (re->flags & PCRE2_MATCH_EMPTY) != 0;
6969
5.81k
mb->poptions = re->overall_options;          /* Pattern options */
6970
5.81k
mb->ignore_skip_arg = 0;
6971
5.81k
mb->mark = mb->nomatch_mark = NULL;          /* In case never set */
6972
6973
/* The name table is needed for finding all the numbers associated with a
6974
given name, for condition testing. The code follows the name table. */
6975
6976
5.81k
mb->name_table = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code));
6977
5.81k
mb->name_count = re->name_count;
6978
5.81k
mb->name_entry_size = re->name_entry_size;
6979
5.81k
mb->start_code = mb->name_table + re->name_count * re->name_entry_size;
6980
6981
/* Process the \R and newline settings. */
6982
6983
5.81k
mb->bsr_convention = re->bsr_convention;
6984
5.81k
mb->nltype = NLTYPE_FIXED;
6985
5.81k
switch(re->newline_convention)
6986
5.81k
  {
6987
0
  case PCRE2_NEWLINE_CR:
6988
0
  mb->nllen = 1;
6989
0
  mb->nl[0] = CHAR_CR;
6990
0
  break;
6991
6992
5.81k
  case PCRE2_NEWLINE_LF:
6993
5.81k
  mb->nllen = 1;
6994
5.81k
  mb->nl[0] = CHAR_NL;
6995
5.81k
  break;
6996
6997
0
  case PCRE2_NEWLINE_NUL:
6998
0
  mb->nllen = 1;
6999
0
  mb->nl[0] = CHAR_NUL;
7000
0
  break;
7001
7002
0
  case PCRE2_NEWLINE_CRLF:
7003
0
  mb->nllen = 2;
7004
0
  mb->nl[0] = CHAR_CR;
7005
0
  mb->nl[1] = CHAR_NL;
7006
0
  break;
7007
7008
0
  case PCRE2_NEWLINE_ANY:
7009
0
  mb->nltype = NLTYPE_ANY;
7010
0
  break;
7011
7012
0
  case PCRE2_NEWLINE_ANYCRLF:
7013
0
  mb->nltype = NLTYPE_ANYCRLF;
7014
0
  break;
7015
7016
0
  default: return PCRE2_ERROR_INTERNAL;
7017
5.81k
  }
7018
7019
/* The backtracking frames have fixed data at the front, and a PCRE2_SIZE
7020
vector at the end, whose size depends on the number of capturing parentheses in
7021
the pattern. It is not used at all if there are no capturing parentheses.
7022
7023
  frame_size                   is the total size of each frame
7024
  match_data->heapframes       is the pointer to the frames vector
7025
  match_data->heapframes_size  is the allocated size of the vector
7026
7027
We must pad the frame_size for alignment to ensure subsequent frames are as
7028
aligned as heapframe. Whilst ovector is word-aligned due to being a PCRE2_SIZE
7029
array, that does not guarantee it is suitably aligned for pointers, as some
7030
architectures have pointers that are larger than a size_t. */
7031
7032
5.81k
frame_size = (offsetof(heapframe, ovector) +
7033
5.81k
  re->top_bracket * 2 * sizeof(PCRE2_SIZE) + HEAPFRAME_ALIGNMENT - 1) &
7034
5.81k
  ~(HEAPFRAME_ALIGNMENT - 1);
7035
7036
/* Limits set in the pattern override the match context only if they are
7037
smaller. */
7038
7039
5.81k
mb->heap_limit = ((mcontext->heap_limit < re->limit_heap)?
7040
5.81k
  mcontext->heap_limit : re->limit_heap);
7041
7042
5.81k
mb->match_limit = (mcontext->match_limit < re->limit_match)?
7043
5.81k
  mcontext->match_limit : re->limit_match;
7044
7045
5.81k
mb->match_limit_depth = (mcontext->depth_limit < re->limit_depth)?
7046
5.81k
  mcontext->depth_limit : re->limit_depth;
7047
7048
/* If a pattern has very many capturing parentheses, the frame size may be very
7049
large. Set the initial frame vector size to ensure that there are at least 10
7050
available frames, but enforce a minimum of START_FRAMES_SIZE. If this is
7051
greater than the heap limit, get as large a vector as possible. */
7052
7053
5.81k
heapframes_size = frame_size * 10;
7054
5.81k
if (heapframes_size < START_FRAMES_SIZE) heapframes_size = START_FRAMES_SIZE;
7055
5.81k
if (heapframes_size / 1024 > mb->heap_limit)
7056
0
  {
7057
0
  PCRE2_SIZE max_size = 1024 * mb->heap_limit;
7058
0
  if (max_size < frame_size) return PCRE2_ERROR_HEAPLIMIT;
7059
0
  heapframes_size = max_size;
7060
0
  }
7061
7062
/* If an existing frame vector in the match_data block is large enough, we can
7063
use it. Otherwise, free any pre-existing vector and get a new one. */
7064
7065
5.81k
if (match_data->heapframes_size < heapframes_size)
7066
709
  {
7067
709
  match_data->memctl.free(match_data->heapframes,
7068
709
    match_data->memctl.memory_data);
7069
709
  match_data->heapframes = match_data->memctl.malloc(heapframes_size,
7070
709
    match_data->memctl.memory_data);
7071
709
  if (match_data->heapframes == NULL)
7072
0
    {
7073
0
    match_data->heapframes_size = 0;
7074
0
    return PCRE2_ERROR_NOMEMORY;
7075
0
    }
7076
709
  match_data->heapframes_size = heapframes_size;
7077
709
  }
7078
7079
/* Write to the ovector within the first frame to mark every capture unset and
7080
to avoid uninitialized memory read errors when it is copied to a new frame. */
7081
7082
5.81k
memset((char *)(match_data->heapframes) + offsetof(heapframe, ovector), 0xff,
7083
5.81k
  frame_size - offsetof(heapframe, ovector));
7084
7085
/* Pointers to the individual character tables */
7086
7087
5.81k
mb->lcc = re->tables + lcc_offset;
7088
5.81k
mb->fcc = re->tables + fcc_offset;
7089
5.81k
mb->ctypes = re->tables + ctypes_offset;
7090
7091
/* Set up the first code unit to match, if available. If there's no first code
7092
unit there may be a bitmap of possible first characters. */
7093
7094
5.81k
if ((re->flags & PCRE2_FIRSTSET) != 0)
7095
762
  {
7096
762
  has_first_cu = TRUE;
7097
762
  first_cu = first_cu2 = (PCRE2_UCHAR)(re->first_codeunit);
7098
762
  if ((re->flags & PCRE2_FIRSTCASELESS) != 0)
7099
25
    {
7100
25
    first_cu2 = TABLE_GET(first_cu, mb->fcc, first_cu);
7101
25
#ifdef SUPPORT_UNICODE
7102
25
#if PCRE2_CODE_UNIT_WIDTH == 8
7103
25
    if (first_cu > 127 && ucp && !utf) first_cu2 = UCD_OTHERCASE(first_cu);
7104
#else
7105
    if (first_cu > 127 && (utf || ucp)) first_cu2 = UCD_OTHERCASE(first_cu);
7106
#endif
7107
25
#endif  /* SUPPORT_UNICODE */
7108
25
    }
7109
762
  }
7110
5.04k
else
7111
5.04k
  if (!startline && (re->flags & PCRE2_FIRSTMAPSET) != 0)
7112
1.68k
    start_bits = re->start_bitmap;
7113
7114
/* There may also be a "last known required character" set. */
7115
7116
5.81k
if ((re->flags & PCRE2_LASTSET) != 0)
7117
795
  {
7118
795
  has_req_cu = TRUE;
7119
795
  req_cu = req_cu2 = (PCRE2_UCHAR)(re->last_codeunit);
7120
795
  if ((re->flags & PCRE2_LASTCASELESS) != 0)
7121
66
    {
7122
66
    req_cu2 = TABLE_GET(req_cu, mb->fcc, req_cu);
7123
66
#ifdef SUPPORT_UNICODE
7124
66
#if PCRE2_CODE_UNIT_WIDTH == 8
7125
66
    if (req_cu > 127 && ucp && !utf) req_cu2 = UCD_OTHERCASE(req_cu);
7126
#else
7127
    if (req_cu > 127 && (utf || ucp)) req_cu2 = UCD_OTHERCASE(req_cu);
7128
#endif
7129
66
#endif  /* SUPPORT_UNICODE */
7130
66
    }
7131
795
  }
7132
7133
7134
/* ==========================================================================*/
7135
7136
/* Loop for handling unanchored repeated matching attempts; for anchored regexes
7137
the loop runs just once. */
7138
7139
5.81k
#ifdef SUPPORT_UNICODE
7140
5.81k
FRAGMENT_RESTART:
7141
5.81k
#endif
7142
7143
5.81k
start_partial = match_partial = NULL;
7144
5.81k
mb->hitend = FALSE;
7145
7146
5.81k
#if PCRE2_CODE_UNIT_WIDTH == 8
7147
5.81k
memchr_found_first_cu = NULL;
7148
5.81k
memchr_found_first_cu2 = NULL;
7149
5.81k
#endif
7150
7151
5.81k
for(;;)
7152
332k
  {
7153
332k
  PCRE2_SPTR new_start_match;
7154
7155
  /* ----------------- Start of match optimizations ---------------- */
7156
7157
  /* There are some optimizations that avoid running the match if a known
7158
  starting point is not found, or if a known later code unit is not present.
7159
  However, there is an option (settable at compile time) that disables these,
7160
  for testing and for ensuring that all callouts do actually occur. */
7161
7162
332k
  if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
7163
332k
    {
7164
    /* If firstline is TRUE, the start of the match is constrained to the first
7165
    line of a multiline string. That is, the match must be before or at the
7166
    first newline following the start of matching. Temporarily adjust
7167
    end_subject so that we stop the scans for a first code unit at a newline.
7168
    If the match fails at the newline, later code breaks the loop. */
7169
7170
332k
    if (firstline)
7171
0
      {
7172
0
      PCRE2_SPTR t = start_match;
7173
0
#ifdef SUPPORT_UNICODE
7174
0
      if (utf)
7175
0
        {
7176
0
        while (t < end_subject && !IS_NEWLINE(t))
7177
0
          {
7178
0
          t++;
7179
0
          ACROSSCHAR(t < end_subject, t, t++);
7180
0
          }
7181
0
        }
7182
0
      else
7183
0
#endif
7184
0
      while (t < end_subject && !IS_NEWLINE(t)) t++;
7185
0
      end_subject = t;
7186
0
      }
7187
7188
    /* Anchored: check the first code unit if one is recorded. This may seem
7189
    pointless but it can help in detecting a no match case without scanning for
7190
    the required code unit. */
7191
7192
332k
    if (anchored)
7193
947
      {
7194
947
      if (has_first_cu || start_bits != NULL)
7195
52
        {
7196
52
        BOOL ok = start_match < end_subject;
7197
52
        if (ok)
7198
48
          {
7199
48
          PCRE2_UCHAR c = UCHAR21TEST(start_match);
7200
48
          ok = has_first_cu && (c == first_cu || c == first_cu2);
7201
48
          if (!ok && start_bits != NULL)
7202
42
            {
7203
#if PCRE2_CODE_UNIT_WIDTH != 8
7204
            if (c > 255) c = 255;
7205
#endif
7206
42
            ok = (start_bits[c/8] & (1u << (c&7))) != 0;
7207
42
            }
7208
48
          }
7209
52
        if (!ok)
7210
10
          {
7211
10
          rc = MATCH_NOMATCH;
7212
10
          break;
7213
10
          }
7214
52
        }
7215
947
      }
7216
7217
    /* Not anchored. Advance to a unique first code unit if there is one. */
7218
7219
331k
    else
7220
331k
      {
7221
331k
      if (has_first_cu)
7222
2.76k
        {
7223
2.76k
        if (first_cu != first_cu2)  /* Caseless */
7224
102
          {
7225
          /* In 16-bit and 32-bit modes we have to do our own search, so can
7226
          look for both cases at once. */
7227
7228
#if PCRE2_CODE_UNIT_WIDTH != 8
7229
          PCRE2_UCHAR smc;
7230
          while (start_match < end_subject &&
7231
                (smc = UCHAR21TEST(start_match)) != first_cu &&
7232
                 smc != first_cu2)
7233
            start_match++;
7234
#else
7235
          /* In 8-bit mode, the use of memchr() gives a big speed up, even
7236
          though we have to call it twice in order to find the earliest
7237
          occurrence of the code unit in either of its cases. Caching is used
7238
          to remember the positions of previously found code units. This can
7239
          make a huge difference when the strings are very long and only one
7240
          case is actually present. */
7241
7242
102
          PCRE2_SPTR pp1 = NULL;
7243
102
          PCRE2_SPTR pp2 = NULL;
7244
102
          PCRE2_SIZE searchlength = end_subject - start_match;
7245
7246
          /* If we haven't got a previously found position for first_cu, or if
7247
          the current starting position is later, we need to do a search. If
7248
          the code unit is not found, set it to the end. */
7249
7250
102
          if (memchr_found_first_cu == NULL ||
7251
77
              start_match > memchr_found_first_cu)
7252
82
            {
7253
82
            pp1 = memchr(start_match, first_cu, searchlength);
7254
82
            memchr_found_first_cu = (pp1 == NULL)? end_subject : pp1;
7255
82
            }
7256
7257
          /* If the start is before a previously found position, use the
7258
          previous position, or NULL if a previous search failed. */
7259
7260
20
          else pp1 = (memchr_found_first_cu == end_subject)? NULL :
7261
20
            memchr_found_first_cu;
7262
7263
          /* Do the same thing for the other case. */
7264
7265
102
          if (memchr_found_first_cu2 == NULL ||
7266
77
              start_match > memchr_found_first_cu2)
7267
45
            {
7268
45
            pp2 = memchr(start_match, first_cu2, searchlength);
7269
45
            memchr_found_first_cu2 = (pp2 == NULL)? end_subject : pp2;
7270
45
            }
7271
7272
57
          else pp2 = (memchr_found_first_cu2 == end_subject)? NULL :
7273
57
            memchr_found_first_cu2;
7274
7275
          /* Set the start to the end of the subject if neither case was found.
7276
          Otherwise, use the earlier found point. */
7277
7278
102
          if (pp1 == NULL)
7279
24
            start_match = (pp2 == NULL)? end_subject : pp2;
7280
78
          else
7281
78
            start_match = (pp2 == NULL || pp1 < pp2)? pp1 : pp2;
7282
7283
102
#endif  /* 8-bit handling */
7284
102
          }
7285
7286
        /* The caseful case is much simpler. */
7287
7288
2.66k
        else
7289
2.66k
          {
7290
#if PCRE2_CODE_UNIT_WIDTH != 8
7291
          while (start_match < end_subject && UCHAR21TEST(start_match) !=
7292
                 first_cu)
7293
            start_match++;
7294
#else
7295
2.66k
          start_match = memchr(start_match, first_cu, end_subject - start_match);
7296
2.66k
          if (start_match == NULL) start_match = end_subject;
7297
2.66k
#endif
7298
2.66k
          }
7299
7300
        /* If we can't find the required first code unit, having reached the
7301
        true end of the subject, break the bumpalong loop, to force a match
7302
        failure, except when doing partial matching, when we let the next cycle
7303
        run at the end of the subject. To see why, consider the pattern
7304
        /(?<=abc)def/, which partially matches "abc", even though the string
7305
        does not contain the starting character "d". If we have not reached the
7306
        true end of the subject (PCRE2_FIRSTLINE caused end_subject to be
7307
        temporarily modified) we also let the cycle run, because the matching
7308
        string is legitimately allowed to start with the first code unit of a
7309
        newline. */
7310
7311
2.76k
        if (mb->partial == 0 && start_match >= mb->end_subject)
7312
284
          {
7313
284
          rc = MATCH_NOMATCH;
7314
284
          break;
7315
284
          }
7316
2.76k
        }
7317
7318
      /* If there's no first code unit, advance to just after a linebreak for a
7319
      multiline match if required. */
7320
7321
329k
      else if (startline)
7322
18
        {
7323
18
        if (start_match > mb->start_subject + start_offset)
7324
14
          {
7325
14
#ifdef SUPPORT_UNICODE
7326
14
          if (utf)
7327
9
            {
7328
164
            while (start_match < end_subject && !WAS_NEWLINE(start_match))
7329
155
              {
7330
155
              start_match++;
7331
155
              ACROSSCHAR(start_match < end_subject, start_match, start_match++);
7332
155
              }
7333
9
            }
7334
5
          else
7335
5
#endif
7336
140
          while (start_match < end_subject && !WAS_NEWLINE(start_match))
7337
135
            start_match++;
7338
7339
          /* If we have just passed a CR and the newline option is ANY or
7340
          ANYCRLF, and we are now at a LF, advance the match position by one
7341
          more code unit. */
7342
7343
14
          if (start_match[-1] == CHAR_CR &&
7344
0
               (mb->nltype == NLTYPE_ANY || mb->nltype == NLTYPE_ANYCRLF) &&
7345
0
               start_match < end_subject &&
7346
0
               UCHAR21TEST(start_match) == CHAR_NL)
7347
0
            start_match++;
7348
14
          }
7349
18
        }
7350
7351
      /* If there's no first code unit or a requirement for a multiline line
7352
      start, advance to a non-unique first code unit if any have been
7353
      identified. The bitmap contains only 256 bits. When code units are 16 or
7354
      32 bits wide, all code units greater than 254 set the 255 bit. */
7355
7356
329k
      else if (start_bits != NULL)
7357
50.5k
        {
7358
105k
        while (start_match < end_subject)
7359
104k
          {
7360
104k
          uint32_t c = UCHAR21TEST(start_match);
7361
#if PCRE2_CODE_UNIT_WIDTH != 8
7362
          if (c > 255) c = 255;
7363
#endif
7364
104k
          if ((start_bits[c/8] & (1u << (c&7))) != 0) break;
7365
54.9k
          start_match++;
7366
54.9k
          }
7367
7368
        /* See comment above in first_cu checking about the next few lines. */
7369
7370
50.5k
        if (mb->partial == 0 && start_match >= mb->end_subject)
7371
814
          {
7372
814
          rc = MATCH_NOMATCH;
7373
814
          break;
7374
814
          }
7375
50.5k
        }
7376
331k
      }   /* End first code unit handling */
7377
7378
    /* Restore fudged end_subject */
7379
7380
331k
    end_subject = mb->end_subject;
7381
7382
    /* The following two optimizations must be disabled for partial matching. */
7383
7384
331k
    if (mb->partial == 0)
7385
331k
      {
7386
331k
      PCRE2_SPTR p;
7387
7388
      /* The minimum matching length is a lower bound; no string of that length
7389
      may actually match the pattern. Although the value is, strictly, in
7390
      characters, we treat it as code units to avoid spending too much time in
7391
      this optimization. */
7392
7393
331k
      if (end_subject - start_match < re->minlength)
7394
2.16k
        {
7395
2.16k
        rc = MATCH_NOMATCH;
7396
2.16k
        break;
7397
2.16k
        }
7398
7399
      /* If req_cu is set, we know that that code unit must appear in the
7400
      subject for the (non-partial) match to succeed. If the first code unit is
7401
      set, req_cu must be later in the subject; otherwise the test starts at
7402
      the match point. This optimization can save a huge amount of backtracking
7403
      in patterns with nested unlimited repeats that aren't going to match.
7404
      Writing separate code for caseful/caseless versions makes it go faster,
7405
      as does using an autoincrement and backing off on a match. As in the case
7406
      of the first code unit, using memchr() in the 8-bit library gives a big
7407
      speed up. Unlike the first_cu check above, we do not need to call
7408
      memchr() twice in the caseless case because we only need to check for the
7409
      presence of the character in either case, not find the first occurrence.
7410
7411
      The search can be skipped if the code unit was found later than the
7412
      current starting point in a previous iteration of the bumpalong loop.
7413
7414
      HOWEVER: when the subject string is very, very long, searching to its end
7415
      can take a long time, and give bad performance on quite ordinary
7416
      anchored patterns. This showed up when somebody was matching something
7417
      like /^\d+C/ on a 32-megabyte string... so we don't do this when the
7418
      string is sufficiently long, but it's worth searching a lot more for
7419
      unanchored patterns. */
7420
7421
329k
      p = start_match + (has_first_cu? 1:0);
7422
329k
      if (has_req_cu && p > req_cu_ptr)
7423
2.36k
        {
7424
2.36k
        PCRE2_SIZE check_length = end_subject - start_match;
7425
7426
2.36k
        if (check_length < REQ_CU_MAX ||
7427
0
              (!anchored && check_length < REQ_CU_MAX * 1000))
7428
2.36k
          {
7429
2.36k
          if (req_cu != req_cu2)  /* Caseless */
7430
122
            {
7431
#if PCRE2_CODE_UNIT_WIDTH != 8
7432
            while (p < end_subject)
7433
              {
7434
              uint32_t pp = UCHAR21INCTEST(p);
7435
              if (pp == req_cu || pp == req_cu2) { p--; break; }
7436
              }
7437
#else  /* 8-bit code units */
7438
122
            PCRE2_SPTR pp = p;
7439
122
            p = memchr(pp, req_cu, end_subject - pp);
7440
122
            if (p == NULL)
7441
34
              {
7442
34
              p = memchr(pp, req_cu2, end_subject - pp);
7443
34
              if (p == NULL) p = end_subject;
7444
34
              }
7445
122
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
7446
122
            }
7447
7448
          /* The caseful case */
7449
7450
2.23k
          else
7451
2.23k
            {
7452
#if PCRE2_CODE_UNIT_WIDTH != 8
7453
            while (p < end_subject)
7454
              {
7455
              if (UCHAR21INCTEST(p) == req_cu) { p--; break; }
7456
              }
7457
7458
#else  /* 8-bit code units */
7459
2.23k
            p = memchr(p, req_cu, end_subject - p);
7460
2.23k
            if (p == NULL) p = end_subject;
7461
2.23k
#endif
7462
2.23k
            }
7463
7464
          /* If we can't find the required code unit, break the bumpalong loop,
7465
          forcing a match failure. */
7466
7467
2.36k
          if (p >= end_subject)
7468
196
            {
7469
196
            rc = MATCH_NOMATCH;
7470
196
            break;
7471
196
            }
7472
7473
          /* If we have found the required code unit, save the point where we
7474
          found it, so that we don't search again next time round the bumpalong
7475
          loop if the start hasn't yet passed this code unit. */
7476
7477
2.16k
          req_cu_ptr = p;
7478
2.16k
          }
7479
2.36k
        }
7480
329k
      }
7481
331k
    }
7482
7483
  /* ------------ End of start of match optimizations ------------ */
7484
7485
  /* Give no match if we have passed the bumpalong limit. */
7486
7487
329k
  if (start_match > bumpalong_limit)
7488
0
    {
7489
0
    rc = MATCH_NOMATCH;
7490
0
    break;
7491
0
    }
7492
7493
  /* OK, we can now run the match. If "hitend" is set afterwards, remember the
7494
  first starting point for which a partial match was found. */
7495
7496
329k
  cb.start_match = (PCRE2_SIZE)(start_match - subject);
7497
329k
  cb.callout_flags |= PCRE2_CALLOUT_STARTMATCH;
7498
7499
329k
  mb->start_used_ptr = start_match;
7500
329k
  mb->last_used_ptr = start_match;
7501
329k
#ifdef SUPPORT_UNICODE
7502
329k
  mb->moptions = options | fragment_options;
7503
#else
7504
  mb->moptions = options;
7505
#endif
7506
329k
  mb->match_call_count = 0;
7507
329k
  mb->end_offset_top = 0;
7508
329k
  mb->skip_arg_count = 0;
7509
7510
#ifdef DEBUG_SHOW_OPS
7511
  fprintf(stderr, "++ Calling match()\n");
7512
#endif
7513
7514
329k
  rc = match(start_match, mb->start_code, re->top_bracket, frame_size,
7515
329k
    match_data, mb);
7516
7517
#ifdef DEBUG_SHOW_OPS
7518
  fprintf(stderr, "++ match() returned %d\n\n", rc);
7519
#endif
7520
7521
329k
  if (mb->hitend && start_partial == NULL)
7522
0
    {
7523
0
    start_partial = mb->start_used_ptr;
7524
0
    match_partial = start_match;
7525
0
    }
7526
7527
329k
  switch(rc)
7528
329k
    {
7529
    /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
7530
    the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP
7531
    entirely. The only way we can do that is to re-do the match at the same
7532
    point, with a flag to force SKIP with an argument to be ignored. Just
7533
    treating this case as NOMATCH does not work because it does not check other
7534
    alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. */
7535
7536
0
    case MATCH_SKIP_ARG:
7537
0
    new_start_match = start_match;
7538
0
    mb->ignore_skip_arg = mb->skip_arg_count;
7539
0
    break;
7540
7541
    /* SKIP passes back the next starting point explicitly, but if it is no
7542
    greater than the start of the match we have just done, treat it as NOMATCH. */
7543
7544
0
    case MATCH_SKIP:
7545
0
    if (mb->verb_skip_ptr > start_match)
7546
0
      {
7547
0
      new_start_match = mb->verb_skip_ptr;
7548
0
      break;
7549
0
      }
7550
    /* Fall through */
7551
7552
    /* NOMATCH and PRUNE advance by one character. THEN at this level acts
7553
    exactly like PRUNE. Unset ignore SKIP-with-argument. */
7554
7555
327k
    case MATCH_NOMATCH:
7556
327k
    case MATCH_PRUNE:
7557
327k
    case MATCH_THEN:
7558
327k
    mb->ignore_skip_arg = 0;
7559
327k
    new_start_match = start_match + 1;
7560
327k
#ifdef SUPPORT_UNICODE
7561
327k
    if (utf)
7562
87.8k
      ACROSSCHAR(new_start_match < end_subject, new_start_match,
7563
327k
        new_start_match++);
7564
327k
#endif
7565
327k
    break;
7566
7567
    /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
7568
7569
0
    case MATCH_COMMIT:
7570
0
    rc = MATCH_NOMATCH;
7571
0
    goto ENDLOOP;
7572
7573
    /* Any other return is either a match, or some kind of error. */
7574
7575
1.45k
    default:
7576
1.45k
    goto ENDLOOP;
7577
329k
    }
7578
7579
  /* Control reaches here for the various types of "no match at this point"
7580
  result. Reset the code to MATCH_NOMATCH for subsequent checking. */
7581
7582
327k
  rc = MATCH_NOMATCH;
7583
7584
  /* If PCRE2_FIRSTLINE is set, the match must happen before or at the first
7585
  newline in the subject (though it may continue over the newline). Therefore,
7586
  if we have just failed to match, starting at a newline, do not continue. */
7587
7588
327k
  if (firstline && IS_NEWLINE(start_match)) break;
7589
7590
  /* Advance to new matching position */
7591
7592
327k
  start_match = new_start_match;
7593
7594
  /* Break the loop if the pattern is anchored or if we have passed the end of
7595
  the subject. */
7596
7597
327k
  if (anchored || start_match > end_subject) break;
7598
7599
  /* If we have just passed a CR and we are now at a LF, and the pattern does
7600
  not contain any explicit matches for \r or \n, and the newline option is CRLF
7601
  or ANY or ANYCRLF, advance the match position by one more code unit. In
7602
  normal matching start_match will always be greater than the first position at
7603
  this stage, but a failed *SKIP can cause a return at the same point, which is
7604
  why the first test exists. */
7605
7606
327k
  if (start_match > subject + start_offset &&
7607
327k
      start_match[-1] == CHAR_CR &&
7608
1.92k
      start_match < end_subject &&
7609
1.92k
      *start_match == CHAR_NL &&
7610
347
      (re->flags & PCRE2_HASCRORLF) == 0 &&
7611
240
        (mb->nltype == NLTYPE_ANY ||
7612
240
         mb->nltype == NLTYPE_ANYCRLF ||
7613
240
         mb->nllen == 2))
7614
0
    start_match++;
7615
7616
327k
  mb->mark = NULL;   /* Reset for start of next match attempt */
7617
327k
  }                  /* End of for(;;) "bumpalong" loop */
7618
7619
/* ==========================================================================*/
7620
7621
/* When we reach here, one of the following stopping conditions is true:
7622
7623
(1) The match succeeded, either completely, or partially;
7624
7625
(2) The pattern is anchored or the match was failed after (*COMMIT);
7626
7627
(3) We are past the end of the subject or the bumpalong limit;
7628
7629
(4) PCRE2_FIRSTLINE is set and we have failed to match at a newline, because
7630
    this option requests that a match occur at or before the first newline in
7631
    the subject.
7632
7633
(5) Some kind of error occurred.
7634
7635
*/
7636
7637
5.81k
ENDLOOP:
7638
7639
/* If end_subject != true_end_subject, it means we are handling invalid UTF,
7640
and have just processed a non-terminal fragment. If this resulted in no match
7641
or a partial match we must carry on to the next fragment (a partial match is
7642
returned to the caller only at the very end of the subject). A loop is used to
7643
avoid trying to match against empty fragments; if the pattern can match an
7644
empty string it would have done so already. */
7645
7646
5.81k
#ifdef SUPPORT_UNICODE
7647
5.81k
if (utf && end_subject != true_end_subject &&
7648
0
    (rc == MATCH_NOMATCH || rc == PCRE2_ERROR_PARTIAL))
7649
0
  {
7650
0
  for (;;)
7651
0
    {
7652
    /* Advance past the first bad code unit, and then skip invalid character
7653
    starting code units in 8-bit and 16-bit modes. */
7654
7655
0
    start_match = end_subject + 1;
7656
7657
0
#if PCRE2_CODE_UNIT_WIDTH != 32
7658
0
    while (start_match < true_end_subject && NOT_FIRSTCU(*start_match))
7659
0
      start_match++;
7660
0
#endif
7661
7662
    /* If we have hit the end of the subject, there isn't another non-empty
7663
    fragment, so give up. */
7664
7665
0
    if (start_match >= true_end_subject)
7666
0
      {
7667
0
      rc = MATCH_NOMATCH;  /* In case it was partial */
7668
0
      match_partial = NULL;
7669
0
      break;
7670
0
      }
7671
7672
    /* Check the rest of the subject */
7673
7674
0
    mb->check_subject = start_match;
7675
0
    rc = PRIV(valid_utf)(start_match, length - (start_match - subject),
7676
0
      &(match_data->startchar));
7677
7678
    /* The rest of the subject is valid UTF. */
7679
7680
0
    if (rc == 0)
7681
0
      {
7682
0
      mb->end_subject = end_subject = true_end_subject;
7683
0
      fragment_options = PCRE2_NOTBOL;
7684
0
      goto FRAGMENT_RESTART;
7685
0
      }
7686
7687
    /* A subsequent UTF error has been found; if the next fragment is
7688
    non-empty, set up to process it. Otherwise, let the loop advance. */
7689
7690
0
    else if (rc < 0)
7691
0
      {
7692
0
      mb->end_subject = end_subject = start_match + match_data->startchar;
7693
0
      if (end_subject > start_match)
7694
0
        {
7695
0
        fragment_options = PCRE2_NOTBOL|PCRE2_NOTEOL;
7696
0
        goto FRAGMENT_RESTART;
7697
0
        }
7698
0
      }
7699
0
    }
7700
0
  }
7701
5.81k
#endif  /* SUPPORT_UNICODE */
7702
7703
/* Fill in fields that are always returned in the match data. */
7704
7705
5.81k
match_data->code = re;
7706
5.81k
match_data->mark = mb->mark;
7707
5.81k
match_data->matchedby = PCRE2_MATCHEDBY_INTERPRETER;
7708
7709
/* Handle a fully successful match. Set the return code to the number of
7710
captured strings, or 0 if there were too many to fit into the ovector, and then
7711
set the remaining returned values before returning. Make a copy of the subject
7712
string if requested. */
7713
7714
5.81k
if (rc == MATCH_MATCH)
7715
1.42k
  {
7716
1.42k
  match_data->rc = ((int)mb->end_offset_top >= 2 * match_data->oveccount)?
7717
1.42k
    0 : (int)mb->end_offset_top/2 + 1;
7718
1.42k
  match_data->subject_length = length;
7719
1.42k
  match_data->startchar = start_match - subject;
7720
1.42k
  match_data->leftchar = mb->start_used_ptr - subject;
7721
1.42k
  match_data->rightchar = ((mb->last_used_ptr > mb->end_match_ptr)?
7722
1.03k
    mb->last_used_ptr : mb->end_match_ptr) - subject;
7723
1.42k
  if ((options & PCRE2_COPY_MATCHED_SUBJECT) != 0)
7724
0
    {
7725
0
    length = CU2BYTES(length + was_zero_terminated);
7726
0
    match_data->subject = match_data->memctl.malloc(length,
7727
0
      match_data->memctl.memory_data);
7728
0
    if (match_data->subject == NULL) return PCRE2_ERROR_NOMEMORY;
7729
0
    memcpy((void *)match_data->subject, subject, length);
7730
0
    match_data->flags |= PCRE2_MD_COPIED_SUBJECT;
7731
0
    }
7732
1.42k
  else match_data->subject = subject;
7733
7734
1.42k
  return match_data->rc;
7735
1.42k
  }
7736
7737
/* Control gets here if there has been a partial match, an error, or if the
7738
overall match attempt has failed at all permitted starting positions. Any mark
7739
data is in the nomatch_mark field. */
7740
7741
4.38k
match_data->mark = mb->nomatch_mark;
7742
7743
/* For anything other than nomatch or partial match, just return the code. */
7744
7745
4.38k
if (rc != MATCH_NOMATCH && rc != PCRE2_ERROR_PARTIAL) match_data->rc = rc;
7746
7747
/* Handle a partial match. If a "soft" partial match was requested, searching
7748
for a complete match will have continued, and the value of rc at this point
7749
will be MATCH_NOMATCH. For a "hard" partial match, it will already be
7750
PCRE2_ERROR_PARTIAL. */
7751
7752
4.35k
else if (match_partial != NULL)
7753
0
  {
7754
0
  match_data->subject = subject;
7755
0
  match_data->subject_length = length;
7756
0
  match_data->ovector[0] = match_partial - subject;
7757
0
  match_data->ovector[1] = end_subject - subject;
7758
0
  match_data->startchar = match_partial - subject;
7759
0
  match_data->leftchar = start_partial - subject;
7760
0
  match_data->rightchar = end_subject - subject;
7761
0
  match_data->rc = PCRE2_ERROR_PARTIAL;
7762
0
  }
7763
7764
/* Else this is the classic nomatch case. */
7765
7766
4.35k
else match_data->rc = PCRE2_ERROR_NOMATCH;
7767
7768
4.38k
return match_data->rc;
7769
5.81k
}
7770
7771
/* These #undefs are here to enable unity builds with CMake. */
7772
7773
#undef NLBLOCK /* Block containing newline information */
7774
#undef PSSTART /* Field containing processed string start */
7775
#undef PSEND   /* Field containing processed string end */
7776
7777
/* End of pcre2_match.c */