Coverage Report

Created: 2025-07-23 07:29

/src/pcre2-10.39/src/pcre2_match.c
Line
Count
Source (jump to first uncovered line)
1
/*************************************************
2
*      Perl-Compatible Regular Expressions       *
3
*************************************************/
4
5
/* PCRE is a library of functions to support regular expressions whose syntax
6
and semantics are as close as possible to those of the Perl 5 language.
7
8
                       Written by Philip Hazel
9
     Original API code Copyright (c) 1997-2012 University of Cambridge
10
          New API code Copyright (c) 2015-2021 University of Cambridge
11
12
-----------------------------------------------------------------------------
13
Redistribution and use in source and binary forms, with or without
14
modification, are permitted provided that the following conditions are met:
15
16
    * Redistributions of source code must retain the above copyright notice,
17
      this list of conditions and the following disclaimer.
18
19
    * Redistributions in binary form must reproduce the above copyright
20
      notice, this list of conditions and the following disclaimer in the
21
      documentation and/or other materials provided with the distribution.
22
23
    * Neither the name of the University of Cambridge nor the names of its
24
      contributors may be used to endorse or promote products derived from
25
      this software without specific prior written permission.
26
27
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37
POSSIBILITY OF SUCH DAMAGE.
38
-----------------------------------------------------------------------------
39
*/
40
41
42
#ifdef HAVE_CONFIG_H
43
#include "config.h"
44
#endif
45
46
/* These defines enable debugging code */
47
48
/* #define DEBUG_FRAMES_DISPLAY */
49
/* #define DEBUG_SHOW_OPS */
50
/* #define DEBUG_SHOW_RMATCH */
51
52
#ifdef DEBUG_FRAME_DISPLAY
53
#include <stdarg.h>
54
#endif
55
56
/* These defines identify the name of the block containing "static"
57
information, and fields within it. */
58
59
3.21G
#define NLBLOCK mb              /* Block containing newline information */
60
43.6M
#define PSSTART start_subject   /* Field containing processed string start */
61
757M
#define PSEND   end_subject     /* Field containing processed string end */
62
63
#include "pcre2_internal.h"
64
65
54.4M
#define RECURSE_UNSET 0xffffffffu  /* Bigger than max group number */
66
67
/* Masks for identifying the public options that are permitted at match time. */
68
69
#define PUBLIC_MATCH_OPTIONS \
70
3.71M
  (PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
71
3.71M
   PCRE2_NOTEMPTY_ATSTART|PCRE2_NO_UTF_CHECK|PCRE2_PARTIAL_HARD| \
72
3.71M
   PCRE2_PARTIAL_SOFT|PCRE2_NO_JIT|PCRE2_COPY_MATCHED_SUBJECT)
73
74
#define PUBLIC_JIT_MATCH_OPTIONS \
75
   (PCRE2_NO_UTF_CHECK|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY|\
76
    PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_SOFT|PCRE2_PARTIAL_HARD|\
77
    PCRE2_COPY_MATCHED_SUBJECT)
78
79
/* Non-error returns from and within the match() function. Error returns are
80
externally defined PCRE2_ERROR_xxx codes, which are all negative. */
81
82
6.54M
#define MATCH_MATCH        1
83
1.18G
#define MATCH_NOMATCH      0
84
85
/* Special internal returns used in the match() function. Make them
86
sufficiently negative to avoid the external error codes. */
87
88
426k
#define MATCH_ACCEPT       (-999)
89
3.52k
#define MATCH_KETRPOS      (-998)
90
/* The next 5 must be kept together and in sequence so that a test that checks
91
for any one of them can use a range. */
92
0
#define MATCH_COMMIT       (-997)
93
49.3M
#define MATCH_PRUNE        (-996)
94
0
#define MATCH_SKIP         (-995)
95
0
#define MATCH_SKIP_ARG     (-994)
96
221M
#define MATCH_THEN         (-993)
97
0
#define MATCH_BACKTRACK_MAX MATCH_THEN
98
0
#define MATCH_BACKTRACK_MIN MATCH_COMMIT
99
100
/* Group frame type values. Zero means the frame is not a group frame. The
101
lower 16 bits are used for data (e.g. the capture number). Group frames are
102
used for most groups so that information about the start is easily available at
103
the end without having to scan back through intermediate frames (backtrack
104
points). */
105
106
6.14M
#define GF_CAPTURE     0x00010000u
107
17.1M
#define GF_NOCAPTURE   0x00020000u
108
56.2M
#define GF_CONDASSERT  0x00030000u
109
70.3M
#define GF_RECURSE     0x00040000u
110
111
/* Masks for the identity and data parts of the group frame type. */
112
113
126M
#define GF_IDMASK(a)   ((a) & 0xffff0000u)
114
0
#define GF_DATAMASK(a) ((a) & 0x0000ffffu)
115
116
/* Repetition types */
117
118
enum { REPTYPE_MIN, REPTYPE_MAX, REPTYPE_POS };
119
120
/* Min and max values for the common repeats; a maximum of UINT32_MAX =>
121
infinity. */
122
123
static const uint32_t rep_min[] = {
124
  0, 0,       /* * and *? */
125
  1, 1,       /* + and +? */
126
  0, 0,       /* ? and ?? */
127
  0, 0,       /* dummy placefillers for OP_CR[MIN]RANGE */
128
  0, 1, 0 };  /* OP_CRPOS{STAR, PLUS, QUERY} */
129
130
static const uint32_t rep_max[] = {
131
  UINT32_MAX, UINT32_MAX,      /* * and *? */
132
  UINT32_MAX, UINT32_MAX,      /* + and +? */
133
  1, 1,                        /* ? and ?? */
134
  0, 0,                        /* dummy placefillers for OP_CR[MIN]RANGE */
135
  UINT32_MAX, UINT32_MAX, 1 }; /* OP_CRPOS{STAR, PLUS, QUERY} */
136
137
/* Repetition types - must include OP_CRPOSRANGE (not needed above) */
138
139
static const uint32_t rep_typ[] = {
140
  REPTYPE_MAX, REPTYPE_MIN,    /* * and *? */
141
  REPTYPE_MAX, REPTYPE_MIN,    /* + and +? */
142
  REPTYPE_MAX, REPTYPE_MIN,    /* ? and ?? */
143
  REPTYPE_MAX, REPTYPE_MIN,    /* OP_CRRANGE and OP_CRMINRANGE */
144
  REPTYPE_POS, REPTYPE_POS,    /* OP_CRPOSSTAR, OP_CRPOSPLUS */
145
  REPTYPE_POS, REPTYPE_POS };  /* OP_CRPOSQUERY, OP_CRPOSRANGE */
146
147
/* Numbers for RMATCH calls at backtracking points. When these lists are
148
changed, the code at RETURN_SWITCH below must be updated in sync.  */
149
150
enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
151
       RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
152
       RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
153
       RM31,  RM32, RM33, RM34, RM35, RM36 };
154
155
#ifdef SUPPORT_WIDE_CHARS
156
enum { RM100=100, RM101 };
157
#endif
158
159
#ifdef SUPPORT_UNICODE
160
enum { RM200=200, RM201, RM202, RM203, RM204, RM205, RM206, RM207,
161
       RM208,     RM209, RM210, RM211, RM212, RM213, RM214, RM215,
162
       RM216,     RM217, RM218, RM219, RM220, RM221, RM222 };
163
#endif
164
165
/* Define short names for general fields in the current backtrack frame, which
166
is always pointed to by the F variable. Occasional references to fields in
167
other frames are written out explicitly. There are also some fields in the
168
current frame whose names start with "temp" that are used for short-term,
169
localised backtracking memory. These are #defined with Lxxx names at the point
170
of use and undefined afterwards. */
171
172
2.23G
#define Fback_frame        F->back_frame
173
77.5M
#define Fcapture_last      F->capture_last
174
80.1M
#define Fcurrent_recurse   F->current_recurse
175
7.99G
#define Fecode             F->ecode
176
15.5G
#define Feptr              F->eptr
177
1.14G
#define Fgroup_frame_type  F->group_frame_type
178
234M
#define Flast_group_offset F->last_group_offset
179
0
#define Flength            F->length
180
54.4M
#define Fmark              F->mark
181
3.50G
#define Frdepth            F->rdepth
182
59.9M
#define Fstart_match       F->start_match
183
126M
#define Foffset_top        F->offset_top
184
0
#define Foccu              F->occu
185
3.79G
#define Fop                F->op
186
61.1M
#define Fovector           F->ovector
187
2.18G
#define Freturn_id         F->return_id
188
189
190
#ifdef DEBUG_FRAMES_DISPLAY
191
/*************************************************
192
*      Display current frames and contents       *
193
*************************************************/
194
195
/* This debugging function displays the current set of frames and their
196
contents. It is not called automatically from anywhere, the intention being
197
that calls can be inserted where necessary when debugging frame-related
198
problems.
199
200
Arguments:
201
  f           the file to write to
202
  F           the current top frame
203
  P           a previous frame of interest
204
  frame_size  the frame size
205
  mb          points to the match block
206
  s           identification text
207
208
Returns:    nothing
209
*/
210
211
static void
212
display_frames(FILE *f, heapframe *F, heapframe *P, PCRE2_SIZE frame_size,
213
  match_block *mb, const char *s, ...)
214
{
215
uint32_t i;
216
heapframe *Q;
217
va_list ap;
218
va_start(ap, s);
219
220
fprintf(f, "FRAMES ");
221
vfprintf(f, s, ap);
222
va_end(ap);
223
224
if (P != NULL) fprintf(f, " P=%lu",
225
  ((char *)P - (char *)(mb->match_frames))/frame_size);
226
fprintf(f, "\n");
227
228
for (i = 0, Q = mb->match_frames;
229
     Q <= F;
230
     i++, Q = (heapframe *)((char *)Q + frame_size))
231
  {
232
  fprintf(f, "Frame %d type=%x subj=%lu code=%d back=%lu id=%d",
233
    i, Q->group_frame_type, Q->eptr - mb->start_subject, *(Q->ecode),
234
    Q->back_frame, Q->return_id);
235
236
  if (Q->last_group_offset == PCRE2_UNSET)
237
    fprintf(f, " lgoffset=unset\n");
238
  else
239
    fprintf(f, " lgoffset=%lu\n",  Q->last_group_offset/frame_size);
240
  }
241
}
242
243
#endif
244
245
246
247
/*************************************************
248
*                Process a callout               *
249
*************************************************/
250
251
/* This function is called for all callouts, whether "standalone" or at the
252
start of a conditional group. Fecode will be pointing to either OP_CALLOUT or
253
OP_CALLOUT_STR. A callout block is allocated in pcre2_match() and initialized
254
with fixed values.
255
256
Arguments:
257
  F          points to the current backtracking frame
258
  mb         points to the match block
259
  lengthptr  where to return the length of the callout item
260
261
Returns:     the return from the callout
262
             or 0 if no callout function exists
263
*/
264
265
static int
266
do_callout(heapframe *F, match_block *mb, PCRE2_SIZE *lengthptr)
267
0
{
268
0
int rc;
269
0
PCRE2_SIZE save0, save1;
270
0
PCRE2_SIZE *callout_ovector;
271
0
pcre2_callout_block *cb;
272
273
0
*lengthptr = (*Fecode == OP_CALLOUT)?
274
0
  PRIV(OP_lengths)[OP_CALLOUT] : GET(Fecode, 1 + 2*LINK_SIZE);
275
276
0
if (mb->callout == NULL) return 0;   /* No callout function provided */
277
278
/* The original matching code (pre 10.30) worked directly with the ovector
279
passed by the user, and this was passed to callouts. Now that the working
280
ovector is in the backtracking frame, it no longer needs to reserve space for
281
the overall match offsets (which would waste space in the frame). For backward
282
compatibility, however, we pass capture_top and offset_vector to the callout as
283
if for the extended ovector, and we ensure that the first two slots are unset
284
by preserving and restoring their current contents. Picky compilers complain if
285
references such as Fovector[-2] are used directly, so we set up a separate
286
pointer. */
287
288
0
callout_ovector = (PCRE2_SIZE *)(Fovector) - 2;
289
290
/* The cb->version, cb->subject, cb->subject_length, and cb->start_match fields
291
are set externally. The first 3 never change; the last is updated for each
292
bumpalong. */
293
294
0
cb = mb->cb;
295
0
cb->capture_top      = (uint32_t)Foffset_top/2 + 1;
296
0
cb->capture_last     = Fcapture_last;
297
0
cb->offset_vector    = callout_ovector;
298
0
cb->mark             = mb->nomatch_mark;
299
0
cb->current_position = (PCRE2_SIZE)(Feptr - mb->start_subject);
300
0
cb->pattern_position = GET(Fecode, 1);
301
0
cb->next_item_length = GET(Fecode, 1 + LINK_SIZE);
302
303
0
if (*Fecode == OP_CALLOUT)  /* Numerical callout */
304
0
  {
305
0
  cb->callout_number = Fecode[1 + 2*LINK_SIZE];
306
0
  cb->callout_string_offset = 0;
307
0
  cb->callout_string = NULL;
308
0
  cb->callout_string_length = 0;
309
0
  }
310
0
else  /* String callout */
311
0
  {
312
0
  cb->callout_number = 0;
313
0
  cb->callout_string_offset = GET(Fecode, 1 + 3*LINK_SIZE);
314
0
  cb->callout_string = Fecode + (1 + 4*LINK_SIZE) + 1;
315
0
  cb->callout_string_length =
316
0
    *lengthptr - (1 + 4*LINK_SIZE) - 2;
317
0
  }
318
319
0
save0 = callout_ovector[0];
320
0
save1 = callout_ovector[1];
321
0
callout_ovector[0] = callout_ovector[1] = PCRE2_UNSET;
322
0
rc = mb->callout(cb, mb->callout_data);
323
0
callout_ovector[0] = save0;
324
0
callout_ovector[1] = save1;
325
0
cb->callout_flags = 0;
326
0
return rc;
327
0
}
328
329
330
331
/*************************************************
332
*          Match a back-reference                *
333
*************************************************/
334
335
/* This function is called only when it is known that the offset lies within
336
the offsets that have so far been used in the match. Note that in caseless
337
UTF-8 mode, the number of subject bytes matched may be different to the number
338
of reference bytes. (In theory this could also happen in UTF-16 mode, but it
339
seems unlikely.)
340
341
Arguments:
342
  offset      index into the offset vector
343
  caseless    TRUE if caseless
344
  F           the current backtracking frame pointer
345
  mb          points to match block
346
  lengthptr   pointer for returning the length matched
347
348
Returns:      = 0 successful match; number of code units matched is set
349
              < 0 no match
350
              > 0 partial match
351
*/
352
353
static int
354
match_ref(PCRE2_SIZE offset, BOOL caseless, heapframe *F, match_block *mb,
355
  PCRE2_SIZE *lengthptr)
356
7.77M
{
357
7.77M
PCRE2_SPTR p;
358
7.77M
PCRE2_SIZE length;
359
7.77M
PCRE2_SPTR eptr;
360
7.77M
PCRE2_SPTR eptr_start;
361
362
/* Deal with an unset group. The default is no match, but there is an option to
363
match an empty string. */
364
365
7.77M
if (offset >= Foffset_top || Fovector[offset] == PCRE2_UNSET)
366
5.99M
  {
367
5.99M
  if ((mb->poptions & PCRE2_MATCH_UNSET_BACKREF) != 0)
368
0
    {
369
0
    *lengthptr = 0;
370
0
    return 0;      /* Match */
371
0
    }
372
5.99M
  else return -1;  /* No match */
373
5.99M
  }
374
375
/* Separate the caseless and UTF cases for speed. */
376
377
1.78M
eptr = eptr_start = Feptr;
378
1.78M
p = mb->start_subject + Fovector[offset];
379
1.78M
length = Fovector[offset+1] - Fovector[offset];
380
381
1.78M
if (caseless)
382
0
  {
383
0
#if defined SUPPORT_UNICODE
384
0
  BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
385
386
0
  if (utf || (mb->poptions & PCRE2_UCP) != 0)
387
0
    {
388
0
    PCRE2_SPTR endptr = p + length;
389
390
    /* Match characters up to the end of the reference. NOTE: the number of
391
    code units matched may differ, because in UTF-8 there are some characters
392
    whose upper and lower case codes have different numbers of bytes. For
393
    example, U+023A (2 bytes in UTF-8) is the upper case version of U+2C65 (3
394
    bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a
395
    sequence of two of the latter. It is important, therefore, to check the
396
    length along the reference, not along the subject (earlier code did this
397
    wrong). UCP without UTF uses Unicode properties but without UTF encoding. */
398
399
0
    while (p < endptr)
400
0
      {
401
0
      uint32_t c, d;
402
0
      const ucd_record *ur;
403
0
      if (eptr >= mb->end_subject) return 1;   /* Partial match */
404
405
0
      if (utf)
406
0
        {
407
0
        GETCHARINC(c, eptr);
408
0
        GETCHARINC(d, p);
409
0
        }
410
0
      else
411
0
        {
412
0
        c = *eptr++;
413
0
        d = *p++;
414
0
        }
415
416
0
      ur = GET_UCD(d);
417
0
      if (c != d && c != (uint32_t)((int)d + ur->other_case))
418
0
        {
419
0
        const uint32_t *pp = PRIV(ucd_caseless_sets) + ur->caseset;
420
0
        for (;;)
421
0
          {
422
0
          if (c < *pp) return -1;  /* No match */
423
0
          if (c == *pp++) break;
424
0
          }
425
0
        }
426
0
      }
427
0
    }
428
0
  else
429
0
#endif
430
431
  /* Not in UTF or UCP mode */
432
0
    {
433
0
    for (; length > 0; length--)
434
0
      {
435
0
      uint32_t cc, cp;
436
0
      if (eptr >= mb->end_subject) return 1;   /* Partial match */
437
0
      cc = UCHAR21TEST(eptr);
438
0
      cp = UCHAR21TEST(p);
439
0
      if (TABLE_GET(cp, mb->lcc, cp) != TABLE_GET(cc, mb->lcc, cc))
440
0
        return -1;  /* No match */
441
0
      p++;
442
0
      eptr++;
443
0
      }
444
0
    }
445
0
  }
446
447
/* In the caseful case, we can just compare the code units, whether or not we
448
are in UTF and/or UCP mode. When partial matching, we have to do this unit by
449
unit. */
450
451
1.78M
else
452
1.78M
  {
453
1.78M
  if (mb->partial != 0)
454
0
    {
455
0
    for (; length > 0; length--)
456
0
      {
457
0
      if (eptr >= mb->end_subject) return 1;   /* Partial match */
458
0
      if (UCHAR21INCTEST(p) != UCHAR21INCTEST(eptr)) return -1;  /* No match */
459
0
      }
460
0
    }
461
462
  /* Not partial matching */
463
464
1.78M
  else
465
1.78M
    {
466
1.78M
    if ((PCRE2_SIZE)(mb->end_subject - eptr) < length) return 1; /* Partial */
467
1.78M
    if (memcmp(p, eptr, CU2BYTES(length)) != 0) return -1;  /* No match */
468
1.78M
    eptr += length;
469
1.78M
    }
470
1.78M
  }
471
472
1.78M
*lengthptr = eptr - eptr_start;
473
1.78M
return 0;  /* Match */
474
1.78M
}
475
476
477
478
/******************************************************************************
479
*******************************************************************************
480
                   "Recursion" in the match() function
481
482
The original match() function was highly recursive, but this proved to be the
483
source of a number of problems over the years, mostly because of the relatively
484
small system stacks that are commonly found. As new features were added to
485
patterns, various kludges were invented to reduce the amount of stack used,
486
making the code hard to understand in places.
487
488
A version did exist that used individual frames on the heap instead of calling
489
match() recursively, but this ran substantially slower. The current version is
490
a refactoring that uses a vector of frames to remember backtracking points.
491
This runs no slower, and possibly even a bit faster than the original recursive
492
implementation. An initial vector of size START_FRAMES_SIZE (enough for maybe
493
50 frames) is allocated on the system stack. If this is not big enough, the
494
heap is used for a larger vector.
495
496
*******************************************************************************
497
******************************************************************************/
498
499
500
501
502
/*************************************************
503
*       Macros for the match() function          *
504
*************************************************/
505
506
/* These macros pack up tests that are used for partial matching several times
507
in the code. The second one is used when we already know we are past the end of
508
the subject. We set the "hit end" flag if the pointer is at the end of the
509
subject and either (a) the pointer is past the earliest inspected character
510
(i.e. something has been matched, even if not part of the actual matched
511
string), or (b) the pattern contains a lookbehind. These are the conditions for
512
which adding more characters may allow the current match to continue.
513
514
For hard partial matching, we immediately return a partial match. Otherwise,
515
carrying on means that a complete match on the current subject will be sought.
516
A partial match is returned only if no complete match can be found. */
517
518
#define CHECK_PARTIAL()\
519
112M
  if (Feptr >= mb->end_subject) \
520
112M
    { \
521
524k
    SCHECK_PARTIAL(); \
522
524k
    }
523
524
#define SCHECK_PARTIAL()\
525
12.5M
  if (mb->partial != 0 && \
526
12.5M
      (Feptr > mb->start_used_ptr || mb->allowemptypartial)) \
527
12.5M
    { \
528
0
    mb->hitend = TRUE; \
529
0
    if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; \
530
0
    }
531
532
533
/* These macros are used to implement backtracking. They simulate a recursive
534
call to the match() function by means of a local vector of frames which
535
remember the backtracking points. */
536
537
#define RMATCH(ra,rb)\
538
754M
  {\
539
754M
  start_ecode = ra;\
540
1.09G
  Freturn_id = rb;\
541
754M
  goto MATCH_RECURSE;\
542
1.09G
  L_##rb:;\
543
1.08G
  }
544
545
#define RRETURN(ra)\
546
407M
  {\
547
407M
  rrc = ra;\
548
407M
  goto RETURN_SWITCH;\
549
1.13G
  }
550
551
552
553
/*************************************************
554
*         Match from current position            *
555
*************************************************/
556
557
/* This function is called to run one match attempt at a single starting point
558
in the subject.
559
560
Performance note: It might be tempting to extract commonly used fields from the
561
mb structure (e.g. end_subject) into individual variables to improve
562
performance. Tests using gcc on a SPARC disproved this; in the first case, it
563
made performance worse.
564
565
Arguments:
566
   start_eptr   starting character in subject
567
   start_ecode  starting position in compiled code
568
   ovector      pointer to the final output vector
569
   oveccount    number of pairs in ovector
570
   top_bracket  number of capturing parentheses in the pattern
571
   frame_size   size of each backtracking frame
572
   mb           pointer to "static" variables block
573
574
Returns:        MATCH_MATCH if matched            )  these values are >= 0
575
                MATCH_NOMATCH if failed to match  )
576
                negative MATCH_xxx value for PRUNE, SKIP, etc
577
                negative PCRE2_ERROR_xxx value if aborted by an error condition
578
                (e.g. stopped by repeated call or depth limit)
579
*/
580
581
static int
582
match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, PCRE2_SIZE *ovector,
583
  uint16_t oveccount, uint16_t top_bracket, PCRE2_SIZE frame_size,
584
  match_block *mb)
585
51.8M
{
586
/* Frame-handling variables */
587
588
51.8M
heapframe *F;           /* Current frame pointer */
589
51.8M
heapframe *N = NULL;    /* Temporary frame pointers */
590
51.8M
heapframe *P = NULL;
591
51.8M
heapframe *assert_accept_frame = NULL;  /* For passing back a frame with captures */
592
51.8M
PCRE2_SIZE frame_copy_size;     /* Amount to copy when creating a new frame */
593
594
/* Local variables that do not need to be preserved over calls to RMATCH(). */
595
596
51.8M
PCRE2_SPTR bracode;     /* Temp pointer to start of group */
597
51.8M
PCRE2_SIZE offset;      /* Used for group offsets */
598
51.8M
PCRE2_SIZE length;      /* Used for various length calculations */
599
600
51.8M
int rrc;                /* Return from functions & backtracking "recursions" */
601
51.8M
#ifdef SUPPORT_UNICODE
602
51.8M
int proptype;           /* Type of character property */
603
51.8M
#endif
604
605
51.8M
uint32_t i;             /* Used for local loops */
606
51.8M
uint32_t fc;            /* Character values */
607
51.8M
uint32_t number;        /* Used for group and other numbers */
608
51.8M
uint32_t reptype = 0;   /* Type of repetition (0 to avoid compiler warning) */
609
51.8M
uint32_t group_frame_type;  /* Specifies type for new group frames */
610
611
51.8M
BOOL condition;         /* Used in conditional groups */
612
51.8M
BOOL cur_is_word;       /* Used in "word" tests */
613
51.8M
BOOL prev_is_word;      /* Used in "word" tests */
614
615
/* UTF and UCP flags */
616
617
51.8M
#ifdef SUPPORT_UNICODE
618
51.8M
BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
619
51.8M
BOOL ucp = (mb->poptions & PCRE2_UCP) != 0;
620
#else
621
BOOL utf = FALSE;  /* Required for convenience even when no Unicode support */
622
#endif
623
624
/* This is the length of the last part of a backtracking frame that must be
625
copied when a new frame is created. */
626
627
51.8M
frame_copy_size = frame_size - offsetof(heapframe, eptr);
628
629
/* Set up the first current frame at the start of the vector, and initialize
630
fields that are not reset for new frames. */
631
632
51.8M
F = mb->match_frames;
633
51.8M
Frdepth = 0;                        /* "Recursion" depth */
634
51.8M
Fcapture_last = 0;                  /* Number of most recent capture */
635
51.8M
Fcurrent_recurse = RECURSE_UNSET;   /* Not pattern recursing. */
636
51.8M
Fstart_match = Feptr = start_eptr;  /* Current data pointer and start match */
637
51.8M
Fmark = NULL;                       /* Most recent mark */
638
51.8M
Foffset_top = 0;                    /* End of captures within the frame */
639
51.8M
Flast_group_offset = PCRE2_UNSET;   /* Saved frame of most recent group */
640
51.8M
group_frame_type = 0;               /* Not a start of group frame */
641
51.8M
goto NEW_FRAME;                     /* Start processing with this frame */
642
643
/* Come back here when we want to create a new frame for remembering a
644
backtracking point. */
645
646
1.09G
MATCH_RECURSE:
647
648
/* Set up a new backtracking frame. If the vector is full, get a new one
649
on the heap, doubling the size, but constrained by the heap limit. */
650
651
1.09G
N = (heapframe *)((char *)F + frame_size);
652
1.09G
if (N >= mb->match_frames_top)
653
14
  {
654
14
  PCRE2_SIZE newsize = mb->frame_vector_size * 2;
655
14
  heapframe *new;
656
657
14
  if ((newsize / 1024) > mb->heap_limit)
658
0
    {
659
0
    PCRE2_SIZE maxsize = ((mb->heap_limit * 1024)/frame_size) * frame_size;
660
0
    if (mb->frame_vector_size >= maxsize) return PCRE2_ERROR_HEAPLIMIT;
661
0
    newsize = maxsize;
662
0
    }
663
664
14
  new = mb->memctl.malloc(newsize, mb->memctl.memory_data);
665
14
  if (new == NULL) return PCRE2_ERROR_NOMEMORY;
666
14
  memcpy(new, mb->match_frames, mb->frame_vector_size);
667
668
14
  F = (heapframe *)((char *)new + ((char *)F - (char *)mb->match_frames));
669
14
  N = (heapframe *)((char *)F + frame_size);
670
671
14
  if (mb->match_frames != mb->stack_frames)
672
8
    mb->memctl.free(mb->match_frames, mb->memctl.memory_data);
673
14
  mb->match_frames = new;
674
14
  mb->match_frames_top = (heapframe *)((char *)mb->match_frames + newsize);
675
14
  mb->frame_vector_size = newsize;
676
14
  }
677
678
#ifdef DEBUG_SHOW_RMATCH
679
fprintf(stderr, "++ RMATCH %2d frame=%d", Freturn_id, Frdepth + 1);
680
if (group_frame_type != 0)
681
  {
682
  fprintf(stderr, " type=%x ", group_frame_type);
683
  switch (GF_IDMASK(group_frame_type))
684
    {
685
    case GF_CAPTURE:
686
    fprintf(stderr, "capture=%d", GF_DATAMASK(group_frame_type));
687
    break;
688
689
    case GF_NOCAPTURE:
690
    fprintf(stderr, "nocapture op=%d", GF_DATAMASK(group_frame_type));
691
    break;
692
693
    case GF_CONDASSERT:
694
    fprintf(stderr, "condassert op=%d", GF_DATAMASK(group_frame_type));
695
    break;
696
697
    case GF_RECURSE:
698
    fprintf(stderr, "recurse=%d", GF_DATAMASK(group_frame_type));
699
    break;
700
701
    default:
702
    fprintf(stderr, "*** unknown ***");
703
    break;
704
    }
705
  }
706
fprintf(stderr, "\n");
707
#endif
708
709
/* Copy those fields that must be copied into the new frame, increase the
710
"recursion" depth (i.e. the new frame's index) and then make the new frame
711
current. */
712
713
1.09G
memcpy((char *)N + offsetof(heapframe, eptr),
714
1.09G
       (char *)F + offsetof(heapframe, eptr),
715
1.09G
       frame_copy_size);
716
717
1.09G
N->rdepth = Frdepth + 1;
718
1.09G
F = N;
719
720
/* Carry on processing with a new frame. */
721
722
1.14G
NEW_FRAME:
723
1.14G
Fgroup_frame_type = group_frame_type;
724
1.14G
Fecode = start_ecode;      /* Starting code pointer */
725
1.14G
Fback_frame = frame_size;  /* Default is go back one frame */
726
727
/* If this is a special type of group frame, remember its offset for quick
728
access at the end of the group. If this is a recursion, set a new current
729
recursion value. */
730
731
1.14G
if (group_frame_type != 0)
732
70.3M
  {
733
70.3M
  Flast_group_offset = (char *)F - (char *)mb->match_frames;
734
70.3M
  if (GF_IDMASK(group_frame_type) == GF_RECURSE)
735
0
    Fcurrent_recurse = GF_DATAMASK(group_frame_type);
736
70.3M
  group_frame_type = 0;
737
70.3M
  }
738
739
740
/* ========================================================================= */
741
/* This is the main processing loop. First check that we haven't recorded too
742
many backtracks (search tree is too large), or that we haven't exceeded the
743
recursive depth limit (used too many backtracking frames). If not, process the
744
opcodes. */
745
746
1.14G
if (mb->match_call_count++ >= mb->match_limit) return PCRE2_ERROR_MATCHLIMIT;
747
1.14G
if (Frdepth >= mb->match_limit_depth) return PCRE2_ERROR_DEPTHLIMIT;
748
749
1.14G
for (;;)
750
1.77G
  {
751
#ifdef DEBUG_SHOW_OPS
752
fprintf(stderr, "++ op=%d\n", *Fecode);
753
#endif
754
755
1.77G
  Fop = (uint8_t)(*Fecode);  /* Cast needed for 16-bit and 32-bit modes */
756
1.77G
  switch(Fop)
757
1.77G
    {
758
    /* ===================================================================== */
759
    /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes, to close
760
    any currently open capturing brackets. Unlike reaching the end of a group,
761
    where we know the starting frame is at the top of the chained frames, in
762
    this case we have to search back for the relevant frame in case other types
763
    of group that use chained frames have intervened. Multiple OP_CLOSEs always
764
    come innermost first, which matches the chain order. We can ignore this in
765
    a recursion, because captures are not passed out of recursions. */
766
767
0
    case OP_CLOSE:
768
0
    if (Fcurrent_recurse == RECURSE_UNSET)
769
0
      {
770
0
      number = GET2(Fecode, 1);
771
0
      offset = Flast_group_offset;
772
0
      for(;;)
773
0
        {
774
0
        if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL;
775
0
        N = (heapframe *)((char *)mb->match_frames + offset);
776
0
        P = (heapframe *)((char *)N - frame_size);
777
0
        if (N->group_frame_type == (GF_CAPTURE | number)) break;
778
0
        offset = P->last_group_offset;
779
0
        }
780
0
      offset = (number << 1) - 2;
781
0
      Fcapture_last = number;
782
0
      Fovector[offset] = P->eptr - mb->start_subject;
783
0
      Fovector[offset+1] = Feptr - mb->start_subject;
784
0
      if (offset >= Foffset_top) Foffset_top = offset + 2;
785
0
      }
786
0
    Fecode += PRIV(OP_lengths)[*Fecode];
787
0
    break;
788
789
790
    /* ===================================================================== */
791
    /* Real or forced end of the pattern, assertion, or recursion. In an
792
    assertion ACCEPT, update the last used pointer and remember the current
793
    frame so that the captures and mark can be fished out of it. */
794
795
0
    case OP_ASSERT_ACCEPT:
796
0
    if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
797
0
    assert_accept_frame = F;
798
0
    RRETURN(MATCH_ACCEPT);
799
800
    /* If recursing, we have to find the most recent recursion. */
801
802
0
    case OP_ACCEPT:
803
2.56M
    case OP_END:
804
805
    /* Handle end of a recursion. */
806
807
2.56M
    if (Fcurrent_recurse != RECURSE_UNSET)
808
0
      {
809
0
      offset = Flast_group_offset;
810
0
      for(;;)
811
0
        {
812
0
        if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL;
813
0
        N = (heapframe *)((char *)mb->match_frames + offset);
814
0
        P = (heapframe *)((char *)N - frame_size);
815
0
        if (GF_IDMASK(N->group_frame_type) == GF_RECURSE) break;
816
0
        offset = P->last_group_offset;
817
0
        }
818
819
      /* N is now the frame of the recursion; the previous frame is at the
820
      OP_RECURSE position. Go back there, copying the current subject position
821
      and mark, and the start_match position (\K might have changed it), and
822
      then move on past the OP_RECURSE. */
823
824
0
      P->eptr = Feptr;
825
0
      P->mark = Fmark;
826
0
      P->start_match = Fstart_match;
827
0
      F = P;
828
0
      Fecode += 1 + LINK_SIZE;
829
0
      continue;
830
0
      }
831
832
    /* Not a recursion. Fail for an empty string match if either PCRE2_NOTEMPTY
833
    is set, or if PCRE2_NOTEMPTY_ATSTART is set and we have matched at the
834
    start of the subject. In both cases, backtracking will then try other
835
    alternatives, if any. */
836
837
2.56M
    if (Feptr == Fstart_match &&
838
2.56M
         ((mb->moptions & PCRE2_NOTEMPTY) != 0 ||
839
32.0k
           ((mb->moptions & PCRE2_NOTEMPTY_ATSTART) != 0 &&
840
32.0k
             Fstart_match == mb->start_subject + mb->start_offset)))
841
2.56M
      RRETURN(MATCH_NOMATCH);
842
843
    /* Also fail if PCRE2_ENDANCHORED is set and the end of the match is not
844
    the end of the subject. After (*ACCEPT) we fail the entire match (at this
845
    position) but backtrack on reaching the end of the pattern. */
846
847
2.56M
    if (Feptr < mb->end_subject &&
848
2.56M
        ((mb->moptions | mb->poptions) & PCRE2_ENDANCHORED) != 0)
849
0
      {
850
0
      if (Fop == OP_END) RRETURN(MATCH_NOMATCH);
851
0
      return MATCH_NOMATCH;
852
0
      }
853
854
    /* We have a successful match of the whole pattern. Record the result and
855
    then do a direct return from the function. If there is space in the offset
856
    vector, set any pairs that follow the highest-numbered captured string but
857
    are less than the number of capturing groups in the pattern to PCRE2_UNSET.
858
    It is documented that this happens. "Gaps" are set to PCRE2_UNSET
859
    dynamically. It is only those at the end that need setting here. */
860
861
2.56M
    mb->end_match_ptr = Feptr;           /* Record where we ended */
862
2.56M
    mb->end_offset_top = Foffset_top;    /* and how many extracts were taken */
863
2.56M
    mb->mark = Fmark;                    /* and the last success mark */
864
2.56M
    if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
865
866
2.56M
    ovector[0] = Fstart_match - mb->start_subject;
867
2.56M
    ovector[1] = Feptr - mb->start_subject;
868
869
    /* Set i to the smaller of the sizes of the external and frame ovectors. */
870
871
2.56M
    i = 2 * ((top_bracket + 1 > oveccount)? oveccount : top_bracket + 1);
872
2.56M
    memcpy(ovector + 2, Fovector, (i - 2) * sizeof(PCRE2_SIZE));
873
5.40M
    while (--i >= Foffset_top + 2) ovector[i] = PCRE2_UNSET;
874
2.56M
    return MATCH_MATCH;  /* Note: NOT RRETURN */
875
876
877
    /*===================================================================== */
878
    /* Match any single character type except newline; have to take care with
879
    CRLF newlines and partial matching. */
880
881
217M
    case OP_ANY:
882
217M
    if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
883
214M
    if (mb->partial != 0 &&
884
214M
        Feptr == mb->end_subject - 1 &&
885
214M
        NLBLOCK->nltype == NLTYPE_FIXED &&
886
214M
        NLBLOCK->nllen == 2 &&
887
214M
        UCHAR21TEST(Feptr) == NLBLOCK->nl[0])
888
0
      {
889
0
      mb->hitend = TRUE;
890
0
      if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
891
0
      }
892
    /* Fall through */
893
894
    /* Match any single character whatsoever. */
895
896
216M
    case OP_ALLANY:
897
216M
    if (Feptr >= mb->end_subject)  /* DO NOT merge the Feptr++ here; it must */
898
577k
      {                            /* not be updated before SCHECK_PARTIAL. */
899
577k
      SCHECK_PARTIAL();
900
577k
      RRETURN(MATCH_NOMATCH);
901
0
      }
902
215M
    Feptr++;
903
215M
#ifdef SUPPORT_UNICODE
904
215M
    if (utf) ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
905
215M
#endif
906
215M
    Fecode++;
907
215M
    break;
908
909
910
    /* ===================================================================== */
911
    /* Match a single code unit, even in UTF mode. This opcode really does
912
    match any code unit, even newline. (It really should be called ANYCODEUNIT,
913
    of course - the byte name is from pre-16 bit days.) */
914
915
0
    case OP_ANYBYTE:
916
0
    if (Feptr >= mb->end_subject)   /* DO NOT merge the Feptr++ here; it must */
917
0
      {                             /* not be updated before SCHECK_PARTIAL. */
918
0
      SCHECK_PARTIAL();
919
0
      RRETURN(MATCH_NOMATCH);
920
0
      }
921
0
    Feptr++;
922
0
    Fecode++;
923
0
    break;
924
925
926
    /* ===================================================================== */
927
    /* Match a single character, casefully */
928
929
455M
    case OP_CHAR:
930
455M
#ifdef SUPPORT_UNICODE
931
455M
    if (utf)
932
0
      {
933
0
      Flength = 1;
934
0
      Fecode++;
935
0
      GETCHARLEN(fc, Fecode, Flength);
936
0
      if (Flength > (PCRE2_SIZE)(mb->end_subject - Feptr))
937
0
        {
938
0
        CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
939
0
        RRETURN(MATCH_NOMATCH);
940
0
        }
941
0
      for (; Flength > 0; Flength--)
942
0
        {
943
0
        if (*Fecode++ != UCHAR21INC(Feptr)) RRETURN(MATCH_NOMATCH);
944
0
        }
945
0
      }
946
455M
    else
947
455M
#endif
948
949
    /* Not UTF mode */
950
455M
      {
951
455M
      if (mb->end_subject - Feptr < 1)
952
3.06M
        {
953
3.06M
        SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
954
3.06M
        RRETURN(MATCH_NOMATCH);
955
0
        }
956
452M
      if (Fecode[1] != *Feptr++) RRETURN(MATCH_NOMATCH);
957
11.4M
      Fecode += 2;
958
11.4M
      }
959
11.4M
    break;
960
961
962
    /* ===================================================================== */
963
    /* Match a single character, caselessly. If we are at the end of the
964
    subject, give up immediately. We get here only when the pattern character
965
    has at most one other case. Characters with more than two cases are coded
966
    as OP_PROP with the pseudo-property PT_CLIST. */
967
968
160M
    case OP_CHARI:
969
160M
    if (Feptr >= mb->end_subject)
970
121k
      {
971
121k
      SCHECK_PARTIAL();
972
121k
      RRETURN(MATCH_NOMATCH);
973
0
      }
974
975
160M
#ifdef SUPPORT_UNICODE
976
160M
    if (utf)
977
0
      {
978
0
      Flength = 1;
979
0
      Fecode++;
980
0
      GETCHARLEN(fc, Fecode, Flength);
981
982
      /* If the pattern character's value is < 128, we know that its other case
983
      (if any) is also < 128 (and therefore only one code unit long in all
984
      code-unit widths), so we can use the fast lookup table. We checked above
985
      that there is at least one character left in the subject. */
986
987
0
      if (fc < 128)
988
0
        {
989
0
        uint32_t cc = UCHAR21(Feptr);
990
0
        if (mb->lcc[fc] != TABLE_GET(cc, mb->lcc, cc)) RRETURN(MATCH_NOMATCH);
991
0
        Fecode++;
992
0
        Feptr++;
993
0
        }
994
995
      /* Otherwise we must pick up the subject character and use Unicode
996
      property support to test its other case. Note that we cannot use the
997
      value of "Flength" to check for sufficient bytes left, because the other
998
      case of the character may have more or fewer code units. */
999
1000
0
      else
1001
0
        {
1002
0
        uint32_t dc;
1003
0
        GETCHARINC(dc, Feptr);
1004
0
        Fecode += Flength;
1005
0
        if (dc != fc && dc != UCD_OTHERCASE(fc)) RRETURN(MATCH_NOMATCH);
1006
0
        }
1007
0
      }
1008
1009
    /* If UCP is set without UTF we must do the same as above, but with one
1010
    character per code unit. */
1011
1012
160M
    else if (ucp)
1013
0
      {
1014
0
      uint32_t cc = UCHAR21(Feptr);
1015
0
      fc = Fecode[1];
1016
0
      if (fc < 128)
1017
0
        {
1018
0
        if (mb->lcc[fc] != TABLE_GET(cc, mb->lcc, cc)) RRETURN(MATCH_NOMATCH);
1019
0
        }
1020
0
      else
1021
0
        {
1022
0
        if (cc != fc && cc != UCD_OTHERCASE(fc)) RRETURN(MATCH_NOMATCH);
1023
0
        }
1024
0
      Feptr++;
1025
0
      Fecode += 2;
1026
0
      }
1027
1028
160M
    else
1029
160M
#endif   /* SUPPORT_UNICODE */
1030
1031
    /* Not UTF or UCP mode; use the table for characters < 256. */
1032
160M
      {
1033
160M
      if (TABLE_GET(Fecode[1], mb->lcc, Fecode[1])
1034
160M
          != TABLE_GET(*Feptr, mb->lcc, *Feptr)) RRETURN(MATCH_NOMATCH);
1035
3.93M
      Feptr++;
1036
3.93M
      Fecode += 2;
1037
3.93M
      }
1038
3.93M
    break;
1039
1040
1041
    /* ===================================================================== */
1042
    /* Match not a single character. */
1043
1044
3.93M
    case OP_NOT:
1045
117
    case OP_NOTI:
1046
117
    if (Feptr >= mb->end_subject)
1047
0
      {
1048
0
      SCHECK_PARTIAL();
1049
0
      RRETURN(MATCH_NOMATCH);
1050
0
      }
1051
1052
117
#ifdef SUPPORT_UNICODE
1053
117
    if (utf)
1054
0
      {
1055
0
      uint32_t ch;
1056
0
      Fecode++;
1057
0
      GETCHARINC(ch, Fecode);
1058
0
      GETCHARINC(fc, Feptr);
1059
0
      if (ch == fc)
1060
0
        {
1061
0
        RRETURN(MATCH_NOMATCH);  /* Caseful match */
1062
0
        }
1063
0
      else if (Fop == OP_NOTI)   /* If caseless */
1064
0
        {
1065
0
        if (ch > 127)
1066
0
          ch = UCD_OTHERCASE(ch);
1067
0
        else
1068
0
          ch = (mb->fcc)[ch];
1069
0
        if (ch == fc) RRETURN(MATCH_NOMATCH);
1070
0
        }
1071
0
      }
1072
1073
    /* UCP without UTF is as above, but with one character per code unit. */
1074
1075
117
    else if (ucp)
1076
0
      {
1077
0
      uint32_t ch;
1078
0
      fc = UCHAR21INC(Feptr);
1079
0
      ch = Fecode[1];
1080
0
      Fecode += 2;
1081
1082
0
      if (ch == fc)
1083
0
        {
1084
0
        RRETURN(MATCH_NOMATCH);  /* Caseful match */
1085
0
        }
1086
0
      else if (Fop == OP_NOTI)   /* If caseless */
1087
0
        {
1088
0
        if (ch > 127)
1089
0
          ch = UCD_OTHERCASE(ch);
1090
0
        else
1091
0
          ch = (mb->fcc)[ch];
1092
0
        if (ch == fc) RRETURN(MATCH_NOMATCH);
1093
0
        }
1094
0
      }
1095
1096
117
    else
1097
117
#endif  /* SUPPORT_UNICODE */
1098
1099
    /* Neither UTF nor UCP is set */
1100
1101
117
      {
1102
117
      uint32_t ch = Fecode[1];
1103
117
      fc = UCHAR21INC(Feptr);
1104
117
      if (ch == fc || (Fop == OP_NOTI && TABLE_GET(ch, mb->fcc, ch) == fc))
1105
117
        RRETURN(MATCH_NOMATCH);
1106
117
      Fecode += 2;
1107
117
      }
1108
117
    break;
1109
1110
1111
    /* ===================================================================== */
1112
    /* Match a single character repeatedly. */
1113
1114
117
#define Loclength    F->temp_size
1115
126M
#define Lstart_eptr  F->temp_sptr[0]
1116
117
#define Lcharptr     F->temp_sptr[1]
1117
471M
#define Lmin         F->temp_32[0]
1118
342M
#define Lmax         F->temp_32[1]
1119
274M
#define Lc           F->temp_32[2]
1120
28.3M
#define Loc          F->temp_32[3]
1121
1122
20.3M
    case OP_EXACT:
1123
20.5M
    case OP_EXACTI:
1124
20.5M
    Lmin = Lmax = GET2(Fecode, 1);
1125
20.5M
    Fecode += 1 + IMM2_SIZE;
1126
20.5M
    goto REPEATCHAR;
1127
1128
0
    case OP_POSUPTO:
1129
0
    case OP_POSUPTOI:
1130
0
    reptype = REPTYPE_POS;
1131
0
    Lmin = 0;
1132
0
    Lmax = GET2(Fecode, 1);
1133
0
    Fecode += 1 + IMM2_SIZE;
1134
0
    goto REPEATCHAR;
1135
1136
0
    case OP_UPTO:
1137
0
    case OP_UPTOI:
1138
0
    reptype = REPTYPE_MAX;
1139
0
    Lmin = 0;
1140
0
    Lmax = GET2(Fecode, 1);
1141
0
    Fecode += 1 + IMM2_SIZE;
1142
0
    goto REPEATCHAR;
1143
1144
0
    case OP_MINUPTO:
1145
0
    case OP_MINUPTOI:
1146
0
    reptype = REPTYPE_MIN;
1147
0
    Lmin = 0;
1148
0
    Lmax = GET2(Fecode, 1);
1149
0
    Fecode += 1 + IMM2_SIZE;
1150
0
    goto REPEATCHAR;
1151
1152
7.43M
    case OP_POSSTAR:
1153
10.7M
    case OP_POSSTARI:
1154
10.7M
    reptype = REPTYPE_POS;
1155
10.7M
    Lmin = 0;
1156
10.7M
    Lmax = UINT32_MAX;
1157
10.7M
    Fecode++;
1158
10.7M
    goto REPEATCHAR;
1159
1160
2.19M
    case OP_POSPLUS:
1161
2.38M
    case OP_POSPLUSI:
1162
2.38M
    reptype = REPTYPE_POS;
1163
2.38M
    Lmin = 1;
1164
2.38M
    Lmax = UINT32_MAX;
1165
2.38M
    Fecode++;
1166
2.38M
    goto REPEATCHAR;
1167
1168
70.7M
    case OP_POSQUERY:
1169
74.2M
    case OP_POSQUERYI:
1170
74.2M
    reptype = REPTYPE_POS;
1171
74.2M
    Lmin = 0;
1172
74.2M
    Lmax = 1;
1173
74.2M
    Fecode++;
1174
74.2M
    goto REPEATCHAR;
1175
1176
13.3M
    case OP_STAR:
1177
19.2M
    case OP_STARI:
1178
19.2M
    case OP_MINSTAR:
1179
19.3M
    case OP_MINSTARI:
1180
19.3M
    case OP_PLUS:
1181
19.8M
    case OP_PLUSI:
1182
19.8M
    case OP_MINPLUS:
1183
20.2M
    case OP_MINPLUSI:
1184
21.3M
    case OP_QUERY:
1185
21.5M
    case OP_QUERYI:
1186
21.5M
    case OP_MINQUERY:
1187
21.5M
    case OP_MINQUERYI:
1188
21.5M
    fc = *Fecode++ - ((Fop < OP_STARI)? OP_STAR : OP_STARI);
1189
21.5M
    Lmin = rep_min[fc];
1190
21.5M
    Lmax = rep_max[fc];
1191
21.5M
    reptype = rep_typ[fc];
1192
1193
    /* Common code for all repeated single-character matches. We first check
1194
    for the minimum number of characters. If the minimum equals the maximum, we
1195
    are done. Otherwise, if minimizing, check the rest of the pattern for a
1196
    match; if there isn't one, advance up to the maximum, one character at a
1197
    time.
1198
1199
    If maximizing, advance up to the maximum number of matching characters,
1200
    until Feptr is past the end of the maximum run. If possessive, we are
1201
    then done (no backing up). Otherwise, match at this position; anything
1202
    other than no match is immediately returned. For nomatch, back up one
1203
    character, unless we are matching \R and the last thing matched was
1204
    \r\n, in which case, back up two code units until we reach the first
1205
    optional character position.
1206
1207
    The various UTF/non-UTF and caseful/caseless cases are handled separately,
1208
    for speed. */
1209
1210
129M
    REPEATCHAR:
1211
129M
#ifdef SUPPORT_UNICODE
1212
129M
    if (utf)
1213
0
      {
1214
0
      Flength = 1;
1215
0
      Lcharptr = Fecode;
1216
0
      GETCHARLEN(fc, Fecode, Flength);
1217
0
      Fecode += Flength;
1218
1219
      /* Handle multi-code-unit character matching, caseful and caseless. */
1220
1221
0
      if (Flength > 1)
1222
0
        {
1223
0
        uint32_t othercase;
1224
1225
0
        if (Fop >= OP_STARI &&     /* Caseless */
1226
0
            (othercase = UCD_OTHERCASE(fc)) != fc)
1227
0
          Loclength = PRIV(ord2utf)(othercase, Foccu);
1228
0
        else Loclength = 0;
1229
1230
0
        for (i = 1; i <= Lmin; i++)
1231
0
          {
1232
0
          if (Feptr <= mb->end_subject - Flength &&
1233
0
            memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0) Feptr += Flength;
1234
0
          else if (Loclength > 0 &&
1235
0
                   Feptr <= mb->end_subject - Loclength &&
1236
0
                   memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0)
1237
0
            Feptr += Loclength;
1238
0
          else
1239
0
            {
1240
0
            CHECK_PARTIAL();
1241
0
            RRETURN(MATCH_NOMATCH);
1242
0
            }
1243
0
          }
1244
1245
0
        if (Lmin == Lmax) continue;
1246
1247
0
        if (reptype == REPTYPE_MIN)
1248
0
          {
1249
0
          for (;;)
1250
0
            {
1251
0
            RMATCH(Fecode, RM202);
1252
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1253
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1254
0
            if (Feptr <= mb->end_subject - Flength &&
1255
0
              memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0) Feptr += Flength;
1256
0
            else if (Loclength > 0 &&
1257
0
                     Feptr <= mb->end_subject - Loclength &&
1258
0
                     memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0)
1259
0
              Feptr += Loclength;
1260
0
            else
1261
0
              {
1262
0
              CHECK_PARTIAL();
1263
0
              RRETURN(MATCH_NOMATCH);
1264
0
              }
1265
0
            }
1266
          /* Control never gets here */
1267
0
          }
1268
1269
0
        else  /* Maximize */
1270
0
          {
1271
0
          Lstart_eptr = Feptr;
1272
0
          for (i = Lmin; i < Lmax; i++)
1273
0
            {
1274
0
            if (Feptr <= mb->end_subject - Flength &&
1275
0
                memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0)
1276
0
              Feptr += Flength;
1277
0
            else if (Loclength > 0 &&
1278
0
                     Feptr <= mb->end_subject - Loclength &&
1279
0
                     memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0)
1280
0
              Feptr += Loclength;
1281
0
            else
1282
0
              {
1283
0
              CHECK_PARTIAL();
1284
0
              break;
1285
0
              }
1286
0
            }
1287
1288
          /* After \C in UTF mode, Lstart_eptr might be in the middle of a
1289
          Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
1290
          go too far. */
1291
1292
0
          if (reptype != REPTYPE_POS) for(;;)
1293
0
            {
1294
0
            if (Feptr <= Lstart_eptr) break;
1295
0
            RMATCH(Fecode, RM203);
1296
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1297
0
            Feptr--;
1298
0
            BACKCHAR(Feptr);
1299
0
            }
1300
0
          }
1301
0
        break;   /* End of repeated wide character handling */
1302
0
        }
1303
1304
      /* Length of UTF character is 1. Put it into the preserved variable and
1305
      fall through to the non-UTF code. */
1306
1307
0
      Lc = fc;
1308
0
      }
1309
129M
    else
1310
129M
#endif  /* SUPPORT_UNICODE */
1311
1312
    /* When not in UTF mode, load a single-code-unit character. Then proceed as
1313
    above, using Unicode casing if either UTF or UCP is set. */
1314
1315
129M
    Lc = *Fecode++;
1316
1317
    /* Caseless comparison */
1318
1319
129M
    if (Fop >= OP_STARI)
1320
14.1M
      {
1321
14.1M
#if PCRE2_CODE_UNIT_WIDTH == 8
1322
14.1M
#ifdef SUPPORT_UNICODE
1323
14.1M
      if (ucp && !utf && Lc > 127) Loc = UCD_OTHERCASE(Lc);
1324
14.1M
      else
1325
14.1M
#endif  /* SUPPORT_UNICODE */
1326
      /* Lc will be < 128 in UTF-8 mode. */
1327
14.1M
      Loc = mb->fcc[Lc];
1328
#else /* 16-bit & 32-bit */
1329
#ifdef SUPPORT_UNICODE
1330
      if ((utf || ucp) && Lc > 127) Loc = UCD_OTHERCASE(Lc);
1331
      else
1332
#endif  /* SUPPORT_UNICODE */
1333
      Loc = TABLE_GET(Lc, mb->fcc, Lc);
1334
#endif  /* PCRE2_CODE_UNIT_WIDTH == 8 */
1335
1336
14.2M
      for (i = 1; i <= Lmin; i++)
1337
1.30M
        {
1338
1.30M
        uint32_t cc;                 /* Faster than PCRE2_UCHAR */
1339
1.30M
        if (Feptr >= mb->end_subject)
1340
2.87k
          {
1341
2.87k
          SCHECK_PARTIAL();
1342
2.87k
          RRETURN(MATCH_NOMATCH);
1343
0
          }
1344
1.29M
        cc = UCHAR21TEST(Feptr);
1345
1.29M
        if (Lc != cc && Loc != cc) RRETURN(MATCH_NOMATCH);
1346
27.0k
        Feptr++;
1347
27.0k
        }
1348
12.9M
      if (Lmin == Lmax) continue;
1349
1350
12.9M
      if (reptype == REPTYPE_MIN)
1351
48.1k
        {
1352
48.1k
        for (;;)
1353
56.4k
          {
1354
56.4k
          uint32_t cc;               /* Faster than PCRE2_UCHAR */
1355
56.4k
          RMATCH(Fecode, RM25);
1356
56.4k
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1357
56.4k
          if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1358
56.4k
          if (Feptr >= mb->end_subject)
1359
74
            {
1360
74
            SCHECK_PARTIAL();
1361
74
            RRETURN(MATCH_NOMATCH);
1362
0
            }
1363
56.3k
          cc = UCHAR21TEST(Feptr);
1364
56.3k
          if (Lc != cc && Loc != cc) RRETURN(MATCH_NOMATCH);
1365
8.28k
          Feptr++;
1366
8.28k
          }
1367
        /* Control never gets here */
1368
48.1k
        }
1369
1370
12.8M
      else  /* Maximize */
1371
12.8M
        {
1372
12.8M
        Lstart_eptr = Feptr;
1373
12.9M
        for (i = Lmin; i < Lmax; i++)
1374
12.9M
          {
1375
12.9M
          uint32_t cc;               /* Faster than PCRE2_UCHAR */
1376
12.9M
          if (Feptr >= mb->end_subject)
1377
43.5k
            {
1378
43.5k
            SCHECK_PARTIAL();
1379
43.5k
            break;
1380
43.5k
            }
1381
12.9M
          cc = UCHAR21TEST(Feptr);
1382
12.9M
          if (Lc != cc && Loc != cc) break;
1383
107k
          Feptr++;
1384
107k
          }
1385
12.8M
        if (reptype != REPTYPE_POS) for (;;)
1386
6.07M
          {
1387
6.07M
          if (Feptr == Lstart_eptr) break;
1388
43.0k
          RMATCH(Fecode, RM26);
1389
42.9k
          Feptr--;
1390
42.9k
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1391
42.9k
          }
1392
12.8M
        }
1393
12.9M
      }
1394
1395
    /* Caseful comparisons (includes all multi-byte characters) */
1396
1397
115M
    else
1398
115M
      {
1399
115M
      for (i = 1; i <= Lmin; i++)
1400
23.0M
        {
1401
23.0M
        if (Feptr >= mb->end_subject)
1402
3.87k
          {
1403
3.87k
          SCHECK_PARTIAL();
1404
3.87k
          RRETURN(MATCH_NOMATCH);
1405
0
          }
1406
23.0M
        if (Lc != UCHAR21INCTEST(Feptr)) RRETURN(MATCH_NOMATCH);
1407
573k
        }
1408
1409
92.8M
      if (Lmin == Lmax) continue;
1410
1411
92.8M
      if (reptype == REPTYPE_MIN)
1412
39.4k
        {
1413
39.4k
        for (;;)
1414
46.1k
          {
1415
46.1k
          RMATCH(Fecode, RM27);
1416
46.0k
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1417
46.0k
          if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1418
45.7k
          if (Feptr >= mb->end_subject)
1419
7
            {
1420
7
            SCHECK_PARTIAL();
1421
7
            RRETURN(MATCH_NOMATCH);
1422
0
            }
1423
45.7k
          if (Lc != UCHAR21INCTEST(Feptr)) RRETURN(MATCH_NOMATCH);
1424
6.65k
          }
1425
        /* Control never gets here */
1426
39.4k
        }
1427
92.7M
      else  /* Maximize */
1428
92.7M
        {
1429
92.7M
        Lstart_eptr = Feptr;
1430
94.2M
        for (i = Lmin; i < Lmax; i++)
1431
93.7M
          {
1432
93.7M
          if (Feptr >= mb->end_subject)
1433
541k
            {
1434
541k
            SCHECK_PARTIAL();
1435
541k
            break;
1436
541k
            }
1437
1438
93.2M
          if (Lc != UCHAR21TEST(Feptr)) break;
1439
1.50M
          Feptr++;
1440
1.50M
          }
1441
1442
92.7M
        if (reptype != REPTYPE_POS) for (;;)
1443
14.6M
          {
1444
14.6M
          if (Feptr <= Lstart_eptr) break;
1445
187k
          RMATCH(Fecode, RM28);
1446
143k
          Feptr--;
1447
143k
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1448
143k
          }
1449
92.7M
        }
1450
92.8M
      }
1451
105M
    break;
1452
1453
105M
#undef Loclength
1454
105M
#undef Lstart_eptr
1455
105M
#undef Lcharptr
1456
105M
#undef Lmin
1457
105M
#undef Lmax
1458
105M
#undef Lc
1459
105M
#undef Loc
1460
1461
1462
    /* ===================================================================== */
1463
    /* Match a negated single one-byte character repeatedly. This is almost a
1464
    repeat of the code for a repeated single character, but I haven't found a
1465
    nice way of commoning these up that doesn't require a test of the
1466
    positive/negative option for each character match. Maybe that wouldn't add
1467
    very much to the time taken, but character matching *is* what this is all
1468
    about... */
1469
1470
105M
#define Lstart_eptr  F->temp_sptr[0]
1471
105M
#define Lmin         F->temp_32[0]
1472
105M
#define Lmax         F->temp_32[1]
1473
105M
#define Lc           F->temp_32[2]
1474
105M
#define Loc          F->temp_32[3]
1475
1476
105M
    case OP_NOTEXACT:
1477
0
    case OP_NOTEXACTI:
1478
0
    Lmin = Lmax = GET2(Fecode, 1);
1479
0
    Fecode += 1 + IMM2_SIZE;
1480
0
    goto REPEATNOTCHAR;
1481
1482
0
    case OP_NOTUPTO:
1483
0
    case OP_NOTUPTOI:
1484
0
    Lmin = 0;
1485
0
    Lmax = GET2(Fecode, 1);
1486
0
    reptype = REPTYPE_MAX;
1487
0
    Fecode += 1 + IMM2_SIZE;
1488
0
    goto REPEATNOTCHAR;
1489
1490
0
    case OP_NOTMINUPTO:
1491
0
    case OP_NOTMINUPTOI:
1492
0
    Lmin = 0;
1493
0
    Lmax = GET2(Fecode, 1);
1494
0
    reptype = REPTYPE_MIN;
1495
0
    Fecode += 1 + IMM2_SIZE;
1496
0
    goto REPEATNOTCHAR;
1497
1498
768k
    case OP_NOTPOSSTAR:
1499
768k
    case OP_NOTPOSSTARI:
1500
768k
    reptype = REPTYPE_POS;
1501
768k
    Lmin = 0;
1502
768k
    Lmax = UINT32_MAX;
1503
768k
    Fecode++;
1504
768k
    goto REPEATNOTCHAR;
1505
1506
19.7k
    case OP_NOTPOSPLUS:
1507
19.7k
    case OP_NOTPOSPLUSI:
1508
19.7k
    reptype = REPTYPE_POS;
1509
19.7k
    Lmin = 1;
1510
19.7k
    Lmax = UINT32_MAX;
1511
19.7k
    Fecode++;
1512
19.7k
    goto REPEATNOTCHAR;
1513
1514
0
    case OP_NOTPOSQUERY:
1515
0
    case OP_NOTPOSQUERYI:
1516
0
    reptype = REPTYPE_POS;
1517
0
    Lmin = 0;
1518
0
    Lmax = 1;
1519
0
    Fecode++;
1520
0
    goto REPEATNOTCHAR;
1521
1522
0
    case OP_NOTPOSUPTO:
1523
0
    case OP_NOTPOSUPTOI:
1524
0
    reptype = REPTYPE_POS;
1525
0
    Lmin = 0;
1526
0
    Lmax = GET2(Fecode, 1);
1527
0
    Fecode += 1 + IMM2_SIZE;
1528
0
    goto REPEATNOTCHAR;
1529
1530
2.71k
    case OP_NOTSTAR:
1531
2.71k
    case OP_NOTSTARI:
1532
2.71k
    case OP_NOTMINSTAR:
1533
2.71k
    case OP_NOTMINSTARI:
1534
2.71k
    case OP_NOTPLUS:
1535
2.71k
    case OP_NOTPLUSI:
1536
2.71k
    case OP_NOTMINPLUS:
1537
2.71k
    case OP_NOTMINPLUSI:
1538
2.71k
    case OP_NOTQUERY:
1539
2.71k
    case OP_NOTQUERYI:
1540
2.71k
    case OP_NOTMINQUERY:
1541
2.71k
    case OP_NOTMINQUERYI:
1542
2.71k
    fc = *Fecode++ - ((Fop >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
1543
2.71k
    Lmin = rep_min[fc];
1544
2.71k
    Lmax = rep_max[fc];
1545
2.71k
    reptype = rep_typ[fc];
1546
1547
    /* Common code for all repeated single-character non-matches. */
1548
1549
791k
    REPEATNOTCHAR:
1550
791k
    GETCHARINCTEST(Lc, Fecode);
1551
1552
    /* The code is duplicated for the caseless and caseful cases, for speed,
1553
    since matching characters is likely to be quite common. First, ensure the
1554
    minimum number of matches are present. If Lmin = Lmax, we are done.
1555
    Otherwise, if minimizing, keep trying the rest of the expression and
1556
    advancing one matching character if failing, up to the maximum.
1557
    Alternatively, if maximizing, find the maximum number of characters and
1558
    work backwards. */
1559
1560
791k
    if (Fop >= OP_NOTSTARI)     /* Caseless */
1561
0
      {
1562
0
#ifdef SUPPORT_UNICODE
1563
0
      if ((utf || ucp) && Lc > 127)
1564
0
        Loc = UCD_OTHERCASE(Lc);
1565
0
      else
1566
0
#endif /* SUPPORT_UNICODE */
1567
1568
0
      Loc = TABLE_GET(Lc, mb->fcc, Lc);  /* Other case from table */
1569
1570
0
#ifdef SUPPORT_UNICODE
1571
0
      if (utf)
1572
0
        {
1573
0
        uint32_t d;
1574
0
        for (i = 1; i <= Lmin; i++)
1575
0
          {
1576
0
          if (Feptr >= mb->end_subject)
1577
0
            {
1578
0
            SCHECK_PARTIAL();
1579
0
            RRETURN(MATCH_NOMATCH);
1580
0
            }
1581
0
          GETCHARINC(d, Feptr);
1582
0
          if (Lc == d || Loc == d) RRETURN(MATCH_NOMATCH);
1583
0
          }
1584
0
        }
1585
0
      else
1586
0
#endif  /* SUPPORT_UNICODE */
1587
1588
      /* Not UTF mode */
1589
0
        {
1590
0
        for (i = 1; i <= Lmin; i++)
1591
0
          {
1592
0
          if (Feptr >= mb->end_subject)
1593
0
            {
1594
0
            SCHECK_PARTIAL();
1595
0
            RRETURN(MATCH_NOMATCH);
1596
0
            }
1597
0
          if (Lc == *Feptr || Loc == *Feptr) RRETURN(MATCH_NOMATCH);
1598
0
          Feptr++;
1599
0
          }
1600
0
        }
1601
1602
0
      if (Lmin == Lmax) continue;  /* Finished for exact count */
1603
1604
0
      if (reptype == REPTYPE_MIN)
1605
0
        {
1606
0
#ifdef SUPPORT_UNICODE
1607
0
        if (utf)
1608
0
          {
1609
0
          uint32_t d;
1610
0
          for (;;)
1611
0
            {
1612
0
            RMATCH(Fecode, RM204);
1613
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1614
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1615
0
            if (Feptr >= mb->end_subject)
1616
0
              {
1617
0
              SCHECK_PARTIAL();
1618
0
              RRETURN(MATCH_NOMATCH);
1619
0
              }
1620
0
            GETCHARINC(d, Feptr);
1621
0
            if (Lc == d || Loc == d) RRETURN(MATCH_NOMATCH);
1622
0
            }
1623
0
          }
1624
0
        else
1625
0
#endif  /*SUPPORT_UNICODE */
1626
1627
        /* Not UTF mode */
1628
0
          {
1629
0
          for (;;)
1630
0
            {
1631
0
            RMATCH(Fecode, RM29);
1632
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1633
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1634
0
            if (Feptr >= mb->end_subject)
1635
0
              {
1636
0
              SCHECK_PARTIAL();
1637
0
              RRETURN(MATCH_NOMATCH);
1638
0
              }
1639
0
            if (Lc == *Feptr || Loc == *Feptr) RRETURN(MATCH_NOMATCH);
1640
0
            Feptr++;
1641
0
            }
1642
0
          }
1643
        /* Control never gets here */
1644
0
        }
1645
1646
      /* Maximize case */
1647
1648
0
      else
1649
0
        {
1650
0
        Lstart_eptr = Feptr;
1651
1652
0
#ifdef SUPPORT_UNICODE
1653
0
        if (utf)
1654
0
          {
1655
0
          uint32_t d;
1656
0
          for (i = Lmin; i < Lmax; i++)
1657
0
            {
1658
0
            int len = 1;
1659
0
            if (Feptr >= mb->end_subject)
1660
0
              {
1661
0
              SCHECK_PARTIAL();
1662
0
              break;
1663
0
              }
1664
0
            GETCHARLEN(d, Feptr, len);
1665
0
            if (Lc == d || Loc == d) break;
1666
0
            Feptr += len;
1667
0
            }
1668
1669
          /* After \C in UTF mode, Lstart_eptr might be in the middle of a
1670
          Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
1671
          go too far. */
1672
1673
0
          if (reptype != REPTYPE_POS) for(;;)
1674
0
            {
1675
0
            if (Feptr <= Lstart_eptr) break;
1676
0
            RMATCH(Fecode, RM205);
1677
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1678
0
            Feptr--;
1679
0
            BACKCHAR(Feptr);
1680
0
            }
1681
0
          }
1682
0
        else
1683
0
#endif  /* SUPPORT_UNICODE */
1684
1685
        /* Not UTF mode */
1686
0
          {
1687
0
          for (i = Lmin; i < Lmax; i++)
1688
0
            {
1689
0
            if (Feptr >= mb->end_subject)
1690
0
              {
1691
0
              SCHECK_PARTIAL();
1692
0
              break;
1693
0
              }
1694
0
            if (Lc == *Feptr || Loc == *Feptr) break;
1695
0
            Feptr++;
1696
0
            }
1697
0
          if (reptype != REPTYPE_POS) for (;;)
1698
0
            {
1699
0
            if (Feptr == Lstart_eptr) break;
1700
0
            RMATCH(Fecode, RM30);
1701
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1702
0
            Feptr--;
1703
0
            }
1704
0
          }
1705
0
        }
1706
0
      }
1707
1708
    /* Caseful comparisons */
1709
1710
791k
    else
1711
791k
      {
1712
791k
#ifdef SUPPORT_UNICODE
1713
791k
      if (utf)
1714
0
        {
1715
0
        uint32_t d;
1716
0
        for (i = 1; i <= Lmin; i++)
1717
0
          {
1718
0
          if (Feptr >= mb->end_subject)
1719
0
            {
1720
0
            SCHECK_PARTIAL();
1721
0
            RRETURN(MATCH_NOMATCH);
1722
0
            }
1723
0
          GETCHARINC(d, Feptr);
1724
0
          if (Lc == d) RRETURN(MATCH_NOMATCH);
1725
0
          }
1726
0
        }
1727
791k
      else
1728
791k
#endif
1729
      /* Not UTF mode */
1730
791k
        {
1731
808k
        for (i = 1; i <= Lmin; i++)
1732
19.7k
          {
1733
19.7k
          if (Feptr >= mb->end_subject)
1734
508
            {
1735
508
            SCHECK_PARTIAL();
1736
508
            RRETURN(MATCH_NOMATCH);
1737
0
            }
1738
19.2k
          if (Lc == *Feptr++) RRETURN(MATCH_NOMATCH);
1739
17.4k
          }
1740
791k
        }
1741
1742
789k
      if (Lmin == Lmax) continue;
1743
1744
789k
      if (reptype == REPTYPE_MIN)
1745
0
        {
1746
0
#ifdef SUPPORT_UNICODE
1747
0
        if (utf)
1748
0
          {
1749
0
          uint32_t d;
1750
0
          for (;;)
1751
0
            {
1752
0
            RMATCH(Fecode, RM206);
1753
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1754
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1755
0
            if (Feptr >= mb->end_subject)
1756
0
              {
1757
0
              SCHECK_PARTIAL();
1758
0
              RRETURN(MATCH_NOMATCH);
1759
0
              }
1760
0
            GETCHARINC(d, Feptr);
1761
0
            if (Lc == d) RRETURN(MATCH_NOMATCH);
1762
0
            }
1763
0
          }
1764
0
        else
1765
0
#endif
1766
        /* Not UTF mode */
1767
0
          {
1768
0
          for (;;)
1769
0
            {
1770
0
            RMATCH(Fecode, RM31);
1771
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1772
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1773
0
            if (Feptr >= mb->end_subject)
1774
0
              {
1775
0
              SCHECK_PARTIAL();
1776
0
              RRETURN(MATCH_NOMATCH);
1777
0
              }
1778
0
            if (Lc == *Feptr++) RRETURN(MATCH_NOMATCH);
1779
0
            }
1780
0
          }
1781
        /* Control never gets here */
1782
0
        }
1783
1784
      /* Maximize case */
1785
1786
789k
      else
1787
789k
        {
1788
789k
        Lstart_eptr = Feptr;
1789
1790
789k
#ifdef SUPPORT_UNICODE
1791
789k
        if (utf)
1792
0
          {
1793
0
          uint32_t d;
1794
0
          for (i = Lmin; i < Lmax; i++)
1795
0
            {
1796
0
            int len = 1;
1797
0
            if (Feptr >= mb->end_subject)
1798
0
              {
1799
0
              SCHECK_PARTIAL();
1800
0
              break;
1801
0
              }
1802
0
            GETCHARLEN(d, Feptr, len);
1803
0
            if (Lc == d) break;
1804
0
            Feptr += len;
1805
0
            }
1806
1807
          /* After \C in UTF mode, Lstart_eptr might be in the middle of a
1808
          Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
1809
          go too far. */
1810
1811
0
          if (reptype != REPTYPE_POS) for(;;)
1812
0
            {
1813
0
            if (Feptr <= Lstart_eptr) break;
1814
0
            RMATCH(Fecode, RM207);
1815
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1816
0
            Feptr--;
1817
0
            BACKCHAR(Feptr);
1818
0
            }
1819
0
          }
1820
789k
        else
1821
789k
#endif
1822
        /* Not UTF mode */
1823
789k
          {
1824
3.61M
          for (i = Lmin; i < Lmax; i++)
1825
3.61M
            {
1826
3.61M
            if (Feptr >= mb->end_subject)
1827
408k
              {
1828
408k
              SCHECK_PARTIAL();
1829
408k
              break;
1830
408k
              }
1831
3.20M
            if (Lc == *Feptr) break;
1832
2.82M
            Feptr++;
1833
2.82M
            }
1834
789k
          if (reptype != REPTYPE_POS) for (;;)
1835
55.8k
            {
1836
55.8k
            if (Feptr == Lstart_eptr) break;
1837
53.4k
            RMATCH(Fecode, RM32);
1838
53.1k
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1839
53.1k
            Feptr--;
1840
53.1k
            }
1841
789k
          }
1842
789k
        }
1843
789k
      }
1844
788k
    break;
1845
1846
788k
#undef Lstart_eptr
1847
788k
#undef Lmin
1848
788k
#undef Lmax
1849
788k
#undef Lc
1850
788k
#undef Loc
1851
1852
1853
    /* ===================================================================== */
1854
    /* Match a bit-mapped character class, possibly repeatedly. These opcodes
1855
    are used when all the characters in the class have values in the range
1856
    0-255, and either the matching is caseful, or the characters are in the
1857
    range 0-127 when UTF processing is enabled. The only difference between
1858
    OP_CLASS and OP_NCLASS occurs when a data character outside the range is
1859
    encountered. */
1860
1861
51.9M
#define Lmin               F->temp_32[0]
1862
57.5M
#define Lmax               F->temp_32[1]
1863
7.82M
#define Lstart_eptr        F->temp_sptr[0]
1864
64.7M
#define Lbyte_map_address  F->temp_sptr[1]
1865
47.9M
#define Lbyte_map          ((unsigned char *)Lbyte_map_address)
1866
1867
788k
    case OP_NCLASS:
1868
16.8M
    case OP_CLASS:
1869
16.8M
      {
1870
16.8M
      Lbyte_map_address = Fecode + 1;           /* Save for matching */
1871
16.8M
      Fecode += 1 + (32 / sizeof(PCRE2_UCHAR)); /* Advance past the item */
1872
1873
      /* Look past the end of the item to see if there is repeat information
1874
      following. Then obey similar code to character type repeats. */
1875
1876
16.8M
      switch (*Fecode)
1877
16.8M
        {
1878
275k
        case OP_CRSTAR:
1879
275k
        case OP_CRMINSTAR:
1880
1.49M
        case OP_CRPLUS:
1881
1.50M
        case OP_CRMINPLUS:
1882
1.52M
        case OP_CRQUERY:
1883
1.52M
        case OP_CRMINQUERY:
1884
1.63M
        case OP_CRPOSSTAR:
1885
3.60M
        case OP_CRPOSPLUS:
1886
3.60M
        case OP_CRPOSQUERY:
1887
3.60M
        fc = *Fecode++ - OP_CRSTAR;
1888
3.60M
        Lmin = rep_min[fc];
1889
3.60M
        Lmax = rep_max[fc];
1890
3.60M
        reptype = rep_typ[fc];
1891
3.60M
        break;
1892
1893
424k
        case OP_CRRANGE:
1894
424k
        case OP_CRMINRANGE:
1895
437k
        case OP_CRPOSRANGE:
1896
437k
        Lmin = GET2(Fecode, 1);
1897
437k
        Lmax = GET2(Fecode, 1 + IMM2_SIZE);
1898
437k
        if (Lmax == 0) Lmax = UINT32_MAX;       /* Max 0 => infinity */
1899
437k
        reptype = rep_typ[*Fecode - OP_CRSTAR];
1900
437k
        Fecode += 1 + 2 * IMM2_SIZE;
1901
437k
        break;
1902
1903
12.7M
        default:               /* No repeat follows */
1904
12.7M
        Lmin = Lmax = 1;
1905
12.7M
        break;
1906
16.8M
        }
1907
1908
      /* First, ensure the minimum number of matches are present. */
1909
1910
16.8M
#ifdef SUPPORT_UNICODE
1911
16.8M
      if (utf)
1912
0
        {
1913
0
        for (i = 1; i <= Lmin; i++)
1914
0
          {
1915
0
          if (Feptr >= mb->end_subject)
1916
0
            {
1917
0
            SCHECK_PARTIAL();
1918
0
            RRETURN(MATCH_NOMATCH);
1919
0
            }
1920
0
          GETCHARINC(fc, Feptr);
1921
0
          if (fc > 255)
1922
0
            {
1923
0
            if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
1924
0
            }
1925
0
          else
1926
0
            if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
1927
0
          }
1928
0
        }
1929
16.8M
      else
1930
16.8M
#endif
1931
      /* Not UTF mode */
1932
16.8M
        {
1933
23.9M
        for (i = 1; i <= Lmin; i++)
1934
16.3M
          {
1935
16.3M
          if (Feptr >= mb->end_subject)
1936
24.8k
            {
1937
24.8k
            SCHECK_PARTIAL();
1938
24.8k
            RRETURN(MATCH_NOMATCH);
1939
0
            }
1940
16.3M
          fc = *Feptr++;
1941
#if PCRE2_CODE_UNIT_WIDTH != 8
1942
          if (fc > 255)
1943
            {
1944
            if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
1945
            }
1946
          else
1947
#endif
1948
16.3M
          if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
1949
7.10M
          }
1950
16.8M
        }
1951
1952
      /* If Lmax == Lmin we are done. Continue with main loop. */
1953
1954
7.50M
      if (Lmin == Lmax) continue;
1955
1956
      /* If minimizing, keep testing the rest of the expression and advancing
1957
      the pointer while it matches the class. */
1958
1959
3.66M
      if (reptype == REPTYPE_MIN)
1960
12.1k
        {
1961
12.1k
#ifdef SUPPORT_UNICODE
1962
12.1k
        if (utf)
1963
0
          {
1964
0
          for (;;)
1965
0
            {
1966
0
            RMATCH(Fecode, RM200);
1967
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1968
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1969
0
            if (Feptr >= mb->end_subject)
1970
0
              {
1971
0
              SCHECK_PARTIAL();
1972
0
              RRETURN(MATCH_NOMATCH);
1973
0
              }
1974
0
            GETCHARINC(fc, Feptr);
1975
0
            if (fc > 255)
1976
0
              {
1977
0
              if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
1978
0
              }
1979
0
            else
1980
0
              if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
1981
0
            }
1982
0
          }
1983
12.1k
        else
1984
12.1k
#endif
1985
        /* Not UTF mode */
1986
12.1k
          {
1987
12.1k
          for (;;)
1988
58.1k
            {
1989
58.1k
            RMATCH(Fecode, RM23);
1990
49.1k
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1991
49.1k
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1992
49.1k
            if (Feptr >= mb->end_subject)
1993
0
              {
1994
0
              SCHECK_PARTIAL();
1995
0
              RRETURN(MATCH_NOMATCH);
1996
0
              }
1997
49.1k
            fc = *Feptr++;
1998
#if PCRE2_CODE_UNIT_WIDTH != 8
1999
            if (fc > 255)
2000
              {
2001
              if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
2002
              }
2003
            else
2004
#endif
2005
49.1k
            if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
2006
46.0k
            }
2007
12.1k
          }
2008
        /* Control never gets here */
2009
12.1k
        }
2010
2011
      /* If maximizing, find the longest possible run, then work backwards. */
2012
2013
3.65M
      else
2014
3.65M
        {
2015
3.65M
        Lstart_eptr = Feptr;
2016
2017
3.65M
#ifdef SUPPORT_UNICODE
2018
3.65M
        if (utf)
2019
0
          {
2020
0
          for (i = Lmin; i < Lmax; i++)
2021
0
            {
2022
0
            int len = 1;
2023
0
            if (Feptr >= mb->end_subject)
2024
0
              {
2025
0
              SCHECK_PARTIAL();
2026
0
              break;
2027
0
              }
2028
0
            GETCHARLEN(fc, Feptr, len);
2029
0
            if (fc > 255)
2030
0
              {
2031
0
              if (Fop == OP_CLASS) break;
2032
0
              }
2033
0
            else
2034
0
              if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) break;
2035
0
            Feptr += len;
2036
0
            }
2037
2038
0
          if (reptype == REPTYPE_POS) continue;    /* No backtracking */
2039
2040
          /* After \C in UTF mode, Lstart_eptr might be in the middle of a
2041
          Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
2042
          go too far. */
2043
2044
0
          for (;;)
2045
0
            {
2046
0
            RMATCH(Fecode, RM201);
2047
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2048
0
            if (Feptr-- <= Lstart_eptr) break;  /* Tried at original position */
2049
0
            BACKCHAR(Feptr);
2050
0
            }
2051
0
          }
2052
3.65M
        else
2053
3.65M
#endif
2054
          /* Not UTF mode */
2055
3.65M
          {
2056
32.8M
          for (i = Lmin; i < Lmax; i++)
2057
32.7M
            {
2058
32.7M
            if (Feptr >= mb->end_subject)
2059
1.21M
              {
2060
1.21M
              SCHECK_PARTIAL();
2061
1.21M
              break;
2062
1.21M
              }
2063
31.5M
            fc = *Feptr;
2064
#if PCRE2_CODE_UNIT_WIDTH != 8
2065
            if (fc > 255)
2066
              {
2067
              if (Fop == OP_CLASS) break;
2068
              }
2069
            else
2070
#endif
2071
31.5M
            if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) break;
2072
29.1M
            Feptr++;
2073
29.1M
            }
2074
2075
3.65M
          if (reptype == REPTYPE_POS) continue;    /* No backtracking */
2076
2077
4.16M
          while (Feptr >= Lstart_eptr)
2078
3.97M
            {
2079
3.97M
            RMATCH(Fecode, RM24);
2080
2.57M
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2081
2.57M
            Feptr--;
2082
2.57M
            }
2083
1.58M
          }
2084
2085
3.65M
        RRETURN(MATCH_NOMATCH);
2086
0
        }
2087
3.66M
      }
2088
    /* Control never gets here */
2089
2090
0
#undef Lbyte_map_address
2091
0
#undef Lbyte_map
2092
0
#undef Lstart_eptr
2093
0
#undef Lmin
2094
0
#undef Lmax
2095
2096
2097
    /* ===================================================================== */
2098
    /* Match an extended character class. In the 8-bit library, this opcode is
2099
    encountered only when UTF-8 mode is supported. In the 16-bit and
2100
    32-bit libraries, codepoints greater than 255 may be encountered even when
2101
    UTF is not supported. */
2102
2103
713
#define Lstart_eptr  F->temp_sptr[0]
2104
2.25k
#define Lxclass_data F->temp_sptr[1]
2105
3.40k
#define Lmin         F->temp_32[0]
2106
2.32k
#define Lmax         F->temp_32[1]
2107
2108
0
#ifdef SUPPORT_WIDE_CHARS
2109
865
    case OP_XCLASS:
2110
865
      {
2111
865
      Lxclass_data = Fecode + 1 + LINK_SIZE;  /* Save for matching */
2112
865
      Fecode += GET(Fecode, 1);               /* Advance past the item */
2113
2114
865
      switch (*Fecode)
2115
865
        {
2116
98
        case OP_CRSTAR:
2117
98
        case OP_CRMINSTAR:
2118
98
        case OP_CRPLUS:
2119
98
        case OP_CRMINPLUS:
2120
98
        case OP_CRQUERY:
2121
98
        case OP_CRMINQUERY:
2122
98
        case OP_CRPOSSTAR:
2123
98
        case OP_CRPOSPLUS:
2124
98
        case OP_CRPOSQUERY:
2125
98
        fc = *Fecode++ - OP_CRSTAR;
2126
98
        Lmin = rep_min[fc];
2127
98
        Lmax = rep_max[fc];
2128
98
        reptype = rep_typ[fc];
2129
98
        break;
2130
2131
0
        case OP_CRRANGE:
2132
0
        case OP_CRMINRANGE:
2133
0
        case OP_CRPOSRANGE:
2134
0
        Lmin = GET2(Fecode, 1);
2135
0
        Lmax = GET2(Fecode, 1 + IMM2_SIZE);
2136
0
        if (Lmax == 0) Lmax = UINT32_MAX;  /* Max 0 => infinity */
2137
0
        reptype = rep_typ[*Fecode - OP_CRSTAR];
2138
0
        Fecode += 1 + 2 * IMM2_SIZE;
2139
0
        break;
2140
2141
767
        default:               /* No repeat follows */
2142
767
        Lmin = Lmax = 1;
2143
767
        break;
2144
865
        }
2145
2146
      /* First, ensure the minimum number of matches are present. */
2147
2148
1.60k
      for (i = 1; i <= Lmin; i++)
2149
767
        {
2150
767
        if (Feptr >= mb->end_subject)
2151
0
          {
2152
0
          SCHECK_PARTIAL();
2153
0
          RRETURN(MATCH_NOMATCH);
2154
0
          }
2155
767
        GETCHARINCTEST(fc, Feptr);
2156
767
        if (!PRIV(xclass)(fc, Lxclass_data, utf)) RRETURN(MATCH_NOMATCH);
2157
738
        }
2158
2159
      /* If Lmax == Lmin we can just continue with the main loop. */
2160
2161
836
      if (Lmin == Lmax) continue;
2162
2163
      /* If minimizing, keep testing the rest of the expression and advancing
2164
      the pointer while it matches the class. */
2165
2166
98
      if (reptype == REPTYPE_MIN)
2167
0
        {
2168
0
        for (;;)
2169
0
          {
2170
0
          RMATCH(Fecode, RM100);
2171
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2172
0
          if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
2173
0
          if (Feptr >= mb->end_subject)
2174
0
            {
2175
0
            SCHECK_PARTIAL();
2176
0
            RRETURN(MATCH_NOMATCH);
2177
0
            }
2178
0
          GETCHARINCTEST(fc, Feptr);
2179
0
          if (!PRIV(xclass)(fc, Lxclass_data, utf)) RRETURN(MATCH_NOMATCH);
2180
0
          }
2181
        /* Control never gets here */
2182
0
        }
2183
2184
      /* If maximizing, find the longest possible run, then work backwards. */
2185
2186
98
      else
2187
98
        {
2188
98
        Lstart_eptr = Feptr;
2189
619
        for (i = Lmin; i < Lmax; i++)
2190
619
          {
2191
619
          int len = 1;
2192
619
          if (Feptr >= mb->end_subject)
2193
1
            {
2194
1
            SCHECK_PARTIAL();
2195
1
            break;
2196
1
            }
2197
618
#ifdef SUPPORT_UNICODE
2198
618
          GETCHARLENTEST(fc, Feptr, len);
2199
#else
2200
          fc = *Feptr;
2201
#endif
2202
618
          if (!PRIV(xclass)(fc, Lxclass_data, utf)) break;
2203
521
          Feptr += len;
2204
521
          }
2205
2206
98
        if (reptype == REPTYPE_POS) continue;    /* No backtracking */
2207
2208
        /* After \C in UTF mode, Lstart_eptr might be in the middle of a
2209
        Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
2210
        go too far. */
2211
2212
98
        for(;;)
2213
616
          {
2214
616
          RMATCH(Fecode, RM101);
2215
615
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2216
615
          if (Feptr-- <= Lstart_eptr) break;  /* Tried at original position */
2217
518
#ifdef SUPPORT_UNICODE
2218
518
          if (utf) BACKCHAR(Feptr);
2219
518
#endif
2220
518
          }
2221
98
        RRETURN(MATCH_NOMATCH);
2222
0
        }
2223
2224
      /* Control never gets here */
2225
98
      }
2226
0
#endif  /* SUPPORT_WIDE_CHARS: end of XCLASS */
2227
2228
0
#undef Lstart_eptr
2229
0
#undef Lxclass_data
2230
0
#undef Lmin
2231
0
#undef Lmax
2232
2233
2234
    /* ===================================================================== */
2235
    /* Match various character types when PCRE2_UCP is not set. These opcodes
2236
    are not generated when PCRE2_UCP is set - instead appropriate property
2237
    tests are compiled. */
2238
2239
6.85M
    case OP_NOT_DIGIT:
2240
6.85M
    if (Feptr >= mb->end_subject)
2241
25.2k
      {
2242
25.2k
      SCHECK_PARTIAL();
2243
25.2k
      RRETURN(MATCH_NOMATCH);
2244
0
      }
2245
6.82M
    GETCHARINCTEST(fc, Feptr);
2246
6.82M
    if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_digit) != 0)
2247
6.36M
      RRETURN(MATCH_NOMATCH);
2248
6.36M
    Fecode++;
2249
6.36M
    break;
2250
2251
4.58M
    case OP_DIGIT:
2252
4.58M
    if (Feptr >= mb->end_subject)
2253
4.13k
      {
2254
4.13k
      SCHECK_PARTIAL();
2255
4.13k
      RRETURN(MATCH_NOMATCH);
2256
0
      }
2257
4.57M
    GETCHARINCTEST(fc, Feptr);
2258
4.57M
    if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_digit) == 0)
2259
2.36M
      RRETURN(MATCH_NOMATCH);
2260
2.36M
    Fecode++;
2261
2.36M
    break;
2262
2263
916k
    case OP_NOT_WHITESPACE:
2264
916k
    if (Feptr >= mb->end_subject)
2265
738
      {
2266
738
      SCHECK_PARTIAL();
2267
738
      RRETURN(MATCH_NOMATCH);
2268
0
      }
2269
915k
    GETCHARINCTEST(fc, Feptr);
2270
915k
    if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_space) != 0)
2271
861k
      RRETURN(MATCH_NOMATCH);
2272
861k
    Fecode++;
2273
861k
    break;
2274
2275
292k
    case OP_WHITESPACE:
2276
292k
    if (Feptr >= mb->end_subject)
2277
1.25k
      {
2278
1.25k
      SCHECK_PARTIAL();
2279
1.25k
      RRETURN(MATCH_NOMATCH);
2280
0
      }
2281
291k
    GETCHARINCTEST(fc, Feptr);
2282
291k
    if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_space) == 0)
2283
279k
      RRETURN(MATCH_NOMATCH);
2284
11.8k
    Fecode++;
2285
11.8k
    break;
2286
2287
272k
    case OP_NOT_WORDCHAR:
2288
272k
    if (Feptr >= mb->end_subject)
2289
3.70k
      {
2290
3.70k
      SCHECK_PARTIAL();
2291
3.70k
      RRETURN(MATCH_NOMATCH);
2292
0
      }
2293
269k
    GETCHARINCTEST(fc, Feptr);
2294
269k
    if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0)
2295
162k
      RRETURN(MATCH_NOMATCH);
2296
162k
    Fecode++;
2297
162k
    break;
2298
2299
2.57M
    case OP_WORDCHAR:
2300
2.57M
    if (Feptr >= mb->end_subject)
2301
22.7k
      {
2302
22.7k
      SCHECK_PARTIAL();
2303
22.7k
      RRETURN(MATCH_NOMATCH);
2304
0
      }
2305
2.54M
    GETCHARINCTEST(fc, Feptr);
2306
2.54M
    if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_word) == 0)
2307
1.80M
      RRETURN(MATCH_NOMATCH);
2308
741k
    Fecode++;
2309
741k
    break;
2310
2311
141M
    case OP_ANYNL:
2312
141M
    if (Feptr >= mb->end_subject)
2313
516k
      {
2314
516k
      SCHECK_PARTIAL();
2315
516k
      RRETURN(MATCH_NOMATCH);
2316
0
      }
2317
140M
    GETCHARINCTEST(fc, Feptr);
2318
140M
    switch(fc)
2319
140M
      {
2320
136M
      default: RRETURN(MATCH_NOMATCH);
2321
2322
1.28M
      case CHAR_CR:
2323
1.28M
      if (Feptr >= mb->end_subject)
2324
883
        {
2325
883
        SCHECK_PARTIAL();
2326
883
        }
2327
1.28M
      else if (UCHAR21TEST(Feptr) == CHAR_LF) Feptr++;
2328
1.28M
      break;
2329
2330
1.48M
      case CHAR_LF:
2331
1.48M
      break;
2332
2333
421k
      case CHAR_VT:
2334
513k
      case CHAR_FF:
2335
986k
      case CHAR_NEL:
2336
986k
#ifndef EBCDIC
2337
986k
      case 0x2028:
2338
986k
      case 0x2029:
2339
986k
#endif  /* Not EBCDIC */
2340
986k
      if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
2341
986k
      break;
2342
140M
      }
2343
3.75M
    Fecode++;
2344
3.75M
    break;
2345
2346
10.7M
    case OP_NOT_HSPACE:
2347
10.7M
    if (Feptr >= mb->end_subject)
2348
3.52k
      {
2349
3.52k
      SCHECK_PARTIAL();
2350
3.52k
      RRETURN(MATCH_NOMATCH);
2351
0
      }
2352
10.7M
    GETCHARINCTEST(fc, Feptr);
2353
10.7M
    switch(fc)
2354
10.7M
      {
2355
241k
      HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
2356
10.4M
      default: break;
2357
10.7M
      }
2358
10.4M
    Fecode++;
2359
10.4M
    break;
2360
2361
31.0M
    case OP_HSPACE:
2362
31.0M
    if (Feptr >= mb->end_subject)
2363
41.2k
      {
2364
41.2k
      SCHECK_PARTIAL();
2365
41.2k
      RRETURN(MATCH_NOMATCH);
2366
0
      }
2367
31.0M
    GETCHARINCTEST(fc, Feptr);
2368
31.0M
    switch(fc)
2369
31.0M
      {
2370
469k
      HSPACE_CASES: break;  /* Byte and multibyte cases */
2371
30.5M
      default: RRETURN(MATCH_NOMATCH);
2372
31.0M
      }
2373
469k
    Fecode++;
2374
469k
    break;
2375
2376
8.20M
    case OP_NOT_VSPACE:
2377
8.20M
    if (Feptr >= mb->end_subject)
2378
6.96k
      {
2379
6.96k
      SCHECK_PARTIAL();
2380
6.96k
      RRETURN(MATCH_NOMATCH);
2381
0
      }
2382
8.19M
    GETCHARINCTEST(fc, Feptr);
2383
8.19M
    switch(fc)
2384
8.19M
      {
2385
175k
      VSPACE_CASES: RRETURN(MATCH_NOMATCH);
2386
8.01M
      default: break;
2387
8.19M
      }
2388
8.01M
    Fecode++;
2389
8.01M
    break;
2390
2391
807k
    case OP_VSPACE:
2392
807k
    if (Feptr >= mb->end_subject)
2393
10.6k
      {
2394
10.6k
      SCHECK_PARTIAL();
2395
10.6k
      RRETURN(MATCH_NOMATCH);
2396
0
      }
2397
797k
    GETCHARINCTEST(fc, Feptr);
2398
797k
    switch(fc)
2399
797k
      {
2400
79.7k
      VSPACE_CASES: break;
2401
717k
      default: RRETURN(MATCH_NOMATCH);
2402
797k
      }
2403
79.7k
    Fecode++;
2404
79.7k
    break;
2405
2406
2407
0
#ifdef SUPPORT_UNICODE
2408
2409
    /* ===================================================================== */
2410
    /* Check the next character by Unicode property. We will get here only
2411
    if the support is in the binary; otherwise a compile-time error occurs. */
2412
2413
93.5k
    case OP_PROP:
2414
338k
    case OP_NOTPROP:
2415
338k
    if (Feptr >= mb->end_subject)
2416
368
      {
2417
368
      SCHECK_PARTIAL();
2418
368
      RRETURN(MATCH_NOMATCH);
2419
0
      }
2420
338k
    GETCHARINCTEST(fc, Feptr);
2421
338k
      {
2422
338k
      const uint32_t *cp;
2423
338k
      const ucd_record *prop = GET_UCD(fc);
2424
2425
338k
      switch(Fecode[1])
2426
338k
        {
2427
0
        case PT_ANY:
2428
0
        if (Fop == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2429
0
        break;
2430
2431
0
        case PT_LAMP:
2432
0
        if ((prop->chartype == ucp_Lu ||
2433
0
             prop->chartype == ucp_Ll ||
2434
0
             prop->chartype == ucp_Lt) == (Fop == OP_NOTPROP))
2435
0
          RRETURN(MATCH_NOMATCH);
2436
0
        break;
2437
2438
338k
        case PT_GC:
2439
338k
        if ((Fecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (Fop == OP_PROP))
2440
204k
          RRETURN(MATCH_NOMATCH);
2441
204k
        break;
2442
2443
0
        case PT_PC:
2444
0
        if ((Fecode[2] != prop->chartype) == (Fop == OP_PROP))
2445
0
          RRETURN(MATCH_NOMATCH);
2446
0
        break;
2447
2448
0
        case PT_SC:
2449
0
        if ((Fecode[2] != prop->script) == (Fop == OP_PROP))
2450
0
          RRETURN(MATCH_NOMATCH);
2451
0
        break;
2452
2453
        /* These are specials */
2454
2455
0
        case PT_ALNUM:
2456
0
        if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2457
0
             PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (Fop == OP_NOTPROP))
2458
0
          RRETURN(MATCH_NOMATCH);
2459
0
        break;
2460
2461
        /* Perl space used to exclude VT, but from Perl 5.18 it is included,
2462
        which means that Perl space and POSIX space are now identical. PCRE
2463
        was changed at release 8.34. */
2464
2465
0
        case PT_SPACE:    /* Perl space */
2466
0
        case PT_PXSPACE:  /* POSIX space */
2467
0
        switch(fc)
2468
0
          {
2469
0
          HSPACE_CASES:
2470
0
          VSPACE_CASES:
2471
0
          if (Fop == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2472
0
          break;
2473
2474
0
          default:
2475
0
          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) ==
2476
0
            (Fop == OP_NOTPROP)) RRETURN(MATCH_NOMATCH);
2477
0
          break;
2478
0
          }
2479
0
        break;
2480
2481
0
        case PT_WORD:
2482
0
        if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2483
0
             PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
2484
0
             fc == CHAR_UNDERSCORE) == (Fop == OP_NOTPROP))
2485
0
          RRETURN(MATCH_NOMATCH);
2486
0
        break;
2487
2488
0
        case PT_CLIST:
2489
0
        cp = PRIV(ucd_caseless_sets) + Fecode[2];
2490
0
        for (;;)
2491
0
          {
2492
0
          if (fc < *cp)
2493
0
            { if (Fop == OP_PROP) { RRETURN(MATCH_NOMATCH); } else break; }
2494
0
          if (fc == *cp++)
2495
0
            { if (Fop == OP_PROP) break; else { RRETURN(MATCH_NOMATCH); } }
2496
0
          }
2497
0
        break;
2498
2499
0
        case PT_UCNC:
2500
0
        if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
2501
0
             fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
2502
0
             fc >= 0xe000) == (Fop == OP_NOTPROP))
2503
0
          RRETURN(MATCH_NOMATCH);
2504
0
        break;
2505
2506
        /* This should never occur */
2507
2508
0
        default:
2509
0
        return PCRE2_ERROR_INTERNAL;
2510
338k
        }
2511
2512
204k
      Fecode += 3;
2513
204k
      }
2514
0
    break;
2515
2516
2517
    /* ===================================================================== */
2518
    /* Match an extended Unicode sequence. We will get here only if the support
2519
    is in the binary; otherwise a compile-time error occurs. */
2520
2521
869k
    case OP_EXTUNI:
2522
869k
    if (Feptr >= mb->end_subject)
2523
1.80k
      {
2524
1.80k
      SCHECK_PARTIAL();
2525
1.80k
      RRETURN(MATCH_NOMATCH);
2526
0
      }
2527
867k
    else
2528
867k
      {
2529
867k
      GETCHARINCTEST(fc, Feptr);
2530
867k
      Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject, utf,
2531
867k
        NULL);
2532
867k
      }
2533
867k
    CHECK_PARTIAL();
2534
867k
    Fecode++;
2535
867k
    break;
2536
2537
0
#endif  /* SUPPORT_UNICODE */
2538
2539
2540
    /* ===================================================================== */
2541
    /* Match a single character type repeatedly. Note that the property type
2542
    does not need to be in a stack frame as it is not used within an RMATCH()
2543
    loop. */
2544
2545
1.52G
#define Lstart_eptr  F->temp_sptr[0]
2546
666M
#define Lmin         F->temp_32[0]
2547
3.63G
#define Lmax         F->temp_32[1]
2548
1.55G
#define Lctype       F->temp_32[2]
2549
3.47M
#define Lpropvalue   F->temp_32[3]
2550
2551
8.30M
    case OP_TYPEEXACT:
2552
8.30M
    Lmin = Lmax = GET2(Fecode, 1);
2553
8.30M
    Fecode += 1 + IMM2_SIZE;
2554
8.30M
    goto REPEATTYPE;
2555
2556
0
    case OP_TYPEUPTO:
2557
0
    case OP_TYPEMINUPTO:
2558
0
    Lmin = 0;
2559
0
    Lmax = GET2(Fecode, 1);
2560
0
    reptype = (*Fecode == OP_TYPEMINUPTO)? REPTYPE_MIN : REPTYPE_MAX;
2561
0
    Fecode += 1 + IMM2_SIZE;
2562
0
    goto REPEATTYPE;
2563
2564
11.4M
    case OP_TYPEPOSSTAR:
2565
11.4M
    reptype = REPTYPE_POS;
2566
11.4M
    Lmin = 0;
2567
11.4M
    Lmax = UINT32_MAX;
2568
11.4M
    Fecode++;
2569
11.4M
    goto REPEATTYPE;
2570
2571
51.5M
    case OP_TYPEPOSPLUS:
2572
51.5M
    reptype = REPTYPE_POS;
2573
51.5M
    Lmin = 1;
2574
51.5M
    Lmax = UINT32_MAX;
2575
51.5M
    Fecode++;
2576
51.5M
    goto REPEATTYPE;
2577
2578
3.93M
    case OP_TYPEPOSQUERY:
2579
3.93M
    reptype = REPTYPE_POS;
2580
3.93M
    Lmin = 0;
2581
3.93M
    Lmax = 1;
2582
3.93M
    Fecode++;
2583
3.93M
    goto REPEATTYPE;
2584
2585
0
    case OP_TYPEPOSUPTO:
2586
0
    reptype = REPTYPE_POS;
2587
0
    Lmin = 0;
2588
0
    Lmax = GET2(Fecode, 1);
2589
0
    Fecode += 1 + IMM2_SIZE;
2590
0
    goto REPEATTYPE;
2591
2592
9.91M
    case OP_TYPESTAR:
2593
11.1M
    case OP_TYPEMINSTAR:
2594
21.4M
    case OP_TYPEPLUS:
2595
22.7M
    case OP_TYPEMINPLUS:
2596
56.2M
    case OP_TYPEQUERY:
2597
56.9M
    case OP_TYPEMINQUERY:
2598
56.9M
    fc = *Fecode++ - OP_TYPESTAR;
2599
56.9M
    Lmin = rep_min[fc];
2600
56.9M
    Lmax = rep_max[fc];
2601
56.9M
    reptype = rep_typ[fc];
2602
2603
    /* Common code for all repeated character type matches. */
2604
2605
132M
    REPEATTYPE:
2606
132M
    Lctype = *Fecode++;      /* Code for the character type */
2607
2608
132M
#ifdef SUPPORT_UNICODE
2609
132M
    if (Lctype == OP_PROP || Lctype == OP_NOTPROP)
2610
215k
      {
2611
215k
      proptype = *Fecode++;
2612
215k
      Lpropvalue = *Fecode++;
2613
215k
      }
2614
131M
    else proptype = -1;
2615
132M
#endif
2616
2617
    /* First, ensure the minimum number of matches are present. Use inline
2618
    code for maximizing the speed, and do the type test once at the start
2619
    (i.e. keep it out of the loop). The code for UTF mode is separated out for
2620
    tidiness, except for Unicode property tests. */
2621
2622
132M
    if (Lmin > 0)
2623
71.3M
      {
2624
71.3M
#ifdef SUPPORT_UNICODE
2625
71.3M
      if (proptype >= 0)  /* Property tests in all modes */
2626
202k
        {
2627
202k
        switch(proptype)
2628
202k
          {
2629
0
          case PT_ANY:
2630
0
          if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2631
0
          for (i = 1; i <= Lmin; i++)
2632
0
            {
2633
0
            if (Feptr >= mb->end_subject)
2634
0
              {
2635
0
              SCHECK_PARTIAL();
2636
0
              RRETURN(MATCH_NOMATCH);
2637
0
              }
2638
0
            GETCHARINCTEST(fc, Feptr);
2639
0
            }
2640
0
          break;
2641
2642
0
          case PT_LAMP:
2643
0
          for (i = 1; i <= Lmin; i++)
2644
0
            {
2645
0
            int chartype;
2646
0
            if (Feptr >= mb->end_subject)
2647
0
              {
2648
0
              SCHECK_PARTIAL();
2649
0
              RRETURN(MATCH_NOMATCH);
2650
0
              }
2651
0
            GETCHARINCTEST(fc, Feptr);
2652
0
            chartype = UCD_CHARTYPE(fc);
2653
0
            if ((chartype == ucp_Lu ||
2654
0
                 chartype == ucp_Ll ||
2655
0
                 chartype == ucp_Lt) == (Lctype == OP_NOTPROP))
2656
0
              RRETURN(MATCH_NOMATCH);
2657
0
            }
2658
0
          break;
2659
2660
202k
          case PT_GC:
2661
348k
          for (i = 1; i <= Lmin; i++)
2662
202k
            {
2663
202k
            if (Feptr >= mb->end_subject)
2664
0
              {
2665
0
              SCHECK_PARTIAL();
2666
0
              RRETURN(MATCH_NOMATCH);
2667
0
              }
2668
202k
            GETCHARINCTEST(fc, Feptr);
2669
202k
            if ((UCD_CATEGORY(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
2670
145k
              RRETURN(MATCH_NOMATCH);
2671
145k
            }
2672
145k
          break;
2673
2674
145k
          case PT_PC:
2675
0
          for (i = 1; i <= Lmin; i++)
2676
0
            {
2677
0
            if (Feptr >= mb->end_subject)
2678
0
              {
2679
0
              SCHECK_PARTIAL();
2680
0
              RRETURN(MATCH_NOMATCH);
2681
0
              }
2682
0
            GETCHARINCTEST(fc, Feptr);
2683
0
            if ((UCD_CHARTYPE(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
2684
0
              RRETURN(MATCH_NOMATCH);
2685
0
            }
2686
0
          break;
2687
2688
0
          case PT_SC:
2689
0
          for (i = 1; i <= Lmin; i++)
2690
0
            {
2691
0
            if (Feptr >= mb->end_subject)
2692
0
              {
2693
0
              SCHECK_PARTIAL();
2694
0
              RRETURN(MATCH_NOMATCH);
2695
0
              }
2696
0
            GETCHARINCTEST(fc, Feptr);
2697
0
            if ((UCD_SCRIPT(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
2698
0
              RRETURN(MATCH_NOMATCH);
2699
0
            }
2700
0
          break;
2701
2702
0
          case PT_ALNUM:
2703
0
          for (i = 1; i <= Lmin; i++)
2704
0
            {
2705
0
            int category;
2706
0
            if (Feptr >= mb->end_subject)
2707
0
              {
2708
0
              SCHECK_PARTIAL();
2709
0
              RRETURN(MATCH_NOMATCH);
2710
0
              }
2711
0
            GETCHARINCTEST(fc, Feptr);
2712
0
            category = UCD_CATEGORY(fc);
2713
0
            if ((category == ucp_L || category == ucp_N) == (Lctype == OP_NOTPROP))
2714
0
              RRETURN(MATCH_NOMATCH);
2715
0
            }
2716
0
          break;
2717
2718
          /* Perl space used to exclude VT, but from Perl 5.18 it is included,
2719
          which means that Perl space and POSIX space are now identical. PCRE
2720
          was changed at release 8.34. */
2721
2722
0
          case PT_SPACE:    /* Perl space */
2723
0
          case PT_PXSPACE:  /* POSIX space */
2724
0
          for (i = 1; i <= Lmin; i++)
2725
0
            {
2726
0
            if (Feptr >= mb->end_subject)
2727
0
              {
2728
0
              SCHECK_PARTIAL();
2729
0
              RRETURN(MATCH_NOMATCH);
2730
0
              }
2731
0
            GETCHARINCTEST(fc, Feptr);
2732
0
            switch(fc)
2733
0
              {
2734
0
              HSPACE_CASES:
2735
0
              VSPACE_CASES:
2736
0
              if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2737
0
              break;
2738
2739
0
              default:
2740
0
              if ((UCD_CATEGORY(fc) == ucp_Z) == (Lctype == OP_NOTPROP))
2741
0
                RRETURN(MATCH_NOMATCH);
2742
0
              break;
2743
0
              }
2744
0
            }
2745
0
          break;
2746
2747
0
          case PT_WORD:
2748
0
          for (i = 1; i <= Lmin; i++)
2749
0
            {
2750
0
            int category;
2751
0
            if (Feptr >= mb->end_subject)
2752
0
              {
2753
0
              SCHECK_PARTIAL();
2754
0
              RRETURN(MATCH_NOMATCH);
2755
0
              }
2756
0
            GETCHARINCTEST(fc, Feptr);
2757
0
            category = UCD_CATEGORY(fc);
2758
0
            if ((category == ucp_L || category == ucp_N ||
2759
0
                fc == CHAR_UNDERSCORE) == (Lctype == OP_NOTPROP))
2760
0
              RRETURN(MATCH_NOMATCH);
2761
0
            }
2762
0
          break;
2763
2764
0
          case PT_CLIST:
2765
0
          for (i = 1; i <= Lmin; i++)
2766
0
            {
2767
0
            const uint32_t *cp;
2768
0
            if (Feptr >= mb->end_subject)
2769
0
              {
2770
0
              SCHECK_PARTIAL();
2771
0
              RRETURN(MATCH_NOMATCH);
2772
0
              }
2773
0
            GETCHARINCTEST(fc, Feptr);
2774
0
            cp = PRIV(ucd_caseless_sets) + Lpropvalue;
2775
0
            for (;;)
2776
0
              {
2777
0
              if (fc < *cp)
2778
0
                {
2779
0
                if (Lctype == OP_NOTPROP) break;
2780
0
                RRETURN(MATCH_NOMATCH);
2781
0
                }
2782
0
              if (fc == *cp++)
2783
0
                {
2784
0
                if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2785
0
                break;
2786
0
                }
2787
0
              }
2788
0
            }
2789
0
          break;
2790
2791
0
          case PT_UCNC:
2792
0
          for (i = 1; i <= Lmin; i++)
2793
0
            {
2794
0
            if (Feptr >= mb->end_subject)
2795
0
              {
2796
0
              SCHECK_PARTIAL();
2797
0
              RRETURN(MATCH_NOMATCH);
2798
0
              }
2799
0
            GETCHARINCTEST(fc, Feptr);
2800
0
            if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
2801
0
                 fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
2802
0
                 fc >= 0xe000) == (Lctype == OP_NOTPROP))
2803
0
              RRETURN(MATCH_NOMATCH);
2804
0
            }
2805
0
          break;
2806
2807
          /* This should not occur */
2808
2809
0
          default:
2810
0
          return PCRE2_ERROR_INTERNAL;
2811
202k
          }
2812
202k
        }
2813
2814
      /* Match extended Unicode sequences. We will get here only if the
2815
      support is in the binary; otherwise a compile-time error occurs. */
2816
2817
71.1M
      else if (Lctype == OP_EXTUNI)
2818
520k
        {
2819
1.04M
        for (i = 1; i <= Lmin; i++)
2820
520k
          {
2821
520k
          if (Feptr >= mb->end_subject)
2822
259
            {
2823
259
            SCHECK_PARTIAL();
2824
259
            RRETURN(MATCH_NOMATCH);
2825
0
            }
2826
520k
          else
2827
520k
            {
2828
520k
            GETCHARINCTEST(fc, Feptr);
2829
520k
            Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject,
2830
520k
              mb->end_subject, utf, NULL);
2831
520k
            }
2832
520k
          CHECK_PARTIAL();
2833
520k
          }
2834
520k
        }
2835
70.6M
      else
2836
70.6M
#endif     /* SUPPORT_UNICODE */
2837
2838
/* Handle all other cases in UTF mode */
2839
2840
70.6M
#ifdef SUPPORT_UNICODE
2841
70.6M
      if (utf) switch(Lctype)
2842
0
        {
2843
0
        case OP_ANY:
2844
0
        for (i = 1; i <= Lmin; i++)
2845
0
          {
2846
0
          if (Feptr >= mb->end_subject)
2847
0
            {
2848
0
            SCHECK_PARTIAL();
2849
0
            RRETURN(MATCH_NOMATCH);
2850
0
            }
2851
0
          if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
2852
0
          if (mb->partial != 0 &&
2853
0
              Feptr + 1 >= mb->end_subject &&
2854
0
              NLBLOCK->nltype == NLTYPE_FIXED &&
2855
0
              NLBLOCK->nllen == 2 &&
2856
0
              UCHAR21(Feptr) == NLBLOCK->nl[0])
2857
0
            {
2858
0
            mb->hitend = TRUE;
2859
0
            if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
2860
0
            }
2861
0
          Feptr++;
2862
0
          ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
2863
0
          }
2864
0
        break;
2865
2866
0
        case OP_ALLANY:
2867
0
        for (i = 1; i <= Lmin; i++)
2868
0
          {
2869
0
          if (Feptr >= mb->end_subject)
2870
0
            {
2871
0
            SCHECK_PARTIAL();
2872
0
            RRETURN(MATCH_NOMATCH);
2873
0
            }
2874
0
          Feptr++;
2875
0
          ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
2876
0
          }
2877
0
        break;
2878
2879
0
        case OP_ANYBYTE:
2880
0
        if (Feptr > mb->end_subject - Lmin) RRETURN(MATCH_NOMATCH);
2881
0
        Feptr += Lmin;
2882
0
        break;
2883
2884
0
        case OP_ANYNL:
2885
0
        for (i = 1; i <= Lmin; i++)
2886
0
          {
2887
0
          if (Feptr >= mb->end_subject)
2888
0
            {
2889
0
            SCHECK_PARTIAL();
2890
0
            RRETURN(MATCH_NOMATCH);
2891
0
            }
2892
0
          GETCHARINC(fc, Feptr);
2893
0
          switch(fc)
2894
0
            {
2895
0
            default: RRETURN(MATCH_NOMATCH);
2896
2897
0
            case CHAR_CR:
2898
0
            if (Feptr < mb->end_subject && UCHAR21(Feptr) == CHAR_LF) Feptr++;
2899
0
            break;
2900
2901
0
            case CHAR_LF:
2902
0
            break;
2903
2904
0
            case CHAR_VT:
2905
0
            case CHAR_FF:
2906
0
            case CHAR_NEL:
2907
0
#ifndef EBCDIC
2908
0
            case 0x2028:
2909
0
            case 0x2029:
2910
0
#endif  /* Not EBCDIC */
2911
0
            if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
2912
0
            break;
2913
0
            }
2914
0
          }
2915
0
        break;
2916
2917
0
        case OP_NOT_HSPACE:
2918
0
        for (i = 1; i <= Lmin; i++)
2919
0
          {
2920
0
          if (Feptr >= mb->end_subject)
2921
0
            {
2922
0
            SCHECK_PARTIAL();
2923
0
            RRETURN(MATCH_NOMATCH);
2924
0
            }
2925
0
          GETCHARINC(fc, Feptr);
2926
0
          switch(fc)
2927
0
            {
2928
0
            HSPACE_CASES: RRETURN(MATCH_NOMATCH);
2929
0
            default: break;
2930
0
            }
2931
0
          }
2932
0
        break;
2933
2934
0
        case OP_HSPACE:
2935
0
        for (i = 1; i <= Lmin; i++)
2936
0
          {
2937
0
          if (Feptr >= mb->end_subject)
2938
0
            {
2939
0
            SCHECK_PARTIAL();
2940
0
            RRETURN(MATCH_NOMATCH);
2941
0
            }
2942
0
          GETCHARINC(fc, Feptr);
2943
0
          switch(fc)
2944
0
            {
2945
0
            HSPACE_CASES: break;
2946
0
            default: RRETURN(MATCH_NOMATCH);
2947
0
            }
2948
0
          }
2949
0
        break;
2950
2951
0
        case OP_NOT_VSPACE:
2952
0
        for (i = 1; i <= Lmin; i++)
2953
0
          {
2954
0
          if (Feptr >= mb->end_subject)
2955
0
            {
2956
0
            SCHECK_PARTIAL();
2957
0
            RRETURN(MATCH_NOMATCH);
2958
0
            }
2959
0
          GETCHARINC(fc, Feptr);
2960
0
          switch(fc)
2961
0
            {
2962
0
            VSPACE_CASES: RRETURN(MATCH_NOMATCH);
2963
0
            default: break;
2964
0
            }
2965
0
          }
2966
0
        break;
2967
2968
0
        case OP_VSPACE:
2969
0
        for (i = 1; i <= Lmin; i++)
2970
0
          {
2971
0
          if (Feptr >= mb->end_subject)
2972
0
            {
2973
0
            SCHECK_PARTIAL();
2974
0
            RRETURN(MATCH_NOMATCH);
2975
0
            }
2976
0
          GETCHARINC(fc, Feptr);
2977
0
          switch(fc)
2978
0
            {
2979
0
            VSPACE_CASES: break;
2980
0
            default: RRETURN(MATCH_NOMATCH);
2981
0
            }
2982
0
          }
2983
0
        break;
2984
2985
0
        case OP_NOT_DIGIT:
2986
0
        for (i = 1; i <= Lmin; i++)
2987
0
          {
2988
0
          if (Feptr >= mb->end_subject)
2989
0
            {
2990
0
            SCHECK_PARTIAL();
2991
0
            RRETURN(MATCH_NOMATCH);
2992
0
            }
2993
0
          GETCHARINC(fc, Feptr);
2994
0
          if (fc < 128 && (mb->ctypes[fc] & ctype_digit) != 0)
2995
0
            RRETURN(MATCH_NOMATCH);
2996
0
          }
2997
0
        break;
2998
2999
0
        case OP_DIGIT:
3000
0
        for (i = 1; i <= Lmin; i++)
3001
0
          {
3002
0
          uint32_t cc;
3003
0
          if (Feptr >= mb->end_subject)
3004
0
            {
3005
0
            SCHECK_PARTIAL();
3006
0
            RRETURN(MATCH_NOMATCH);
3007
0
            }
3008
0
          cc = UCHAR21(Feptr);
3009
0
          if (cc >= 128 || (mb->ctypes[cc] & ctype_digit) == 0)
3010
0
            RRETURN(MATCH_NOMATCH);
3011
0
          Feptr++;
3012
          /* No need to skip more code units - we know it has only one. */
3013
0
          }
3014
0
        break;
3015
3016
0
        case OP_NOT_WHITESPACE:
3017
0
        for (i = 1; i <= Lmin; i++)
3018
0
          {
3019
0
          uint32_t cc;
3020
0
          if (Feptr >= mb->end_subject)
3021
0
            {
3022
0
            SCHECK_PARTIAL();
3023
0
            RRETURN(MATCH_NOMATCH);
3024
0
            }
3025
0
          cc = UCHAR21(Feptr);
3026
0
          if (cc < 128 && (mb->ctypes[cc] & ctype_space) != 0)
3027
0
            RRETURN(MATCH_NOMATCH);
3028
0
          Feptr++;
3029
0
          ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
3030
0
          }
3031
0
        break;
3032
3033
0
        case OP_WHITESPACE:
3034
0
        for (i = 1; i <= Lmin; i++)
3035
0
          {
3036
0
          uint32_t cc;
3037
0
          if (Feptr >= mb->end_subject)
3038
0
            {
3039
0
            SCHECK_PARTIAL();
3040
0
            RRETURN(MATCH_NOMATCH);
3041
0
            }
3042
0
          cc = UCHAR21(Feptr);
3043
0
          if (cc >= 128 || (mb->ctypes[cc] & ctype_space) == 0)
3044
0
            RRETURN(MATCH_NOMATCH);
3045
0
          Feptr++;
3046
          /* No need to skip more code units - we know it has only one. */
3047
0
          }
3048
0
        break;
3049
3050
0
        case OP_NOT_WORDCHAR:
3051
0
        for (i = 1; i <= Lmin; i++)
3052
0
          {
3053
0
          uint32_t cc;
3054
0
          if (Feptr >= mb->end_subject)
3055
0
            {
3056
0
            SCHECK_PARTIAL();
3057
0
            RRETURN(MATCH_NOMATCH);
3058
0
            }
3059
0
          cc = UCHAR21(Feptr);
3060
0
          if (cc < 128 && (mb->ctypes[cc] & ctype_word) != 0)
3061
0
            RRETURN(MATCH_NOMATCH);
3062
0
          Feptr++;
3063
0
          ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
3064
0
          }
3065
0
        break;
3066
3067
0
        case OP_WORDCHAR:
3068
0
        for (i = 1; i <= Lmin; i++)
3069
0
          {
3070
0
          uint32_t cc;
3071
0
          if (Feptr >= mb->end_subject)
3072
0
            {
3073
0
            SCHECK_PARTIAL();
3074
0
            RRETURN(MATCH_NOMATCH);
3075
0
            }
3076
0
          cc = UCHAR21(Feptr);
3077
0
          if (cc >= 128 || (mb->ctypes[cc] & ctype_word) == 0)
3078
0
            RRETURN(MATCH_NOMATCH);
3079
0
          Feptr++;
3080
          /* No need to skip more code units - we know it has only one. */
3081
0
          }
3082
0
        break;
3083
3084
0
        default:
3085
0
        return PCRE2_ERROR_INTERNAL;
3086
0
        }  /* End switch(Lctype) */
3087
3088
70.6M
      else
3089
70.6M
#endif     /* SUPPORT_UNICODE */
3090
3091
      /* Code for the non-UTF case for minimum matching of operators other
3092
      than OP_PROP and OP_NOTPROP. */
3093
3094
70.6M
      switch(Lctype)
3095
70.6M
        {
3096
232k
        case OP_ANY:
3097
464k
        for (i = 1; i <= Lmin; i++)
3098
244k
          {
3099
244k
          if (Feptr >= mb->end_subject)
3100
5.24k
            {
3101
5.24k
            SCHECK_PARTIAL();
3102
5.24k
            RRETURN(MATCH_NOMATCH);
3103
0
            }
3104
239k
          if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
3105
232k
          if (mb->partial != 0 &&
3106
232k
              Feptr + 1 >= mb->end_subject &&
3107
232k
              NLBLOCK->nltype == NLTYPE_FIXED &&
3108
232k
              NLBLOCK->nllen == 2 &&
3109
232k
              *Feptr == NLBLOCK->nl[0])
3110
0
            {
3111
0
            mb->hitend = TRUE;
3112
0
            if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
3113
0
            }
3114
232k
          Feptr++;
3115
232k
          }
3116
219k
        break;
3117
3118
219k
        case OP_ALLANY:
3119
24.2k
        if (Feptr > mb->end_subject - Lmin)
3120
188
          {
3121
188
          SCHECK_PARTIAL();
3122
188
          RRETURN(MATCH_NOMATCH);
3123
0
          }
3124
24.0k
        Feptr += Lmin;
3125
24.0k
        break;
3126
3127
        /* This OP_ANYBYTE case will never be reached because \C gets turned
3128
        into OP_ALLANY in non-UTF mode. Cut out the code so that coverage
3129
        reports don't complain about its never being used. */
3130
3131
/*        case OP_ANYBYTE:
3132
*        if (Feptr > mb->end_subject - Lmin)
3133
*          {
3134
*          SCHECK_PARTIAL();
3135
*          RRETURN(MATCH_NOMATCH);
3136
*          }
3137
*        Feptr += Lmin;
3138
*        break;
3139
*/
3140
48.8M
        case OP_ANYNL:
3141
51.8M
        for (i = 1; i <= Lmin; i++)
3142
48.8M
          {
3143
48.8M
          if (Feptr >= mb->end_subject)
3144
27.1k
            {
3145
27.1k
            SCHECK_PARTIAL();
3146
27.1k
            RRETURN(MATCH_NOMATCH);
3147
0
            }
3148
48.8M
          switch(*Feptr++)
3149
48.8M
            {
3150
45.9M
            default: RRETURN(MATCH_NOMATCH);
3151
3152
693k
            case CHAR_CR:
3153
693k
            if (Feptr < mb->end_subject && *Feptr == CHAR_LF) Feptr++;
3154
693k
            break;
3155
3156
1.41M
            case CHAR_LF:
3157
1.41M
            break;
3158
3159
265k
            case CHAR_VT:
3160
585k
            case CHAR_FF:
3161
823k
            case CHAR_NEL:
3162
#if PCRE2_CODE_UNIT_WIDTH != 8
3163
            case 0x2028:
3164
            case 0x2029:
3165
#endif
3166
823k
            if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
3167
823k
            break;
3168
48.8M
            }
3169
48.8M
          }
3170
2.93M
        break;
3171
3172
2.93M
        case OP_NOT_HSPACE:
3173
4.66M
        for (i = 1; i <= Lmin; i++)
3174
2.44M
          {
3175
2.44M
          if (Feptr >= mb->end_subject)
3176
0
            {
3177
0
            SCHECK_PARTIAL();
3178
0
            RRETURN(MATCH_NOMATCH);
3179
0
            }
3180
2.44M
          switch(*Feptr++)
3181
2.44M
            {
3182
2.34M
            default: break;
3183
2.34M
            HSPACE_BYTE_CASES:
3184
#if PCRE2_CODE_UNIT_WIDTH != 8
3185
            HSPACE_MULTIBYTE_CASES:
3186
#endif
3187
92.5k
            RRETURN(MATCH_NOMATCH);
3188
2.44M
            }
3189
2.44M
          }
3190
2.22M
        break;
3191
3192
2.22M
        case OP_HSPACE:
3193
1.08M
        for (i = 1; i <= Lmin; i++)
3194
1.05M
          {
3195
1.05M
          if (Feptr >= mb->end_subject)
3196
300
            {
3197
300
            SCHECK_PARTIAL();
3198
300
            RRETURN(MATCH_NOMATCH);
3199
0
            }
3200
1.05M
          switch(*Feptr++)
3201
1.05M
            {
3202
1.02M
            default: RRETURN(MATCH_NOMATCH);
3203
54.4k
            HSPACE_BYTE_CASES:
3204
#if PCRE2_CODE_UNIT_WIDTH != 8
3205
            HSPACE_MULTIBYTE_CASES:
3206
#endif
3207
54.4k
            break;
3208
1.05M
            }
3209
1.05M
          }
3210
26.7k
        break;
3211
3212
895k
        case OP_NOT_VSPACE:
3213
1.72M
        for (i = 1; i <= Lmin; i++)
3214
895k
          {
3215
895k
          if (Feptr >= mb->end_subject)
3216
1.48k
            {
3217
1.48k
            SCHECK_PARTIAL();
3218
1.48k
            RRETURN(MATCH_NOMATCH);
3219
0
            }
3220
894k
          switch(*Feptr++)
3221
894k
            {
3222
60.9k
            VSPACE_BYTE_CASES:
3223
#if PCRE2_CODE_UNIT_WIDTH != 8
3224
            VSPACE_MULTIBYTE_CASES:
3225
#endif
3226
60.9k
            RRETURN(MATCH_NOMATCH);
3227
833k
            default: break;
3228
894k
            }
3229
894k
          }
3230
833k
        break;
3231
3232
2.65M
        case OP_VSPACE:
3233
2.70M
        for (i = 1; i <= Lmin; i++)
3234
2.65M
          {
3235
2.65M
          if (Feptr >= mb->end_subject)
3236
16.9k
            {
3237
16.9k
            SCHECK_PARTIAL();
3238
16.9k
            RRETURN(MATCH_NOMATCH);
3239
0
            }
3240
2.63M
          switch(*Feptr++)
3241
2.63M
            {
3242
2.57M
            default: RRETURN(MATCH_NOMATCH);
3243
207k
            VSPACE_BYTE_CASES:
3244
#if PCRE2_CODE_UNIT_WIDTH != 8
3245
            VSPACE_MULTIBYTE_CASES:
3246
#endif
3247
207k
            break;
3248
2.63M
            }
3249
2.63M
          }
3250
58.3k
        break;
3251
3252
10.9M
        case OP_NOT_DIGIT:
3253
27.9M
        for (i = 1; i <= Lmin; i++)
3254
18.2M
          {
3255
18.2M
          if (Feptr >= mb->end_subject)
3256
1.27k
            {
3257
1.27k
            SCHECK_PARTIAL();
3258
1.27k
            RRETURN(MATCH_NOMATCH);
3259
0
            }
3260
18.2M
          if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_digit) != 0)
3261
17.0M
            RRETURN(MATCH_NOMATCH);
3262
17.0M
          Feptr++;
3263
17.0M
          }
3264
9.75M
        break;
3265
3266
9.75M
        case OP_DIGIT:
3267
571k
        for (i = 1; i <= Lmin; i++)
3268
319k
          {
3269
319k
          if (Feptr >= mb->end_subject)
3270
31.0k
            {
3271
31.0k
            SCHECK_PARTIAL();
3272
31.0k
            RRETURN(MATCH_NOMATCH);
3273
0
            }
3274
288k
          if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_digit) == 0)
3275
253k
            RRETURN(MATCH_NOMATCH);
3276
253k
          Feptr++;
3277
253k
          }
3278
251k
        break;
3279
3280
251k
        case OP_NOT_WHITESPACE:
3281
270k
        for (i = 1; i <= Lmin; i++)
3282
137k
          {
3283
137k
          if (Feptr >= mb->end_subject)
3284
0
            {
3285
0
            SCHECK_PARTIAL();
3286
0
            RRETURN(MATCH_NOMATCH);
3287
0
            }
3288
137k
          if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_space) != 0)
3289
132k
            RRETURN(MATCH_NOMATCH);
3290
132k
          Feptr++;
3291
132k
          }
3292
132k
        break;
3293
3294
2.19M
        case OP_WHITESPACE:
3295
2.34M
        for (i = 1; i <= Lmin; i++)
3296
2.19M
          {
3297
2.19M
          if (Feptr >= mb->end_subject)
3298
45.4k
            {
3299
45.4k
            SCHECK_PARTIAL();
3300
45.4k
            RRETURN(MATCH_NOMATCH);
3301
0
            }
3302
2.14M
          if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_space) == 0)
3303
1.99M
            RRETURN(MATCH_NOMATCH);
3304
154k
          Feptr++;
3305
154k
          }
3306
154k
        break;
3307
3308
909k
        case OP_NOT_WORDCHAR:
3309
1.56M
        for (i = 1; i <= Lmin; i++)
3310
909k
          {
3311
909k
          if (Feptr >= mb->end_subject)
3312
109
            {
3313
109
            SCHECK_PARTIAL();
3314
109
            RRETURN(MATCH_NOMATCH);
3315
0
            }
3316
908k
          if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_word) != 0)
3317
658k
            RRETURN(MATCH_NOMATCH);
3318
658k
          Feptr++;
3319
658k
          }
3320
658k
        break;
3321
3322
658k
        case OP_WORDCHAR:
3323
213k
        for (i = 1; i <= Lmin; i++)
3324
144k
          {
3325
144k
          if (Feptr >= mb->end_subject)
3326
2.79k
            {
3327
2.79k
            SCHECK_PARTIAL();
3328
2.79k
            RRETURN(MATCH_NOMATCH);
3329
0
            }
3330
141k
          if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_word) == 0)
3331
72.9k
            RRETURN(MATCH_NOMATCH);
3332
68.6k
          Feptr++;
3333
68.6k
          }
3334
68.6k
        break;
3335
3336
68.6k
        default:
3337
0
        return PCRE2_ERROR_INTERNAL;
3338
70.6M
        }
3339
71.3M
      }
3340
3341
    /* If Lmin = Lmax we are done. Continue with the main loop. */
3342
3343
78.7M
    if (Lmin == Lmax) continue;
3344
3345
    /* If minimizing, we have to test the rest of the pattern before each
3346
    subsequent match. */
3347
3348
71.5M
    if (reptype == REPTYPE_MIN)
3349
3.05M
      {
3350
3.05M
#ifdef SUPPORT_UNICODE
3351
3.05M
      if (proptype >= 0)
3352
2.02k
        {
3353
2.02k
        switch(proptype)
3354
2.02k
          {
3355
0
          case PT_ANY:
3356
0
          for (;;)
3357
0
            {
3358
0
            RMATCH(Fecode, RM208);
3359
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3360
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3361
0
            if (Feptr >= mb->end_subject)
3362
0
              {
3363
0
              SCHECK_PARTIAL();
3364
0
              RRETURN(MATCH_NOMATCH);
3365
0
              }
3366
0
            GETCHARINCTEST(fc, Feptr);
3367
0
            if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
3368
0
            }
3369
          /* Control never gets here */
3370
3371
0
          case PT_LAMP:
3372
0
          for (;;)
3373
0
            {
3374
0
            int chartype;
3375
0
            RMATCH(Fecode, RM209);
3376
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3377
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3378
0
            if (Feptr >= mb->end_subject)
3379
0
              {
3380
0
              SCHECK_PARTIAL();
3381
0
              RRETURN(MATCH_NOMATCH);
3382
0
              }
3383
0
            GETCHARINCTEST(fc, Feptr);
3384
0
            chartype = UCD_CHARTYPE(fc);
3385
0
            if ((chartype == ucp_Lu ||
3386
0
                 chartype == ucp_Ll ||
3387
0
                 chartype == ucp_Lt) == (Lctype == OP_NOTPROP))
3388
0
              RRETURN(MATCH_NOMATCH);
3389
0
            }
3390
          /* Control never gets here */
3391
3392
2.02k
          case PT_GC:
3393
2.02k
          for (;;)
3394
134k
            {
3395
134k
            RMATCH(Fecode, RM210);
3396
134k
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3397
134k
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3398
134k
            if (Feptr >= mb->end_subject)
3399
480
              {
3400
480
              SCHECK_PARTIAL();
3401
480
              RRETURN(MATCH_NOMATCH);
3402
0
              }
3403
133k
            GETCHARINCTEST(fc, Feptr);
3404
133k
            if ((UCD_CATEGORY(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3405
132k
              RRETURN(MATCH_NOMATCH);
3406
132k
            }
3407
          /* Control never gets here */
3408
3409
0
          case PT_PC:
3410
0
          for (;;)
3411
0
            {
3412
0
            RMATCH(Fecode, RM211);
3413
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3414
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3415
0
            if (Feptr >= mb->end_subject)
3416
0
              {
3417
0
              SCHECK_PARTIAL();
3418
0
              RRETURN(MATCH_NOMATCH);
3419
0
              }
3420
0
            GETCHARINCTEST(fc, Feptr);
3421
0
            if ((UCD_CHARTYPE(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3422
0
              RRETURN(MATCH_NOMATCH);
3423
0
            }
3424
          /* Control never gets here */
3425
3426
0
          case PT_SC:
3427
0
          for (;;)
3428
0
            {
3429
0
            RMATCH(Fecode, RM212);
3430
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3431
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3432
0
            if (Feptr >= mb->end_subject)
3433
0
              {
3434
0
              SCHECK_PARTIAL();
3435
0
              RRETURN(MATCH_NOMATCH);
3436
0
              }
3437
0
            GETCHARINCTEST(fc, Feptr);
3438
0
            if ((UCD_SCRIPT(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3439
0
              RRETURN(MATCH_NOMATCH);
3440
0
            }
3441
          /* Control never gets here */
3442
3443
0
          case PT_ALNUM:
3444
0
          for (;;)
3445
0
            {
3446
0
            int category;
3447
0
            RMATCH(Fecode, RM213);
3448
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3449
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3450
0
            if (Feptr >= mb->end_subject)
3451
0
              {
3452
0
              SCHECK_PARTIAL();
3453
0
              RRETURN(MATCH_NOMATCH);
3454
0
              }
3455
0
            GETCHARINCTEST(fc, Feptr);
3456
0
            category = UCD_CATEGORY(fc);
3457
0
            if ((category == ucp_L || category == ucp_N) ==
3458
0
                (Lctype == OP_NOTPROP))
3459
0
              RRETURN(MATCH_NOMATCH);
3460
0
            }
3461
          /* Control never gets here */
3462
3463
          /* Perl space used to exclude VT, but from Perl 5.18 it is included,
3464
          which means that Perl space and POSIX space are now identical. PCRE
3465
          was changed at release 8.34. */
3466
3467
0
          case PT_SPACE:    /* Perl space */
3468
0
          case PT_PXSPACE:  /* POSIX space */
3469
0
          for (;;)
3470
0
            {
3471
0
            RMATCH(Fecode, RM214);
3472
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3473
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3474
0
            if (Feptr >= mb->end_subject)
3475
0
              {
3476
0
              SCHECK_PARTIAL();
3477
0
              RRETURN(MATCH_NOMATCH);
3478
0
              }
3479
0
            GETCHARINCTEST(fc, Feptr);
3480
0
            switch(fc)
3481
0
              {
3482
0
              HSPACE_CASES:
3483
0
              VSPACE_CASES:
3484
0
              if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
3485
0
              break;
3486
3487
0
              default:
3488
0
              if ((UCD_CATEGORY(fc) == ucp_Z) == (Lctype == OP_NOTPROP))
3489
0
                RRETURN(MATCH_NOMATCH);
3490
0
              break;
3491
0
              }
3492
0
            }
3493
          /* Control never gets here */
3494
3495
0
          case PT_WORD:
3496
0
          for (;;)
3497
0
            {
3498
0
            int category;
3499
0
            RMATCH(Fecode, RM215);
3500
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3501
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3502
0
            if (Feptr >= mb->end_subject)
3503
0
              {
3504
0
              SCHECK_PARTIAL();
3505
0
              RRETURN(MATCH_NOMATCH);
3506
0
              }
3507
0
            GETCHARINCTEST(fc, Feptr);
3508
0
            category = UCD_CATEGORY(fc);
3509
0
            if ((category == ucp_L ||
3510
0
                 category == ucp_N ||
3511
0
                 fc == CHAR_UNDERSCORE) == (Lctype == OP_NOTPROP))
3512
0
              RRETURN(MATCH_NOMATCH);
3513
0
            }
3514
          /* Control never gets here */
3515
3516
0
          case PT_CLIST:
3517
0
          for (;;)
3518
0
            {
3519
0
            const uint32_t *cp;
3520
0
            RMATCH(Fecode, RM216);
3521
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3522
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3523
0
            if (Feptr >= mb->end_subject)
3524
0
              {
3525
0
              SCHECK_PARTIAL();
3526
0
              RRETURN(MATCH_NOMATCH);
3527
0
              }
3528
0
            GETCHARINCTEST(fc, Feptr);
3529
0
            cp = PRIV(ucd_caseless_sets) + Lpropvalue;
3530
0
            for (;;)
3531
0
              {
3532
0
              if (fc < *cp)
3533
0
                {
3534
0
                if (Lctype == OP_NOTPROP) break;
3535
0
                RRETURN(MATCH_NOMATCH);
3536
0
                }
3537
0
              if (fc == *cp++)
3538
0
                {
3539
0
                if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
3540
0
                break;
3541
0
                }
3542
0
              }
3543
0
            }
3544
          /* Control never gets here */
3545
3546
0
          case PT_UCNC:
3547
0
          for (;;)
3548
0
            {
3549
0
            RMATCH(Fecode, RM217);
3550
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3551
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3552
0
            if (Feptr >= mb->end_subject)
3553
0
              {
3554
0
              SCHECK_PARTIAL();
3555
0
              RRETURN(MATCH_NOMATCH);
3556
0
              }
3557
0
            GETCHARINCTEST(fc, Feptr);
3558
0
            if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
3559
0
                 fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
3560
0
                 fc >= 0xe000) == (Lctype == OP_NOTPROP))
3561
0
              RRETURN(MATCH_NOMATCH);
3562
0
            }
3563
          /* Control never gets here */
3564
3565
          /* This should never occur */
3566
0
          default:
3567
0
          return PCRE2_ERROR_INTERNAL;
3568
2.02k
          }
3569
2.02k
        }
3570
3571
      /* Match extended Unicode sequences. We will get here only if the
3572
      support is in the binary; otherwise a compile-time error occurs. */
3573
3574
3.05M
      else if (Lctype == OP_EXTUNI)
3575
406k
        {
3576
406k
        for (;;)
3577
32.5M
          {
3578
32.5M
          RMATCH(Fecode, RM218);
3579
32.5M
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3580
32.5M
          if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3581
32.5M
          if (Feptr >= mb->end_subject)
3582
406k
            {
3583
406k
            SCHECK_PARTIAL();
3584
406k
            RRETURN(MATCH_NOMATCH);
3585
0
            }
3586
32.1M
          else
3587
32.1M
            {
3588
32.1M
            GETCHARINCTEST(fc, Feptr);
3589
32.1M
            Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject,
3590
32.1M
              utf, NULL);
3591
32.1M
            }
3592
32.1M
          CHECK_PARTIAL();
3593
32.1M
          }
3594
406k
        }
3595
2.64M
      else
3596
2.64M
#endif     /* SUPPORT_UNICODE */
3597
3598
      /* UTF mode for non-property testing character types. */
3599
3600
2.64M
#ifdef SUPPORT_UNICODE
3601
2.64M
      if (utf)
3602
0
        {
3603
0
        for (;;)
3604
0
          {
3605
0
          RMATCH(Fecode, RM219);
3606
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3607
0
          if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3608
0
          if (Feptr >= mb->end_subject)
3609
0
            {
3610
0
            SCHECK_PARTIAL();
3611
0
            RRETURN(MATCH_NOMATCH);
3612
0
            }
3613
0
          if (Lctype == OP_ANY && IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
3614
0
          GETCHARINC(fc, Feptr);
3615
0
          switch(Lctype)
3616
0
            {
3617
0
            case OP_ANY:               /* This is the non-NL case */
3618
0
            if (mb->partial != 0 &&    /* Take care with CRLF partial */
3619
0
                Feptr >= mb->end_subject &&
3620
0
                NLBLOCK->nltype == NLTYPE_FIXED &&
3621
0
                NLBLOCK->nllen == 2 &&
3622
0
                fc == NLBLOCK->nl[0])
3623
0
              {
3624
0
              mb->hitend = TRUE;
3625
0
              if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
3626
0
              }
3627
0
            break;
3628
3629
0
            case OP_ALLANY:
3630
0
            case OP_ANYBYTE:
3631
0
            break;
3632
3633
0
            case OP_ANYNL:
3634
0
            switch(fc)
3635
0
              {
3636
0
              default: RRETURN(MATCH_NOMATCH);
3637
3638
0
              case CHAR_CR:
3639
0
              if (Feptr < mb->end_subject && UCHAR21(Feptr) == CHAR_LF) Feptr++;
3640
0
              break;
3641
3642
0
              case CHAR_LF:
3643
0
              break;
3644
3645
0
              case CHAR_VT:
3646
0
              case CHAR_FF:
3647
0
              case CHAR_NEL:
3648
0
#ifndef EBCDIC
3649
0
              case 0x2028:
3650
0
              case 0x2029:
3651
0
#endif  /* Not EBCDIC */
3652
0
              if (mb->bsr_convention == PCRE2_BSR_ANYCRLF)
3653
0
                RRETURN(MATCH_NOMATCH);
3654
0
              break;
3655
0
              }
3656
0
            break;
3657
3658
0
            case OP_NOT_HSPACE:
3659
0
            switch(fc)
3660
0
              {
3661
0
              HSPACE_CASES: RRETURN(MATCH_NOMATCH);
3662
0
              default: break;
3663
0
              }
3664
0
            break;
3665
3666
0
            case OP_HSPACE:
3667
0
            switch(fc)
3668
0
              {
3669
0
              HSPACE_CASES: break;
3670
0
              default: RRETURN(MATCH_NOMATCH);
3671
0
              }
3672
0
            break;
3673
3674
0
            case OP_NOT_VSPACE:
3675
0
            switch(fc)
3676
0
              {
3677
0
              VSPACE_CASES: RRETURN(MATCH_NOMATCH);
3678
0
              default: break;
3679
0
              }
3680
0
            break;
3681
3682
0
            case OP_VSPACE:
3683
0
            switch(fc)
3684
0
              {
3685
0
              VSPACE_CASES: break;
3686
0
              default: RRETURN(MATCH_NOMATCH);
3687
0
              }
3688
0
            break;
3689
3690
0
            case OP_NOT_DIGIT:
3691
0
            if (fc < 256 && (mb->ctypes[fc] & ctype_digit) != 0)
3692
0
              RRETURN(MATCH_NOMATCH);
3693
0
            break;
3694
3695
0
            case OP_DIGIT:
3696
0
            if (fc >= 256 || (mb->ctypes[fc] & ctype_digit) == 0)
3697
0
              RRETURN(MATCH_NOMATCH);
3698
0
            break;
3699
3700
0
            case OP_NOT_WHITESPACE:
3701
0
            if (fc < 256 && (mb->ctypes[fc] & ctype_space) != 0)
3702
0
              RRETURN(MATCH_NOMATCH);
3703
0
            break;
3704
3705
0
            case OP_WHITESPACE:
3706
0
            if (fc >= 256 || (mb->ctypes[fc] & ctype_space) == 0)
3707
0
              RRETURN(MATCH_NOMATCH);
3708
0
            break;
3709
3710
0
            case OP_NOT_WORDCHAR:
3711
0
            if (fc < 256 && (mb->ctypes[fc] & ctype_word) != 0)
3712
0
              RRETURN(MATCH_NOMATCH);
3713
0
            break;
3714
3715
0
            case OP_WORDCHAR:
3716
0
            if (fc >= 256 || (mb->ctypes[fc] & ctype_word) == 0)
3717
0
              RRETURN(MATCH_NOMATCH);
3718
0
            break;
3719
3720
0
            default:
3721
0
            return PCRE2_ERROR_INTERNAL;
3722
0
            }
3723
0
          }
3724
0
        }
3725
2.64M
      else
3726
2.64M
#endif  /* SUPPORT_UNICODE */
3727
3728
      /* Not UTF mode */
3729
2.64M
        {
3730
2.64M
        for (;;)
3731
125M
          {
3732
125M
          RMATCH(Fecode, RM33);
3733
125M
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3734
125M
          if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3735
124M
          if (Feptr >= mb->end_subject)
3736
50.3k
            {
3737
50.3k
            SCHECK_PARTIAL();
3738
50.3k
            RRETURN(MATCH_NOMATCH);
3739
0
            }
3740
124M
          if (Lctype == OP_ANY && IS_NEWLINE(Feptr))
3741
124M
            RRETURN(MATCH_NOMATCH);
3742
124M
          fc = *Feptr++;
3743
124M
          switch(Lctype)
3744
124M
            {
3745
1.23M
            case OP_ANY:               /* This is the non-NL case */
3746
1.23M
            if (mb->partial != 0 &&    /* Take care with CRLF partial */
3747
1.23M
                Feptr >= mb->end_subject &&
3748
1.23M
                NLBLOCK->nltype == NLTYPE_FIXED &&
3749
1.23M
                NLBLOCK->nllen == 2 &&
3750
1.23M
                fc == NLBLOCK->nl[0])
3751
0
              {
3752
0
              mb->hitend = TRUE;
3753
0
              if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
3754
0
              }
3755
1.23M
            break;
3756
3757
1.23M
            case OP_ALLANY:
3758
432k
            case OP_ANYBYTE:
3759
432k
            break;
3760
3761
883
            case OP_ANYNL:
3762
883
            switch(fc)
3763
883
              {
3764
600
              default: RRETURN(MATCH_NOMATCH);
3765
3766
189
              case CHAR_CR:
3767
189
              if (Feptr < mb->end_subject && *Feptr == CHAR_LF) Feptr++;
3768
189
              break;
3769
3770
35
              case CHAR_LF:
3771
35
              break;
3772
3773
2
              case CHAR_VT:
3774
4
              case CHAR_FF:
3775
59
              case CHAR_NEL:
3776
#if PCRE2_CODE_UNIT_WIDTH != 8
3777
              case 0x2028:
3778
              case 0x2029:
3779
#endif
3780
59
              if (mb->bsr_convention == PCRE2_BSR_ANYCRLF)
3781
59
                RRETURN(MATCH_NOMATCH);
3782
59
              break;
3783
883
              }
3784
283
            break;
3785
3786
47.1M
            case OP_NOT_HSPACE:
3787
47.1M
            switch(fc)
3788
47.1M
              {
3789
46.6M
              default: break;
3790
46.6M
              HSPACE_BYTE_CASES:
3791
#if PCRE2_CODE_UNIT_WIDTH != 8
3792
              HSPACE_MULTIBYTE_CASES:
3793
#endif
3794
477k
              RRETURN(MATCH_NOMATCH);
3795
47.1M
              }
3796
46.6M
            break;
3797
3798
46.6M
            case OP_HSPACE:
3799
6
            switch(fc)
3800
6
              {
3801
6
              default: RRETURN(MATCH_NOMATCH);
3802
0
              HSPACE_BYTE_CASES:
3803
#if PCRE2_CODE_UNIT_WIDTH != 8
3804
              HSPACE_MULTIBYTE_CASES:
3805
#endif
3806
0
              break;
3807
6
              }
3808
0
            break;
3809
3810
44.3M
            case OP_NOT_VSPACE:
3811
44.3M
            switch(fc)
3812
44.3M
              {
3813
43.1M
              default: break;
3814
43.1M
              VSPACE_BYTE_CASES:
3815
#if PCRE2_CODE_UNIT_WIDTH != 8
3816
              VSPACE_MULTIBYTE_CASES:
3817
#endif
3818
1.19M
              RRETURN(MATCH_NOMATCH);
3819
44.3M
              }
3820
43.1M
            break;
3821
3822
43.1M
            case OP_VSPACE:
3823
0
            switch(fc)
3824
0
              {
3825
0
              default: RRETURN(MATCH_NOMATCH);
3826
0
              VSPACE_BYTE_CASES:
3827
#if PCRE2_CODE_UNIT_WIDTH != 8
3828
              VSPACE_MULTIBYTE_CASES:
3829
#endif
3830
0
              break;
3831
0
              }
3832
0
            break;
3833
3834
6.08k
            case OP_NOT_DIGIT:
3835
6.08k
            if (MAX_255(fc) && (mb->ctypes[fc] & ctype_digit) != 0)
3836
5.74k
              RRETURN(MATCH_NOMATCH);
3837
5.74k
            break;
3838
3839
0
            case OP_DIGIT:
3840
0
            if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_digit) == 0)
3841
0
              RRETURN(MATCH_NOMATCH);
3842
0
            break;
3843
3844
29.8M
            case OP_NOT_WHITESPACE:
3845
29.8M
            if (MAX_255(fc) && (mb->ctypes[fc] & ctype_space) != 0)
3846
29.7M
              RRETURN(MATCH_NOMATCH);
3847
29.7M
            break;
3848
3849
4
            case OP_WHITESPACE:
3850
4
            if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_space) == 0)
3851
4
              RRETURN(MATCH_NOMATCH);
3852
0
            break;
3853
3854
1.82M
            case OP_NOT_WORDCHAR:
3855
1.82M
            if (MAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0)
3856
1.74M
              RRETURN(MATCH_NOMATCH);
3857
1.74M
            break;
3858
3859
31.2k
            case OP_WORDCHAR:
3860
31.2k
            if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_word) == 0)
3861
23.6k
              RRETURN(MATCH_NOMATCH);
3862
23.6k
            break;
3863
3864
0
            default:
3865
0
            return PCRE2_ERROR_INTERNAL;
3866
124M
            }
3867
124M
          }
3868
2.64M
        }
3869
      /* Control never gets here */
3870
3.05M
      }
3871
3872
    /* If maximizing, it is worth using inline code for speed, doing the type
3873
    test once at the start (i.e. keep it out of the loop). */
3874
3875
68.5M
    else
3876
68.5M
      {
3877
68.5M
      Lstart_eptr = Feptr;  /* Remember where we started */
3878
3879
68.5M
#ifdef SUPPORT_UNICODE
3880
68.5M
      if (proptype >= 0)
3881
156k
        {
3882
156k
        switch(proptype)
3883
156k
          {
3884
0
          case PT_ANY:
3885
0
          for (i = Lmin; i < Lmax; i++)
3886
0
            {
3887
0
            int len = 1;
3888
0
            if (Feptr >= mb->end_subject)
3889
0
              {
3890
0
              SCHECK_PARTIAL();
3891
0
              break;
3892
0
              }
3893
0
            GETCHARLENTEST(fc, Feptr, len);
3894
0
            if (Lctype == OP_NOTPROP) break;
3895
0
            Feptr+= len;
3896
0
            }
3897
0
          break;
3898
3899
0
          case PT_LAMP:
3900
0
          for (i = Lmin; i < Lmax; i++)
3901
0
            {
3902
0
            int chartype;
3903
0
            int len = 1;
3904
0
            if (Feptr >= mb->end_subject)
3905
0
              {
3906
0
              SCHECK_PARTIAL();
3907
0
              break;
3908
0
              }
3909
0
            GETCHARLENTEST(fc, Feptr, len);
3910
0
            chartype = UCD_CHARTYPE(fc);
3911
0
            if ((chartype == ucp_Lu ||
3912
0
                 chartype == ucp_Ll ||
3913
0
                 chartype == ucp_Lt) == (Lctype == OP_NOTPROP))
3914
0
              break;
3915
0
            Feptr+= len;
3916
0
            }
3917
0
          break;
3918
3919
156k
          case PT_GC:
3920
2.94M
          for (i = Lmin; i < Lmax; i++)
3921
2.94M
            {
3922
2.94M
            int len = 1;
3923
2.94M
            if (Feptr >= mb->end_subject)
3924
22.2k
              {
3925
22.2k
              SCHECK_PARTIAL();
3926
22.2k
              break;
3927
22.2k
              }
3928
2.91M
            GETCHARLENTEST(fc, Feptr, len);
3929
2.91M
            if ((UCD_CATEGORY(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3930
128k
              break;
3931
2.79M
            Feptr+= len;
3932
2.79M
            }
3933
156k
          break;
3934
3935
156k
          case PT_PC:
3936
0
          for (i = Lmin; i < Lmax; i++)
3937
0
            {
3938
0
            int len = 1;
3939
0
            if (Feptr >= mb->end_subject)
3940
0
              {
3941
0
              SCHECK_PARTIAL();
3942
0
              break;
3943
0
              }
3944
0
            GETCHARLENTEST(fc, Feptr, len);
3945
0
            if ((UCD_CHARTYPE(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3946
0
              break;
3947
0
            Feptr+= len;
3948
0
            }
3949
0
          break;
3950
3951
0
          case PT_SC:
3952
0
          for (i = Lmin; i < Lmax; i++)
3953
0
            {
3954
0
            int len = 1;
3955
0
            if (Feptr >= mb->end_subject)
3956
0
              {
3957
0
              SCHECK_PARTIAL();
3958
0
              break;
3959
0
              }
3960
0
            GETCHARLENTEST(fc, Feptr, len);
3961
0
            if ((UCD_SCRIPT(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3962
0
              break;
3963
0
            Feptr+= len;
3964
0
            }
3965
0
          break;
3966
3967
0
          case PT_ALNUM:
3968
0
          for (i = Lmin; i < Lmax; i++)
3969
0
            {
3970
0
            int category;
3971
0
            int len = 1;
3972
0
            if (Feptr >= mb->end_subject)
3973
0
              {
3974
0
              SCHECK_PARTIAL();
3975
0
              break;
3976
0
              }
3977
0
            GETCHARLENTEST(fc, Feptr, len);
3978
0
            category = UCD_CATEGORY(fc);
3979
0
            if ((category == ucp_L || category == ucp_N) ==
3980
0
                (Lctype == OP_NOTPROP))
3981
0
              break;
3982
0
            Feptr+= len;
3983
0
            }
3984
0
          break;
3985
3986
          /* Perl space used to exclude VT, but from Perl 5.18 it is included,
3987
          which means that Perl space and POSIX space are now identical. PCRE
3988
          was changed at release 8.34. */
3989
3990
0
          case PT_SPACE:    /* Perl space */
3991
0
          case PT_PXSPACE:  /* POSIX space */
3992
0
          for (i = Lmin; i < Lmax; i++)
3993
0
            {
3994
0
            int len = 1;
3995
0
            if (Feptr >= mb->end_subject)
3996
0
              {
3997
0
              SCHECK_PARTIAL();
3998
0
              break;
3999
0
              }
4000
0
            GETCHARLENTEST(fc, Feptr, len);
4001
0
            switch(fc)
4002
0
              {
4003
0
              HSPACE_CASES:
4004
0
              VSPACE_CASES:
4005
0
              if (Lctype == OP_NOTPROP) goto ENDLOOP99;  /* Break the loop */
4006
0
              break;
4007
4008
0
              default:
4009
0
              if ((UCD_CATEGORY(fc) == ucp_Z) == (Lctype == OP_NOTPROP))
4010
0
                goto ENDLOOP99;   /* Break the loop */
4011
0
              break;
4012
0
              }
4013
0
            Feptr+= len;
4014
0
            }
4015
0
          ENDLOOP99:
4016
0
          break;
4017
4018
0
          case PT_WORD:
4019
0
          for (i = Lmin; i < Lmax; i++)
4020
0
            {
4021
0
            int category;
4022
0
            int len = 1;
4023
0
            if (Feptr >= mb->end_subject)
4024
0
              {
4025
0
              SCHECK_PARTIAL();
4026
0
              break;
4027
0
              }
4028
0
            GETCHARLENTEST(fc, Feptr, len);
4029
0
            category = UCD_CATEGORY(fc);
4030
0
            if ((category == ucp_L || category == ucp_N ||
4031
0
                 fc == CHAR_UNDERSCORE) == (Lctype == OP_NOTPROP))
4032
0
              break;
4033
0
            Feptr+= len;
4034
0
            }
4035
0
          break;
4036
4037
0
          case PT_CLIST:
4038
0
          for (i = Lmin; i < Lmax; i++)
4039
0
            {
4040
0
            const uint32_t *cp;
4041
0
            int len = 1;
4042
0
            if (Feptr >= mb->end_subject)
4043
0
              {
4044
0
              SCHECK_PARTIAL();
4045
0
              break;
4046
0
              }
4047
0
            GETCHARLENTEST(fc, Feptr, len);
4048
0
            cp = PRIV(ucd_caseless_sets) + Lpropvalue;
4049
0
            for (;;)
4050
0
              {
4051
0
              if (fc < *cp)
4052
0
                { if (Lctype == OP_NOTPROP) break; else goto GOT_MAX; }
4053
0
              if (fc == *cp++)
4054
0
                { if (Lctype == OP_NOTPROP) goto GOT_MAX; else break; }
4055
0
              }
4056
0
            Feptr += len;
4057
0
            }
4058
0
          GOT_MAX:
4059
0
          break;
4060
4061
0
          case PT_UCNC:
4062
0
          for (i = Lmin; i < Lmax; i++)
4063
0
            {
4064
0
            int len = 1;
4065
0
            if (Feptr >= mb->end_subject)
4066
0
              {
4067
0
              SCHECK_PARTIAL();
4068
0
              break;
4069
0
              }
4070
0
            GETCHARLENTEST(fc, Feptr, len);
4071
0
            if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
4072
0
                 fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
4073
0
                 fc >= 0xe000) == (Lctype == OP_NOTPROP))
4074
0
              break;
4075
0
            Feptr += len;
4076
0
            }
4077
0
          break;
4078
4079
0
          default:
4080
0
          return PCRE2_ERROR_INTERNAL;
4081
156k
          }
4082
4083
        /* Feptr is now past the end of the maximum run */
4084
4085
156k
        if (reptype == REPTYPE_POS) continue;    /* No backtracking */
4086
4087
        /* After \C in UTF mode, Lstart_eptr might be in the middle of a
4088
        Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
4089
        go too far. */
4090
4091
143k
        for(;;)
4092
2.92M
          {
4093
2.92M
          if (Feptr <= Lstart_eptr) break;
4094
2.78M
          RMATCH(Fecode, RM222);
4095
2.78M
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4096
2.78M
          Feptr--;
4097
2.78M
          if (utf) BACKCHAR(Feptr);
4098
2.78M
          }
4099
143k
        }
4100
4101
      /* Match extended Unicode grapheme clusters. We will get here only if the
4102
      support is in the binary; otherwise a compile-time error occurs. */
4103
4104
68.3M
      else if (Lctype == OP_EXTUNI)
4105
121k
        {
4106
73.4M
        for (i = Lmin; i < Lmax; i++)
4107
73.4M
          {
4108
73.4M
          if (Feptr >= mb->end_subject)
4109
113k
            {
4110
113k
            SCHECK_PARTIAL();
4111
113k
            break;
4112
113k
            }
4113
73.3M
          else
4114
73.3M
            {
4115
73.3M
            GETCHARINCTEST(fc, Feptr);
4116
73.3M
            Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject,
4117
73.3M
              utf, NULL);
4118
73.3M
            }
4119
73.3M
          CHECK_PARTIAL();
4120
73.3M
          }
4121
4122
        /* Feptr is now past the end of the maximum run */
4123
4124
121k
        if (reptype == REPTYPE_POS) continue;    /* No backtracking */
4125
4126
        /* We use <= Lstart_eptr rather than == Lstart_eptr to detect the start
4127
        of the run while backtracking because the use of \C in UTF mode can
4128
        cause BACKCHAR to move back past Lstart_eptr. This is just palliative;
4129
        the use of \C in UTF mode is fraught with danger. */
4130
4131
120k
        for(;;)
4132
72.7M
          {
4133
72.7M
          int lgb, rgb;
4134
72.7M
          PCRE2_SPTR fptr;
4135
4136
72.7M
          if (Feptr <= Lstart_eptr) break;   /* At start of char run */
4137
72.5M
          RMATCH(Fecode, RM220);
4138
72.5M
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4139
4140
          /* Backtracking over an extended grapheme cluster involves inspecting
4141
          the previous two characters (if present) to see if a break is
4142
          permitted between them. */
4143
4144
72.5M
          Feptr--;
4145
72.5M
          if (!utf) fc = *Feptr; else
4146
0
            {
4147
0
            BACKCHAR(Feptr);
4148
0
            GETCHAR(fc, Feptr);
4149
0
            }
4150
72.5M
          rgb = UCD_GRAPHBREAK(fc);
4151
4152
72.5M
          for (;;)
4153
72.6M
            {
4154
72.6M
            if (Feptr <= Lstart_eptr) break;   /* At start of char run */
4155
72.5M
            fptr = Feptr - 1;
4156
72.5M
            if (!utf) fc = *fptr; else
4157
0
              {
4158
0
              BACKCHAR(fptr);
4159
0
              GETCHAR(fc, fptr);
4160
0
              }
4161
72.5M
            lgb = UCD_GRAPHBREAK(fc);
4162
72.5M
            if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
4163
65.5k
            Feptr = fptr;
4164
65.5k
            rgb = lgb;
4165
65.5k
            }
4166
72.5M
          }
4167
120k
        }
4168
4169
68.2M
      else
4170
68.2M
#endif   /* SUPPORT_UNICODE */
4171
4172
68.2M
#ifdef SUPPORT_UNICODE
4173
68.2M
      if (utf)
4174
0
        {
4175
0
        switch(Lctype)
4176
0
          {
4177
0
          case OP_ANY:
4178
0
          for (i = Lmin; i < Lmax; i++)
4179
0
            {
4180
0
            if (Feptr >= mb->end_subject)
4181
0
              {
4182
0
              SCHECK_PARTIAL();
4183
0
              break;
4184
0
              }
4185
0
            if (IS_NEWLINE(Feptr)) break;
4186
0
            if (mb->partial != 0 &&    /* Take care with CRLF partial */
4187
0
                Feptr + 1 >= mb->end_subject &&
4188
0
                NLBLOCK->nltype == NLTYPE_FIXED &&
4189
0
                NLBLOCK->nllen == 2 &&
4190
0
                UCHAR21(Feptr) == NLBLOCK->nl[0])
4191
0
              {
4192
0
              mb->hitend = TRUE;
4193
0
              if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
4194
0
              }
4195
0
            Feptr++;
4196
0
            ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
4197
0
            }
4198
0
          break;
4199
4200
0
          case OP_ALLANY:
4201
0
          if (Lmax < UINT32_MAX)
4202
0
            {
4203
0
            for (i = Lmin; i < Lmax; i++)
4204
0
              {
4205
0
              if (Feptr >= mb->end_subject)
4206
0
                {
4207
0
                SCHECK_PARTIAL();
4208
0
                break;
4209
0
                }
4210
0
              Feptr++;
4211
0
              ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
4212
0
              }
4213
0
            }
4214
0
          else
4215
0
            {
4216
0
            Feptr = mb->end_subject;   /* Unlimited UTF-8 repeat */
4217
0
            SCHECK_PARTIAL();
4218
0
            }
4219
0
          break;
4220
4221
          /* The "byte" (i.e. "code unit") case is the same as non-UTF */
4222
4223
0
          case OP_ANYBYTE:
4224
0
          fc = Lmax - Lmin;
4225
0
          if (fc > (uint32_t)(mb->end_subject - Feptr))
4226
0
            {
4227
0
            Feptr = mb->end_subject;
4228
0
            SCHECK_PARTIAL();
4229
0
            }
4230
0
          else Feptr += fc;
4231
0
          break;
4232
4233
0
          case OP_ANYNL:
4234
0
          for (i = Lmin; i < Lmax; i++)
4235
0
            {
4236
0
            int len = 1;
4237
0
            if (Feptr >= mb->end_subject)
4238
0
              {
4239
0
              SCHECK_PARTIAL();
4240
0
              break;
4241
0
              }
4242
0
            GETCHARLEN(fc, Feptr, len);
4243
0
            if (fc == CHAR_CR)
4244
0
              {
4245
0
              if (++Feptr >= mb->end_subject) break;
4246
0
              if (UCHAR21(Feptr) == CHAR_LF) Feptr++;
4247
0
              }
4248
0
            else
4249
0
              {
4250
0
              if (fc != CHAR_LF &&
4251
0
                  (mb->bsr_convention == PCRE2_BSR_ANYCRLF ||
4252
0
                   (fc != CHAR_VT && fc != CHAR_FF && fc != CHAR_NEL
4253
0
#ifndef EBCDIC
4254
0
                    && fc != 0x2028 && fc != 0x2029
4255
0
#endif  /* Not EBCDIC */
4256
0
                    )))
4257
0
                break;
4258
0
              Feptr += len;
4259
0
              }
4260
0
            }
4261
0
          break;
4262
4263
0
          case OP_NOT_HSPACE:
4264
0
          case OP_HSPACE:
4265
0
          for (i = Lmin; i < Lmax; i++)
4266
0
            {
4267
0
            BOOL gotspace;
4268
0
            int len = 1;
4269
0
            if (Feptr >= mb->end_subject)
4270
0
              {
4271
0
              SCHECK_PARTIAL();
4272
0
              break;
4273
0
              }
4274
0
            GETCHARLEN(fc, Feptr, len);
4275
0
            switch(fc)
4276
0
              {
4277
0
              HSPACE_CASES: gotspace = TRUE; break;
4278
0
              default: gotspace = FALSE; break;
4279
0
              }
4280
0
            if (gotspace == (Lctype == OP_NOT_HSPACE)) break;
4281
0
            Feptr += len;
4282
0
            }
4283
0
          break;
4284
4285
0
          case OP_NOT_VSPACE:
4286
0
          case OP_VSPACE:
4287
0
          for (i = Lmin; i < Lmax; i++)
4288
0
            {
4289
0
            BOOL gotspace;
4290
0
            int len = 1;
4291
0
            if (Feptr >= mb->end_subject)
4292
0
              {
4293
0
              SCHECK_PARTIAL();
4294
0
              break;
4295
0
              }
4296
0
            GETCHARLEN(fc, Feptr, len);
4297
0
            switch(fc)
4298
0
              {
4299
0
              VSPACE_CASES: gotspace = TRUE; break;
4300
0
              default: gotspace = FALSE; break;
4301
0
              }
4302
0
            if (gotspace == (Lctype == OP_NOT_VSPACE)) break;
4303
0
            Feptr += len;
4304
0
            }
4305
0
          break;
4306
4307
0
          case OP_NOT_DIGIT:
4308
0
          for (i = Lmin; i < Lmax; i++)
4309
0
            {
4310
0
            int len = 1;
4311
0
            if (Feptr >= mb->end_subject)
4312
0
              {
4313
0
              SCHECK_PARTIAL();
4314
0
              break;
4315
0
              }
4316
0
            GETCHARLEN(fc, Feptr, len);
4317
0
            if (fc < 256 && (mb->ctypes[fc] & ctype_digit) != 0) break;
4318
0
            Feptr+= len;
4319
0
            }
4320
0
          break;
4321
4322
0
          case OP_DIGIT:
4323
0
          for (i = Lmin; i < Lmax; i++)
4324
0
            {
4325
0
            int len = 1;
4326
0
            if (Feptr >= mb->end_subject)
4327
0
              {
4328
0
              SCHECK_PARTIAL();
4329
0
              break;
4330
0
              }
4331
0
            GETCHARLEN(fc, Feptr, len);
4332
0
            if (fc >= 256 ||(mb->ctypes[fc] & ctype_digit) == 0) break;
4333
0
            Feptr+= len;
4334
0
            }
4335
0
          break;
4336
4337
0
          case OP_NOT_WHITESPACE:
4338
0
          for (i = Lmin; i < Lmax; i++)
4339
0
            {
4340
0
            int len = 1;
4341
0
            if (Feptr >= mb->end_subject)
4342
0
              {
4343
0
              SCHECK_PARTIAL();
4344
0
              break;
4345
0
              }
4346
0
            GETCHARLEN(fc, Feptr, len);
4347
0
            if (fc < 256 && (mb->ctypes[fc] & ctype_space) != 0) break;
4348
0
            Feptr+= len;
4349
0
            }
4350
0
          break;
4351
4352
0
          case OP_WHITESPACE:
4353
0
          for (i = Lmin; i < Lmax; i++)
4354
0
            {
4355
0
            int len = 1;
4356
0
            if (Feptr >= mb->end_subject)
4357
0
              {
4358
0
              SCHECK_PARTIAL();
4359
0
              break;
4360
0
              }
4361
0
            GETCHARLEN(fc, Feptr, len);
4362
0
            if (fc >= 256 ||(mb->ctypes[fc] & ctype_space) == 0) break;
4363
0
            Feptr+= len;
4364
0
            }
4365
0
          break;
4366
4367
0
          case OP_NOT_WORDCHAR:
4368
0
          for (i = Lmin; i < Lmax; i++)
4369
0
            {
4370
0
            int len = 1;
4371
0
            if (Feptr >= mb->end_subject)
4372
0
              {
4373
0
              SCHECK_PARTIAL();
4374
0
              break;
4375
0
              }
4376
0
            GETCHARLEN(fc, Feptr, len);
4377
0
            if (fc < 256 && (mb->ctypes[fc] & ctype_word) != 0) break;
4378
0
            Feptr+= len;
4379
0
            }
4380
0
          break;
4381
4382
0
          case OP_WORDCHAR:
4383
0
          for (i = Lmin; i < Lmax; i++)
4384
0
            {
4385
0
            int len = 1;
4386
0
            if (Feptr >= mb->end_subject)
4387
0
              {
4388
0
              SCHECK_PARTIAL();
4389
0
              break;
4390
0
              }
4391
0
            GETCHARLEN(fc, Feptr, len);
4392
0
            if (fc >= 256 || (mb->ctypes[fc] & ctype_word) == 0) break;
4393
0
            Feptr+= len;
4394
0
            }
4395
0
          break;
4396
4397
0
          default:
4398
0
          return PCRE2_ERROR_INTERNAL;
4399
0
          }
4400
4401
0
        if (reptype == REPTYPE_POS) continue;    /* No backtracking */
4402
4403
        /* After \C in UTF mode, Lstart_eptr might be in the middle of a
4404
        Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't go
4405
        too far. */
4406
4407
0
        for(;;)
4408
0
          {
4409
0
          if (Feptr <= Lstart_eptr) break;
4410
0
          RMATCH(Fecode, RM221);
4411
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4412
0
          Feptr--;
4413
0
          BACKCHAR(Feptr);
4414
0
          if (Lctype == OP_ANYNL && Feptr > Lstart_eptr &&
4415
0
              UCHAR21(Feptr) == CHAR_NL && UCHAR21(Feptr - 1) == CHAR_CR)
4416
0
            Feptr--;
4417
0
          }
4418
0
        }
4419
68.2M
      else
4420
68.2M
#endif  /* SUPPORT_UNICODE */
4421
4422
      /* Not UTF mode */
4423
68.2M
        {
4424
68.2M
        switch(Lctype)
4425
68.2M
          {
4426
5.41M
          case OP_ANY:
4427
363M
          for (i = Lmin; i < Lmax; i++)
4428
363M
            {
4429
363M
            if (Feptr >= mb->end_subject)
4430
1.63M
              {
4431
1.63M
              SCHECK_PARTIAL();
4432
1.63M
              break;
4433
1.63M
              }
4434
362M
            if (IS_NEWLINE(Feptr)) break;
4435
358M
            if (mb->partial != 0 &&    /* Take care with CRLF partial */
4436
358M
                Feptr + 1 >= mb->end_subject &&
4437
358M
                NLBLOCK->nltype == NLTYPE_FIXED &&
4438
358M
                NLBLOCK->nllen == 2 &&
4439
358M
                *Feptr == NLBLOCK->nl[0])
4440
0
              {
4441
0
              mb->hitend = TRUE;
4442
0
              if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
4443
0
              }
4444
358M
            Feptr++;
4445
358M
            }
4446
5.41M
          break;
4447
4448
5.41M
          case OP_ALLANY:
4449
26.6k
          case OP_ANYBYTE:
4450
26.6k
          fc = Lmax - Lmin;
4451
26.6k
          if (fc > (uint32_t)(mb->end_subject - Feptr))
4452
24.8k
            {
4453
24.8k
            Feptr = mb->end_subject;
4454
24.8k
            SCHECK_PARTIAL();
4455
24.8k
            }
4456
1.78k
          else Feptr += fc;
4457
26.6k
          break;
4458
4459
11.3M
          case OP_ANYNL:
4460
16.9M
          for (i = Lmin; i < Lmax; i++)
4461
16.7M
            {
4462
16.7M
            if (Feptr >= mb->end_subject)
4463
15.1k
              {
4464
15.1k
              SCHECK_PARTIAL();
4465
15.1k
              break;
4466
15.1k
              }
4467
16.7M
            fc = *Feptr;
4468
16.7M
            if (fc == CHAR_CR)
4469
689k
              {
4470
689k
              if (++Feptr >= mb->end_subject) break;
4471
689k
              if (*Feptr == CHAR_LF) Feptr++;
4472
689k
              }
4473
16.0M
            else
4474
16.0M
              {
4475
16.0M
              if (fc != CHAR_LF && (mb->bsr_convention == PCRE2_BSR_ANYCRLF ||
4476
11.3M
                 (fc != CHAR_VT && fc != CHAR_FF && fc != CHAR_NEL
4477
#if PCRE2_CODE_UNIT_WIDTH != 8
4478
                 && fc != 0x2028 && fc != 0x2029
4479
#endif
4480
11.3M
                 ))) break;
4481
4.95M
              Feptr++;
4482
4.95M
              }
4483
16.7M
            }
4484
11.3M
          break;
4485
4486
11.3M
          case OP_NOT_HSPACE:
4487
77.9M
          for (i = Lmin; i < Lmax; i++)
4488
77.8M
            {
4489
77.8M
            if (Feptr >= mb->end_subject)
4490
141k
              {
4491
141k
              SCHECK_PARTIAL();
4492
141k
              break;
4493
141k
              }
4494
77.6M
            switch(*Feptr)
4495
77.6M
              {
4496
76.2M
              default: Feptr++; break;
4497
2.99M
              HSPACE_BYTE_CASES:
4498
#if PCRE2_CODE_UNIT_WIDTH != 8
4499
              HSPACE_MULTIBYTE_CASES:
4500
#endif
4501
2.99M
              goto ENDLOOP00;
4502
77.6M
              }
4503
77.6M
            }
4504
1.70M
          ENDLOOP00:
4505
1.70M
          break;
4506
4507
765k
          case OP_HSPACE:
4508
793k
          for (i = Lmin; i < Lmax; i++)
4509
788k
            {
4510
788k
            if (Feptr >= mb->end_subject)
4511
1.43k
              {
4512
1.43k
              SCHECK_PARTIAL();
4513
1.43k
              break;
4514
1.43k
              }
4515
787k
            switch(*Feptr)
4516
787k
              {
4517
759k
              default: goto ENDLOOP01;
4518
759k
              HSPACE_BYTE_CASES:
4519
#if PCRE2_CODE_UNIT_WIDTH != 8
4520
              HSPACE_MULTIBYTE_CASES:
4521
#endif
4522
63.9k
              Feptr++; break;
4523
787k
              }
4524
787k
            }
4525
765k
          ENDLOOP01:
4526
765k
          break;
4527
4528
33.2M
          case OP_NOT_VSPACE:
4529
124M
          for (i = Lmin; i < Lmax; i++)
4530
92.3M
            {
4531
92.3M
            if (Feptr >= mb->end_subject)
4532
39.9k
              {
4533
39.9k
              SCHECK_PARTIAL();
4534
39.9k
              break;
4535
39.9k
              }
4536
92.2M
            switch(*Feptr)
4537
92.2M
              {
4538
90.8M
              default: Feptr++; break;
4539
5.31M
              VSPACE_BYTE_CASES:
4540
#if PCRE2_CODE_UNIT_WIDTH != 8
4541
              VSPACE_MULTIBYTE_CASES:
4542
#endif
4543
5.31M
              goto ENDLOOP02;
4544
92.2M
              }
4545
92.2M
            }
4546
33.2M
          ENDLOOP02:
4547
33.2M
          break;
4548
4549
31.9M
          case OP_VSPACE:
4550
289k
          for (i = Lmin; i < Lmax; i++)
4551
282k
            {
4552
282k
            if (Feptr >= mb->end_subject)
4553
1.97k
              {
4554
1.97k
              SCHECK_PARTIAL();
4555
1.97k
              break;
4556
1.97k
              }
4557
280k
            switch(*Feptr)
4558
280k
              {
4559
117k
              default: goto ENDLOOP03;
4560
607k
              VSPACE_BYTE_CASES:
4561
#if PCRE2_CODE_UNIT_WIDTH != 8
4562
              VSPACE_MULTIBYTE_CASES:
4563
#endif
4564
607k
              Feptr++; break;
4565
280k
              }
4566
280k
            }
4567
126k
          ENDLOOP03:
4568
126k
          break;
4569
4570
3.72M
          case OP_NOT_DIGIT:
4571
689M
          for (i = Lmin; i < Lmax; i++)
4572
688M
            {
4573
688M
            if (Feptr >= mb->end_subject)
4574
76.0k
              {
4575
76.0k
              SCHECK_PARTIAL();
4576
76.0k
              break;
4577
76.0k
              }
4578
688M
            if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_digit) != 0)
4579
3.07M
              break;
4580
685M
            Feptr++;
4581
685M
            }
4582
3.72M
          break;
4583
4584
3.72M
          case OP_DIGIT:
4585
1.63M
          for (i = Lmin; i < Lmax; i++)
4586
1.63M
            {
4587
1.63M
            if (Feptr >= mb->end_subject)
4588
34.9k
              {
4589
34.9k
              SCHECK_PARTIAL();
4590
34.9k
              break;
4591
34.9k
              }
4592
1.59M
            if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_digit) == 0)
4593
290k
              break;
4594
1.30M
            Feptr++;
4595
1.30M
            }
4596
328k
          break;
4597
4598
328k
          case OP_NOT_WHITESPACE:
4599
3.21M
          for (i = Lmin; i < Lmax; i++)
4600
2.94M
            {
4601
2.94M
            if (Feptr >= mb->end_subject)
4602
2.14k
              {
4603
2.14k
              SCHECK_PARTIAL();
4604
2.14k
              break;
4605
2.14k
              }
4606
2.94M
            if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_space) != 0)
4607
20.9k
              break;
4608
2.92M
            Feptr++;
4609
2.92M
            }
4610
287k
          break;
4611
4612
10.5M
          case OP_WHITESPACE:
4613
103M
          for (i = Lmin; i < Lmax; i++)
4614
103M
            {
4615
103M
            if (Feptr >= mb->end_subject)
4616
2.50M
              {
4617
2.50M
              SCHECK_PARTIAL();
4618
2.50M
              break;
4619
2.50M
              }
4620
101M
            if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_space) == 0)
4621
8.03M
              break;
4622
93.2M
            Feptr++;
4623
93.2M
            }
4624
10.5M
          break;
4625
4626
10.5M
          case OP_NOT_WORDCHAR:
4627
1.80G
          for (i = Lmin; i < Lmax; i++)
4628
1.80G
            {
4629
1.80G
            if (Feptr >= mb->end_subject)
4630
110k
              {
4631
110k
              SCHECK_PARTIAL();
4632
110k
              break;
4633
110k
              }
4634
1.80G
            if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_word) != 0)
4635
495k
              break;
4636
1.80G
            Feptr++;
4637
1.80G
            }
4638
685k
          break;
4639
4640
685k
          case OP_WORDCHAR:
4641
278k
          for (i = Lmin; i < Lmax; i++)
4642
277k
            {
4643
277k
            if (Feptr >= mb->end_subject)
4644
8.12k
              {
4645
8.12k
              SCHECK_PARTIAL();
4646
8.12k
              break;
4647
8.12k
              }
4648
269k
            if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_word) == 0)
4649
54.9k
              break;
4650
214k
            Feptr++;
4651
214k
            }
4652
64.3k
          break;
4653
4654
64.3k
          default:
4655
0
          return PCRE2_ERROR_INTERNAL;
4656
68.2M
          }
4657
4658
68.2M
        if (reptype == REPTYPE_POS) continue;    /* No backtracking */
4659
4660
50.4M
        for (;;)
4661
679M
          {
4662
679M
          if (Feptr == Lstart_eptr) break;
4663
629M
          RMATCH(Fecode, RM34);
4664
628M
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4665
628M
          Feptr--;
4666
628M
          if (Lctype == OP_ANYNL && Feptr > Lstart_eptr && *Feptr == CHAR_LF &&
4667
628M
              Feptr[-1] == CHAR_CR) Feptr--;
4668
628M
          }
4669
50.4M
        }
4670
68.5M
      }
4671
49.8M
    break;  /* End of repeat character type processing */
4672
4673
49.8M
#undef Lstart_eptr
4674
49.8M
#undef Lmin
4675
49.8M
#undef Lmax
4676
49.8M
#undef Lctype
4677
49.8M
#undef Lpropvalue
4678
4679
4680
    /* ===================================================================== */
4681
    /* Match a back reference, possibly repeatedly. Look past the end of the
4682
    item to see if there is repeat information following. The OP_REF and
4683
    OP_REFI opcodes are used for a reference to a numbered group or to a
4684
    non-duplicated named group. For a duplicated named group, OP_DNREF and
4685
    OP_DNREFI are used. In this case we must scan the list of groups to which
4686
    the name refers, and use the first one that is set. */
4687
4688
49.8M
#define Lmin      F->temp_32[0]
4689
49.8M
#define Lmax      F->temp_32[1]
4690
49.8M
#define Lcaseless F->temp_32[2]
4691
49.8M
#define Lstart    F->temp_sptr[0]
4692
49.8M
#define Loffset   F->temp_size
4693
4694
49.8M
    case OP_DNREF:
4695
0
    case OP_DNREFI:
4696
0
    Lcaseless = (Fop == OP_DNREFI);
4697
0
      {
4698
0
      int count = GET2(Fecode, 1+IMM2_SIZE);
4699
0
      PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size;
4700
0
      Fecode += 1 + 2*IMM2_SIZE;
4701
4702
0
      while (count-- > 0)
4703
0
        {
4704
0
        Loffset = (GET2(slot, 0) << 1) - 2;
4705
0
        if (Loffset < Foffset_top && Fovector[Loffset] != PCRE2_UNSET) break;
4706
0
        slot += mb->name_entry_size;
4707
0
        }
4708
0
      }
4709
0
    goto REF_REPEAT;
4710
4711
2.60M
    case OP_REF:
4712
7.77M
    case OP_REFI:
4713
7.77M
    Lcaseless = (Fop == OP_REFI);
4714
7.77M
    Loffset = (GET2(Fecode, 1) << 1) - 2;
4715
7.77M
    Fecode += 1 + IMM2_SIZE;
4716
4717
    /* Set up for repetition, or handle the non-repeated case. The maximum and
4718
    minimum must be in the heap frame, but as they are short-term values, we
4719
    use temporary fields. */
4720
4721
7.77M
    REF_REPEAT:
4722
7.77M
    switch (*Fecode)
4723
7.77M
      {
4724
232
      case OP_CRSTAR:
4725
232
      case OP_CRMINSTAR:
4726
330
      case OP_CRPLUS:
4727
330
      case OP_CRMINPLUS:
4728
330
      case OP_CRQUERY:
4729
330
      case OP_CRMINQUERY:
4730
330
      fc = *Fecode++ - OP_CRSTAR;
4731
330
      Lmin = rep_min[fc];
4732
330
      Lmax = rep_max[fc];
4733
330
      reptype = rep_typ[fc];
4734
330
      break;
4735
4736
0
      case OP_CRRANGE:
4737
0
      case OP_CRMINRANGE:
4738
0
      Lmin = GET2(Fecode, 1);
4739
0
      Lmax = GET2(Fecode, 1 + IMM2_SIZE);
4740
0
      reptype = rep_typ[*Fecode - OP_CRSTAR];
4741
0
      if (Lmax == 0) Lmax = UINT32_MAX;  /* Max 0 => infinity */
4742
0
      Fecode += 1 + 2 * IMM2_SIZE;
4743
0
      break;
4744
4745
7.77M
      default:                  /* No repeat follows */
4746
7.77M
        {
4747
7.77M
        rrc = match_ref(Loffset, Lcaseless, F, mb, &length);
4748
7.77M
        if (rrc != 0)
4749
5.99M
          {
4750
5.99M
          if (rrc > 0) Feptr = mb->end_subject;   /* Partial match */
4751
5.99M
          CHECK_PARTIAL();
4752
5.99M
          RRETURN(MATCH_NOMATCH);
4753
0
          }
4754
7.77M
        }
4755
1.78M
      Feptr += length;
4756
1.78M
      continue;              /* With the main loop */
4757
7.77M
      }
4758
4759
    /* Handle repeated back references. If a set group has length zero, just
4760
    continue with the main loop, because it matches however many times. For an
4761
    unset reference, if the minimum is zero, we can also just continue. We can
4762
    also continue if PCRE2_MATCH_UNSET_BACKREF is set, because this makes an unset
4763
    group behave as a zero-length group. For any other unset cases, carrying
4764
    on will result in NOMATCH. */
4765
4766
330
    if (Loffset < Foffset_top && Fovector[Loffset] != PCRE2_UNSET)
4767
0
      {
4768
0
      if (Fovector[Loffset] == Fovector[Loffset + 1]) continue;
4769
0
      }
4770
330
    else  /* Group is not set */
4771
330
      {
4772
330
      if (Lmin == 0 || (mb->poptions & PCRE2_MATCH_UNSET_BACKREF) != 0)
4773
232
        continue;
4774
330
      }
4775
4776
    /* First, ensure the minimum number of matches are present. */
4777
4778
98
    for (i = 1; i <= Lmin; i++)
4779
98
      {
4780
98
      PCRE2_SIZE slength;
4781
98
      rrc = match_ref(Loffset, Lcaseless, F, mb, &slength);
4782
98
      if (rrc != 0)
4783
98
        {
4784
98
        if (rrc > 0) Feptr = mb->end_subject;   /* Partial match */
4785
98
        CHECK_PARTIAL();
4786
98
        RRETURN(MATCH_NOMATCH);
4787
0
        }
4788
0
      Feptr += slength;
4789
0
      }
4790
4791
    /* If min = max, we are done. They are not both allowed to be zero. */
4792
4793
0
    if (Lmin == Lmax) continue;
4794
4795
    /* If minimizing, keep trying and advancing the pointer. */
4796
4797
0
    if (reptype == REPTYPE_MIN)
4798
0
      {
4799
0
      for (;;)
4800
0
        {
4801
0
        PCRE2_SIZE slength;
4802
0
        RMATCH(Fecode, RM20);
4803
0
        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4804
0
        if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
4805
0
        rrc = match_ref(Loffset, Lcaseless, F, mb, &slength);
4806
0
        if (rrc != 0)
4807
0
          {
4808
0
          if (rrc > 0) Feptr = mb->end_subject;   /* Partial match */
4809
0
          CHECK_PARTIAL();
4810
0
          RRETURN(MATCH_NOMATCH);
4811
0
          }
4812
0
        Feptr += slength;
4813
0
        }
4814
      /* Control never gets here */
4815
0
      }
4816
4817
    /* If maximizing, find the longest string and work backwards, as long as
4818
    the matched lengths for each iteration are the same. */
4819
4820
0
    else
4821
0
      {
4822
0
      BOOL samelengths = TRUE;
4823
0
      Lstart = Feptr;     /* Starting position */
4824
0
      Flength = Fovector[Loffset+1] - Fovector[Loffset];
4825
4826
0
      for (i = Lmin; i < Lmax; i++)
4827
0
        {
4828
0
        PCRE2_SIZE slength;
4829
0
        rrc = match_ref(Loffset, Lcaseless, F, mb, &slength);
4830
0
        if (rrc != 0)
4831
0
          {
4832
          /* Can't use CHECK_PARTIAL because we don't want to update Feptr in
4833
          the soft partial matching case. */
4834
4835
0
          if (rrc > 0 && mb->partial != 0 &&
4836
0
              mb->end_subject > mb->start_used_ptr)
4837
0
            {
4838
0
            mb->hitend = TRUE;
4839
0
            if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
4840
0
            }
4841
0
          break;
4842
0
          }
4843
4844
0
        if (slength != Flength) samelengths = FALSE;
4845
0
        Feptr += slength;
4846
0
        }
4847
4848
      /* If the length matched for each repetition is the same as the length of
4849
      the captured group, we can easily work backwards. This is the normal
4850
      case. However, in caseless UTF-8 mode there are pairs of case-equivalent
4851
      characters whose lengths (in terms of code units) differ. Because this
4852
      is very rare, we handle it by re-matching fewer and fewer times. */
4853
4854
0
      if (samelengths)
4855
0
        {
4856
0
        while (Feptr >= Lstart)
4857
0
          {
4858
0
          RMATCH(Fecode, RM21);
4859
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4860
0
          Feptr -= Flength;
4861
0
          }
4862
0
        }
4863
4864
      /* The rare case of non-matching lengths. Re-scan the repetition for each
4865
      iteration. We know that match_ref() will succeed every time. */
4866
4867
0
      else
4868
0
        {
4869
0
        Lmax = i;
4870
0
        for (;;)
4871
0
          {
4872
0
          RMATCH(Fecode, RM22);
4873
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4874
0
          if (Feptr == Lstart) break; /* Failed after minimal repetition */
4875
0
          Feptr = Lstart;
4876
0
          Lmax--;
4877
0
          for (i = Lmin; i < Lmax; i++)
4878
0
            {
4879
0
            PCRE2_SIZE slength;
4880
0
            (void)match_ref(Loffset, Lcaseless, F, mb, &slength);
4881
0
            Feptr += slength;
4882
0
            }
4883
0
          }
4884
0
        }
4885
4886
0
      RRETURN(MATCH_NOMATCH);
4887
0
      }
4888
    /* Control never gets here */
4889
4890
0
#undef Lcaseless
4891
0
#undef Lmin
4892
0
#undef Lmax
4893
0
#undef Lstart
4894
0
#undef Loffset
4895
4896
4897
4898
/* ========================================================================= */
4899
/*           Opcodes for the start of various parenthesized items            */
4900
/* ========================================================================= */
4901
4902
    /* In all cases, if the result of RMATCH() is MATCH_THEN, check whether the
4903
    (*THEN) is within the current branch by comparing the address of OP_THEN
4904
    that is passed back with the end of the branch. If (*THEN) is within the
4905
    current branch, and the branch is one of two or more alternatives (it
4906
    either starts or ends with OP_ALT), we have reached the limit of THEN's
4907
    action, so convert the return code to NOMATCH, which will cause normal
4908
    backtracking to happen from now on. Otherwise, THEN is passed back to an
4909
    outer alternative. This implements Perl's treatment of parenthesized
4910
    groups, where a group not containing | does not affect the current
4911
    alternative, that is, (X) is NOT the same as (X|(*F)). */
4912
4913
4914
    /* ===================================================================== */
4915
    /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a non-possessive
4916
    bracket group, indicating that it may occur zero times. It may repeat
4917
    infinitely, or not at all - i.e. it could be ()* or ()? or even (){0} in
4918
    the pattern. Brackets with fixed upper repeat limits are compiled as a
4919
    number of copies, with the optional ones preceded by BRAZERO or BRAMINZERO.
4920
    Possessive groups with possible zero repeats are preceded by BRAPOSZERO. */
4921
4922
186M
#define Lnext_ecode F->temp_sptr[0]
4923
4924
16.0M
    case OP_BRAZERO:
4925
16.0M
    Lnext_ecode = Fecode + 1;
4926
16.0M
    RMATCH(Lnext_ecode, RM9);
4927
15.3M
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4928
77.1M
    do Lnext_ecode += GET(Lnext_ecode, 1); while (*Lnext_ecode == OP_ALT);
4929
15.3M
    Fecode = Lnext_ecode + 1 + LINK_SIZE;
4930
15.3M
    break;
4931
4932
51.1k
    case OP_BRAMINZERO:
4933
51.1k
    Lnext_ecode = Fecode + 1;
4934
161k
    do Lnext_ecode += GET(Lnext_ecode, 1); while (*Lnext_ecode == OP_ALT);
4935
51.1k
    RMATCH(Lnext_ecode + 1 + LINK_SIZE, RM10);
4936
51.1k
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4937
51.1k
    Fecode++;
4938
51.1k
    break;
4939
4940
0
#undef Lnext_ecode
4941
4942
0
    case OP_SKIPZERO:
4943
0
    Fecode++;
4944
0
    do Fecode += GET(Fecode,1); while (*Fecode == OP_ALT);
4945
0
    Fecode += 1 + LINK_SIZE;
4946
0
    break;
4947
4948
4949
    /* ===================================================================== */
4950
    /* Handle possessive brackets with an unlimited repeat. The end of these
4951
    brackets will always be OP_KETRPOS, which returns MATCH_KETRPOS without
4952
    going further in the pattern. */
4953
4954
4.39k
#define Lframe_type    F->temp_32[0]
4955
2.64k
#define Lmatched_once  F->temp_32[1]
4956
1.73k
#define Lzero_allowed  F->temp_32[2]
4957
3.53k
#define Lstart_eptr    F->temp_sptr[0]
4958
892
#define Lstart_group   F->temp_sptr[1]
4959
4960
876
    case OP_BRAPOSZERO:
4961
876
    Lzero_allowed = TRUE;                /* Zero repeat is allowed */
4962
876
    Fecode += 1;
4963
876
    if (*Fecode == OP_CBRAPOS || *Fecode == OP_SCBRAPOS)
4964
0
      goto POSSESSIVE_CAPTURE;
4965
876
    goto POSSESSIVE_NON_CAPTURE;
4966
4967
876
    case OP_BRAPOS:
4968
0
    case OP_SBRAPOS:
4969
0
    Lzero_allowed = FALSE;               /* Zero repeat not allowed */
4970
4971
876
    POSSESSIVE_NON_CAPTURE:
4972
876
    Lframe_type = GF_NOCAPTURE;          /* Remembered frame type */
4973
876
    goto POSSESSIVE_GROUP;
4974
4975
0
    case OP_CBRAPOS:
4976
0
    case OP_SCBRAPOS:
4977
0
    Lzero_allowed = FALSE;               /* Zero repeat not allowed */
4978
4979
0
    POSSESSIVE_CAPTURE:
4980
0
    number = GET2(Fecode, 1+LINK_SIZE);
4981
0
    Lframe_type = GF_CAPTURE | number;   /* Remembered frame type */
4982
4983
876
    POSSESSIVE_GROUP:
4984
876
    Lmatched_once = FALSE;               /* Never matched */
4985
876
    Lstart_group = Fecode;               /* Start of this group */
4986
4987
876
    for (;;)
4988
3.52k
      {
4989
3.52k
      Lstart_eptr = Feptr;               /* Position at group start */
4990
3.52k
      group_frame_type = Lframe_type;
4991
3.52k
      RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM8);
4992
3.52k
      if (rrc == MATCH_KETRPOS)
4993
16
        {
4994
16
        Lmatched_once = TRUE;            /* Matched at least once */
4995
16
        if (Feptr == Lstart_eptr)        /* Empty match; skip to end */
4996
0
          {
4997
0
          do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
4998
0
          break;
4999
0
          }
5000
5001
16
        Fecode = Lstart_group;
5002
16
        continue;
5003
16
        }
5004
5005
      /* See comment above about handling THEN. */
5006
5007
3.50k
      if (rrc == MATCH_THEN)
5008
0
        {
5009
0
        PCRE2_SPTR next_ecode = Fecode + GET(Fecode,1);
5010
0
        if (mb->verb_ecode_ptr < next_ecode &&
5011
0
            (*Fecode == OP_ALT || *next_ecode == OP_ALT))
5012
0
          rrc = MATCH_NOMATCH;
5013
0
        }
5014
5015
3.50k
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5016
3.50k
      Fecode += GET(Fecode, 1);
5017
3.50k
      if (*Fecode != OP_ALT) break;
5018
3.50k
      }
5019
5020
    /* Success if matched something or zero repeat allowed */
5021
5022
876
    if (Lmatched_once || Lzero_allowed)
5023
876
      {
5024
876
      Fecode += 1 + LINK_SIZE;
5025
876
      break;
5026
876
      }
5027
5028
876
    RRETURN(MATCH_NOMATCH);
5029
5030
0
#undef Lmatched_once
5031
0
#undef Lzero_allowed
5032
0
#undef Lframe_type
5033
0
#undef Lstart_eptr
5034
0
#undef Lstart_group
5035
5036
5037
    /* ===================================================================== */
5038
    /* Handle non-capturing brackets that cannot match an empty string. When we
5039
    get to the final alternative within the brackets, as long as there are no
5040
    THEN's in the pattern, we can optimize by not recording a new backtracking
5041
    point. (Ideally we should test for a THEN within this group, but we don't
5042
    have that information.) Don't do this if we are at the very top level,
5043
    however, because that would make handling assertions and once-only brackets
5044
    messier when there is nothing to go back to. */
5045
5046
221M
#define Lframe_type F->temp_32[0]     /* Set for all that use GROUPLOOP */
5047
129M
#define Lnext_branch F->temp_sptr[0]  /* Used only in OP_BRA handling */
5048
5049
69.0M
    case OP_BRA:
5050
69.0M
    if (mb->hasthen || Frdepth == 0)
5051
51.8M
      {
5052
51.8M
      Lframe_type = 0;
5053
51.8M
      goto GROUPLOOP;
5054
51.8M
      }
5055
5056
17.1M
    for (;;)
5057
48.7M
      {
5058
48.7M
      Lnext_branch = Fecode + GET(Fecode, 1);
5059
48.7M
      if (*Lnext_branch != OP_ALT) break;
5060
5061
      /* This is never the final branch. We do not need to test for MATCH_THEN
5062
      here because this code is not used when there is a THEN in the pattern. */
5063
5064
31.6M
      RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM1);
5065
31.6M
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5066
31.6M
      Fecode = Lnext_branch;
5067
31.6M
      }
5068
5069
    /* Hit the start of the final branch. Continue at this level. */
5070
5071
17.1M
    Fecode += PRIV(OP_lengths)[*Fecode];
5072
17.1M
    break;
5073
5074
0
#undef Lnext_branch
5075
5076
5077
    /* ===================================================================== */
5078
    /* Handle a capturing bracket, other than those that are possessive with an
5079
    unlimited repeat. */
5080
5081
6.11M
    case OP_CBRA:
5082
6.14M
    case OP_SCBRA:
5083
6.14M
    Lframe_type = GF_CAPTURE | GET2(Fecode, 1+LINK_SIZE);
5084
6.14M
    goto GROUPLOOP;
5085
5086
5087
    /* ===================================================================== */
5088
    /* Atomic groups and non-capturing brackets that can match an empty string
5089
    must record a backtracking point and also set up a chained frame. */
5090
5091
716
    case OP_ONCE:
5092
716
    case OP_SCRIPT_RUN:
5093
402k
    case OP_SBRA:
5094
402k
    Lframe_type = GF_NOCAPTURE | Fop;
5095
5096
58.4M
    GROUPLOOP:
5097
58.4M
    for (;;)
5098
163M
      {
5099
163M
      group_frame_type = Lframe_type;
5100
163M
      RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM2);
5101
155M
      if (rrc == MATCH_THEN)
5102
0
        {
5103
0
        PCRE2_SPTR next_ecode = Fecode + GET(Fecode,1);
5104
0
        if (mb->verb_ecode_ptr < next_ecode &&
5105
0
            (*Fecode == OP_ALT || *next_ecode == OP_ALT))
5106
0
          rrc = MATCH_NOMATCH;
5107
0
        }
5108
155M
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5109
155M
      Fecode += GET(Fecode, 1);
5110
155M
      if (*Fecode != OP_ALT) RRETURN(MATCH_NOMATCH);
5111
104M
      }
5112
    /* Control never reaches here. */
5113
5114
0
#undef Lframe_type
5115
5116
5117
    /* ===================================================================== */
5118
    /* Recursion either matches the current regex, or some subexpression. The
5119
    offset data is the offset to the starting bracket from the start of the
5120
    whole pattern. (This is so that it works from duplicated subpatterns.) */
5121
5122
0
#define Lframe_type F->temp_32[0]
5123
0
#define Lstart_branch F->temp_sptr[0]
5124
5125
0
    case OP_RECURSE:
5126
0
    bracode = mb->start_code + GET(Fecode, 1);
5127
0
    number = (bracode == mb->start_code)? 0 : GET2(bracode, 1 + LINK_SIZE);
5128
5129
    /* If we are already in a recursion, check for repeating the same one
5130
    without advancing the subject pointer. This should catch convoluted mutual
5131
    recursions. (Some simple cases are caught at compile time.) */
5132
5133
0
    if (Fcurrent_recurse != RECURSE_UNSET)
5134
0
      {
5135
0
      offset = Flast_group_offset;
5136
0
      while (offset != PCRE2_UNSET)
5137
0
        {
5138
0
        N = (heapframe *)((char *)mb->match_frames + offset);
5139
0
        P = (heapframe *)((char *)N - frame_size);
5140
0
        if (N->group_frame_type == (GF_RECURSE | number))
5141
0
          {
5142
0
          if (Feptr == P->eptr) return PCRE2_ERROR_RECURSELOOP;
5143
0
          break;
5144
0
          }
5145
0
        offset = P->last_group_offset;
5146
0
        }
5147
0
      }
5148
5149
    /* Now run the recursion, branch by branch. */
5150
5151
0
    Lstart_branch = bracode;
5152
0
    Lframe_type = GF_RECURSE | number;
5153
5154
0
    for (;;)
5155
0
      {
5156
0
      PCRE2_SPTR next_ecode;
5157
5158
0
      group_frame_type = Lframe_type;
5159
0
      RMATCH(Lstart_branch + PRIV(OP_lengths)[*Lstart_branch], RM11);
5160
0
      next_ecode = Lstart_branch + GET(Lstart_branch,1);
5161
5162
      /* Handle backtracking verbs, which are defined in a range that can
5163
      easily be tested for. PCRE does not allow THEN, SKIP, PRUNE or COMMIT to
5164
      escape beyond a recursion; they cause a NOMATCH for the entire recursion.
5165
5166
      When one of these verbs triggers, the current recursion group number is
5167
      recorded. If it matches the recursion we are processing, the verb
5168
      happened within the recursion and we must deal with it. Otherwise it must
5169
      have happened after the recursion completed, and so has to be passed
5170
      back. See comment above about handling THEN. */
5171
5172
0
      if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX &&
5173
0
          mb->verb_current_recurse == (Lframe_type ^ GF_RECURSE))
5174
0
        {
5175
0
        if (rrc == MATCH_THEN && mb->verb_ecode_ptr < next_ecode &&
5176
0
            (*Lstart_branch == OP_ALT || *next_ecode == OP_ALT))
5177
0
          rrc = MATCH_NOMATCH;
5178
0
        else RRETURN(MATCH_NOMATCH);
5179
0
        }
5180
5181
      /* Note that carrying on after (*ACCEPT) in a recursion is handled in the
5182
      OP_ACCEPT code. Nothing needs to be done here. */
5183
5184
0
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5185
0
      Lstart_branch = next_ecode;
5186
0
      if (*Lstart_branch != OP_ALT) RRETURN(MATCH_NOMATCH);
5187
0
      }
5188
    /* Control never reaches here. */
5189
5190
0
#undef Lframe_type
5191
0
#undef Lstart_branch
5192
5193
5194
    /* ===================================================================== */
5195
    /* Positive assertions are like other groups except that PCRE doesn't allow
5196
    the effect of (*THEN) to escape beyond an assertion; it is therefore
5197
    treated as NOMATCH. (*ACCEPT) is treated as successful assertion, with its
5198
    captures and mark retained. Any other return is an error. */
5199
5200
697k
#define Lframe_type  F->temp_32[0]
5201
5202
270k
    case OP_ASSERT:
5203
270k
    case OP_ASSERTBACK:
5204
270k
    case OP_ASSERT_NA:
5205
271k
    case OP_ASSERTBACK_NA:
5206
271k
    Lframe_type = GF_NOCAPTURE | Fop;
5207
271k
    for (;;)
5208
426k
      {
5209
426k
      group_frame_type = Lframe_type;
5210
426k
      RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM3);
5211
426k
      if (rrc == MATCH_ACCEPT)
5212
0
        {
5213
0
        memcpy(Fovector,
5214
0
              (char *)assert_accept_frame + offsetof(heapframe, ovector),
5215
0
              assert_accept_frame->offset_top * sizeof(PCRE2_SIZE));
5216
0
        Foffset_top = assert_accept_frame->offset_top;
5217
0
        Fmark = assert_accept_frame->mark;
5218
0
        break;
5219
0
        }
5220
426k
      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
5221
426k
      Fecode += GET(Fecode, 1);
5222
426k
      if (*Fecode != OP_ALT) RRETURN(MATCH_NOMATCH);
5223
155k
      }
5224
5225
0
    do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
5226
0
    Fecode += 1 + LINK_SIZE;
5227
0
    break;
5228
5229
0
#undef Lframe_type
5230
5231
5232
    /* ===================================================================== */
5233
    /* Handle negative assertions. Loop for each non-matching branch as for
5234
    positive assertions. */
5235
5236
32.9M
#define Lframe_type  F->temp_32[0]
5237
5238
0
    case OP_ASSERT_NOT:
5239
16.4M
    case OP_ASSERTBACK_NOT:
5240
16.4M
    Lframe_type  = GF_NOCAPTURE | Fop;
5241
5242
16.4M
    for (;;)
5243
16.4M
      {
5244
16.4M
      group_frame_type = Lframe_type;
5245
16.4M
      RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM4);
5246
16.4M
      switch(rrc)
5247
16.4M
        {
5248
0
        case MATCH_ACCEPT:   /* Assertion matched, therefore it fails. */
5249
270k
        case MATCH_MATCH:
5250
270k
        RRETURN (MATCH_NOMATCH);
5251
5252
16.2M
        case MATCH_NOMATCH:  /* Branch failed, try next if present. */
5253
16.2M
        case MATCH_THEN:
5254
16.2M
        Fecode += GET(Fecode, 1);
5255
16.2M
        if (*Fecode != OP_ALT) goto ASSERT_NOT_FAILED;
5256
1.42k
        break;
5257
5258
1.42k
        case MATCH_COMMIT:   /* Assertion forced to fail, therefore continue. */
5259
0
        case MATCH_SKIP:
5260
0
        case MATCH_PRUNE:
5261
0
        do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
5262
0
        goto ASSERT_NOT_FAILED;
5263
5264
0
        default:             /* Pass back any other return */
5265
0
        RRETURN(rrc);
5266
16.4M
        }
5267
16.4M
      }
5268
5269
    /* None of the branches have matched or there was a backtrack to (*COMMIT),
5270
    (*SKIP), (*PRUNE), or (*THEN) in the last branch. This is success for a
5271
    negative assertion, so carry on. */
5272
5273
16.1M
    ASSERT_NOT_FAILED:
5274
16.1M
    Fecode += 1 + LINK_SIZE;
5275
16.1M
    break;
5276
5277
0
#undef Lframe_type
5278
5279
5280
    /* ===================================================================== */
5281
    /* The callout item calls an external function, if one is provided, passing
5282
    details of the match so far. This is mainly for debugging, though the
5283
    function is able to force a failure. */
5284
5285
0
    case OP_CALLOUT:
5286
0
    case OP_CALLOUT_STR:
5287
0
    rrc = do_callout(F, mb, &length);
5288
0
    if (rrc > 0) RRETURN(MATCH_NOMATCH);
5289
0
    if (rrc < 0) RRETURN(rrc);
5290
0
    Fecode += length;
5291
0
    break;
5292
5293
5294
    /* ===================================================================== */
5295
    /* Conditional group: compilation checked that there are no more than two
5296
    branches. If the condition is false, skipping the first branch takes us
5297
    past the end of the item if there is only one branch, but that's exactly
5298
    what we want. */
5299
5300
0
    case OP_COND:
5301
0
    case OP_SCOND:
5302
5303
    /* The variable Flength will be added to Fecode when the condition is
5304
    false, to get to the second branch. Setting it to the offset to the ALT or
5305
    KET, then incrementing Fecode achieves this effect. However, if the second
5306
    branch is non-existent, we must point to the KET so that the end of the
5307
    group is correctly processed. We now have Fecode pointing to the condition
5308
    or callout. */
5309
5310
0
    Flength = GET(Fecode, 1);    /* Offset to the second branch */
5311
0
    if (Fecode[Flength] != OP_ALT) Flength -= 1 + LINK_SIZE;
5312
0
    Fecode += 1 + LINK_SIZE;     /* From this opcode */
5313
5314
    /* Because of the way auto-callout works during compile, a callout item is
5315
    inserted between OP_COND and an assertion condition. Such a callout can
5316
    also be inserted manually. */
5317
5318
0
    if (*Fecode == OP_CALLOUT || *Fecode == OP_CALLOUT_STR)
5319
0
      {
5320
0
      rrc = do_callout(F, mb, &length);
5321
0
      if (rrc > 0) RRETURN(MATCH_NOMATCH);
5322
0
      if (rrc < 0) RRETURN(rrc);
5323
5324
      /* Advance Fecode past the callout, so it now points to the condition. We
5325
      must adjust Flength so that the value of Fecode+Flength is unchanged. */
5326
5327
0
      Fecode += length;
5328
0
      Flength -= length;
5329
0
      }
5330
5331
    /* Test the various possible conditions */
5332
5333
0
    condition = FALSE;
5334
0
    switch(*Fecode)
5335
0
      {
5336
0
      case OP_RREF:                  /* Group recursion test */
5337
0
      if (Fcurrent_recurse != RECURSE_UNSET)
5338
0
        {
5339
0
        number = GET2(Fecode, 1);
5340
0
        condition = (number == RREF_ANY || number == Fcurrent_recurse);
5341
0
        }
5342
0
      break;
5343
5344
0
      case OP_DNRREF:       /* Duplicate named group recursion test */
5345
0
      if (Fcurrent_recurse != RECURSE_UNSET)
5346
0
        {
5347
0
        int count = GET2(Fecode, 1 + IMM2_SIZE);
5348
0
        PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size;
5349
0
        while (count-- > 0)
5350
0
          {
5351
0
          number = GET2(slot, 0);
5352
0
          condition = number == Fcurrent_recurse;
5353
0
          if (condition) break;
5354
0
          slot += mb->name_entry_size;
5355
0
          }
5356
0
        }
5357
0
      break;
5358
5359
0
      case OP_CREF:                         /* Numbered group used test */
5360
0
      offset = (GET2(Fecode, 1) << 1) - 2;  /* Doubled ref number */
5361
0
      condition = offset < Foffset_top && Fovector[offset] != PCRE2_UNSET;
5362
0
      break;
5363
5364
0
      case OP_DNCREF:      /* Duplicate named group used test */
5365
0
        {
5366
0
        int count = GET2(Fecode, 1 + IMM2_SIZE);
5367
0
        PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size;
5368
0
        while (count-- > 0)
5369
0
          {
5370
0
          offset = (GET2(slot, 0) << 1) - 2;
5371
0
          condition = offset < Foffset_top && Fovector[offset] != PCRE2_UNSET;
5372
0
          if (condition) break;
5373
0
          slot += mb->name_entry_size;
5374
0
          }
5375
0
        }
5376
0
      break;
5377
5378
0
      case OP_FALSE:
5379
0
      case OP_FAIL:   /* The assertion (?!) becomes OP_FAIL */
5380
0
      break;
5381
5382
0
      case OP_TRUE:
5383
0
      condition = TRUE;
5384
0
      break;
5385
5386
      /* The condition is an assertion. Run code similar to the assertion code
5387
      above. */
5388
5389
0
#define Lpositive      F->temp_32[0]
5390
0
#define Lstart_branch  F->temp_sptr[0]
5391
5392
0
      default:
5393
0
      Lpositive = (*Fecode == OP_ASSERT || *Fecode == OP_ASSERTBACK);
5394
0
      Lstart_branch = Fecode;
5395
5396
0
      for (;;)
5397
0
        {
5398
0
        group_frame_type = GF_CONDASSERT | *Fecode;
5399
0
        RMATCH(Lstart_branch + PRIV(OP_lengths)[*Lstart_branch], RM5);
5400
5401
0
        switch(rrc)
5402
0
          {
5403
0
          case MATCH_ACCEPT:  /* Save captures */
5404
0
          memcpy(Fovector,
5405
0
                (char *)assert_accept_frame + offsetof(heapframe, ovector),
5406
0
                assert_accept_frame->offset_top * sizeof(PCRE2_SIZE));
5407
0
          Foffset_top = assert_accept_frame->offset_top;
5408
5409
          /* Fall through */
5410
          /* In the case of a match, the captures have already been put into
5411
          the current frame. */
5412
5413
0
          case MATCH_MATCH:
5414
0
          condition = Lpositive;   /* TRUE for positive assertion */
5415
0
          break;
5416
5417
          /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
5418
          assertion; it is therefore always treated as NOMATCH. */
5419
5420
0
          case MATCH_NOMATCH:
5421
0
          case MATCH_THEN:
5422
0
          Lstart_branch += GET(Lstart_branch, 1);
5423
0
          if (*Lstart_branch == OP_ALT) continue;  /* Try next branch */
5424
0
          condition = !Lpositive;  /* TRUE for negative assertion */
5425
0
          break;
5426
5427
          /* These force no match without checking other branches. */
5428
5429
0
          case MATCH_COMMIT:
5430
0
          case MATCH_SKIP:
5431
0
          case MATCH_PRUNE:
5432
0
          condition = !Lpositive;
5433
0
          break;
5434
5435
0
          default:
5436
0
          RRETURN(rrc);
5437
0
          }
5438
0
        break;  /* Out of the branch loop */
5439
0
        }
5440
5441
      /* If the condition is true, find the end of the assertion so that
5442
      advancing past it gets us to the start of the first branch. */
5443
5444
0
      if (condition)
5445
0
        {
5446
0
        do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
5447
0
        }
5448
0
      break;  /* End of assertion condition */
5449
0
      }
5450
5451
0
#undef Lpositive
5452
0
#undef Lstart_branch
5453
5454
    /* Choose branch according to the condition. */
5455
5456
0
    Fecode += condition? PRIV(OP_lengths)[*Fecode] : Flength;
5457
5458
    /* If the opcode is OP_SCOND it means we are at a repeated conditional
5459
    group that might match an empty string. We must therefore descend a level
5460
    so that the start is remembered for checking. For OP_COND we can just
5461
    continue at this level. */
5462
5463
0
    if (Fop == OP_SCOND)
5464
0
      {
5465
0
      group_frame_type  = GF_NOCAPTURE | Fop;
5466
0
      RMATCH(Fecode, RM35);
5467
0
      RRETURN(rrc);
5468
0
      }
5469
0
    break;
5470
5471
5472
5473
/* ========================================================================= */
5474
/*                  End of start of parenthesis opcodes                      */
5475
/* ========================================================================= */
5476
5477
5478
    /* ===================================================================== */
5479
    /* Move the subject pointer back. This occurs only at the start of each
5480
    branch of a lookbehind assertion. If we are too close to the start to move
5481
    back, fail. When working with UTF-8 we move back a number of characters,
5482
    not bytes. */
5483
5484
16.4M
    case OP_REVERSE:
5485
16.4M
    number = GET(Fecode, 1);
5486
16.4M
#ifdef SUPPORT_UNICODE
5487
16.4M
    if (utf)
5488
0
      {
5489
0
      while (number-- > 0)
5490
0
        {
5491
0
        if (Feptr <= mb->check_subject) RRETURN(MATCH_NOMATCH);
5492
0
        Feptr--;
5493
0
        BACKCHAR(Feptr);
5494
0
        }
5495
0
      }
5496
16.4M
    else
5497
16.4M
#endif
5498
5499
    /* No UTF-8 support, or not in UTF-8 mode: count is code unit count */
5500
5501
16.4M
      {
5502
16.4M
      if ((ptrdiff_t)number > Feptr - mb->start_subject) RRETURN(MATCH_NOMATCH);
5503
15.8M
      Feptr -= number;
5504
15.8M
      }
5505
5506
    /* Save the earliest consulted character, then skip to next opcode */
5507
5508
15.8M
    if (Feptr < mb->start_used_ptr) mb->start_used_ptr = Feptr;
5509
15.8M
    Fecode += 1 + LINK_SIZE;
5510
15.8M
    break;
5511
5512
5513
    /* ===================================================================== */
5514
    /* An alternation is the end of a branch; scan along to find the end of the
5515
    bracketed group. */
5516
5517
44.9M
    case OP_ALT:
5518
1.51G
    do Fecode += GET(Fecode,1); while (*Fecode == OP_ALT);
5519
44.9M
    break;
5520
5521
5522
    /* ===================================================================== */
5523
    /* The end of a parenthesized group. For all but OP_BRA and OP_COND, the
5524
    starting frame was added to the chained frames in order to remember the
5525
    starting subject position for the group. */
5526
5527
28.4M
    case OP_KET:
5528
28.4M
    case OP_KETRMIN:
5529
61.6M
    case OP_KETRMAX:
5530
61.6M
    case OP_KETRPOS:
5531
5532
61.6M
    bracode = Fecode - GET(Fecode, 1);
5533
5534
    /* Point N to the frame at the start of the most recent group.
5535
    Remember the subject pointer at the start of the group. */
5536
5537
61.6M
    if (*bracode != OP_BRA && *bracode != OP_COND)
5538
56.2M
      {
5539
56.2M
      N = (heapframe *)((char *)mb->match_frames + Flast_group_offset);
5540
56.2M
      P = (heapframe *)((char *)N - frame_size);
5541
56.2M
      Flast_group_offset = P->last_group_offset;
5542
5543
#ifdef DEBUG_SHOW_RMATCH
5544
      fprintf(stderr, "++ KET for frame=%d type=%x prev char offset=%lu\n",
5545
        N->rdepth, N->group_frame_type,
5546
        (char *)P->eptr - (char *)mb->start_subject);
5547
#endif
5548
5549
      /* If we are at the end of an assertion that is a condition, return a
5550
      match, discarding any intermediate backtracking points. Copy back the
5551
      mark setting and the captures into the frame before N so that they are
5552
      set on return. Doing this for all assertions, both positive and negative,
5553
      seems to match what Perl does. */
5554
5555
56.2M
      if (GF_IDMASK(N->group_frame_type) == GF_CONDASSERT)
5556
0
        {
5557
0
        memcpy((char *)P + offsetof(heapframe, ovector), Fovector,
5558
0
          Foffset_top * sizeof(PCRE2_SIZE));
5559
0
        P->offset_top = Foffset_top;
5560
0
        P->mark = Fmark;
5561
0
        Fback_frame = (char *)F - (char *)P;
5562
0
        RRETURN(MATCH_MATCH);
5563
0
        }
5564
56.2M
      }
5565
5.42M
    else P = NULL;   /* Indicates starting frame not recorded */
5566
5567
    /* The group was not a conditional assertion. */
5568
5569
61.6M
    switch (*bracode)
5570
61.6M
      {
5571
5.42M
      case OP_BRA:    /* No need to do anything for these */
5572
5.42M
      case OP_COND:
5573
5.42M
      case OP_SCOND:
5574
5.42M
      break;
5575
5576
      /* Non-atomic positive assertions are like OP_BRA, except that the
5577
      subject pointer must be put back to where it was at the start of the
5578
      assertion. */
5579
5580
0
      case OP_ASSERT_NA:
5581
10
      case OP_ASSERTBACK_NA:
5582
10
      if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
5583
10
      Feptr = P->eptr;
5584
10
      break;
5585
5586
      /* Atomic positive assertions are like OP_ONCE, except that in addition
5587
      the subject pointer must be put back to where it was at the start of the
5588
      assertion. */
5589
5590
154k
      case OP_ASSERT:
5591
154k
      case OP_ASSERTBACK:
5592
154k
      if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
5593
154k
      Feptr = P->eptr;
5594
      /* Fall through */
5595
5596
      /* For an atomic group, discard internal backtracking points. We must
5597
      also ensure that any remaining branches within the top-level of the group
5598
      are not tried. Do this by adjusting the code pointer within the backtrack
5599
      frame so that it points to the final branch. */
5600
5601
154k
      case OP_ONCE:
5602
154k
      Fback_frame = ((char *)F - (char *)P);
5603
154k
      for (;;)
5604
6.50M
        {
5605
6.50M
        uint32_t y = GET(P->ecode,1);
5606
6.50M
        if ((P->ecode)[y] != OP_ALT) break;
5607
6.35M
        P->ecode += y;
5608
6.35M
        }
5609
154k
      break;
5610
5611
      /* A matching negative assertion returns MATCH, which is turned into
5612
      NOMATCH at the assertion level. */
5613
5614
0
      case OP_ASSERT_NOT:
5615
270k
      case OP_ASSERTBACK_NOT:
5616
270k
      RRETURN(MATCH_MATCH);
5617
5618
      /* At the end of a script run, apply the script-checking rules. This code
5619
      will never be exercised if Unicode support is not compiled, because in
5620
      that environment script runs cause an error at compile time. */
5621
5622
0
      case OP_SCRIPT_RUN:
5623
0
      if (!PRIV(script_run)(P->eptr, Feptr, utf)) RRETURN(MATCH_NOMATCH);
5624
0
      break;
5625
5626
      /* Whole-pattern recursion is coded as a recurse into group 0, so it
5627
      won't be picked up here. Instead, we catch it when the OP_END is reached.
5628
      Other recursion is handled here. */
5629
5630
23.9M
      case OP_CBRA:
5631
23.9M
      case OP_CBRAPOS:
5632
25.7M
      case OP_SCBRA:
5633
25.7M
      case OP_SCBRAPOS:
5634
25.7M
      number = GET2(bracode, 1+LINK_SIZE);
5635
5636
      /* Handle a recursively called group. We reinstate the previous set of
5637
      captures and then carry on after the recursion call. */
5638
5639
25.7M
      if (Fcurrent_recurse == number)
5640
0
        {
5641
0
        P = (heapframe *)((char *)N - frame_size);
5642
0
        memcpy((char *)F + offsetof(heapframe, ovector), P->ovector,
5643
0
          P->offset_top * sizeof(PCRE2_SIZE));
5644
0
        Foffset_top = P->offset_top;
5645
0
        Fcapture_last = P->capture_last;
5646
0
        Fcurrent_recurse = P->current_recurse;
5647
0
        Fecode = P->ecode + 1 + LINK_SIZE;
5648
0
        continue;  /* With next opcode */
5649
0
        }
5650
5651
      /* Deal with actual capturing. */
5652
5653
25.7M
      offset = (number << 1) - 2;
5654
25.7M
      Fcapture_last = number;
5655
25.7M
      Fovector[offset] = P->eptr - mb->start_subject;
5656
25.7M
      Fovector[offset+1] = Feptr - mb->start_subject;
5657
25.7M
      if (offset >= Foffset_top) Foffset_top = offset + 2;
5658
25.7M
      break;
5659
61.6M
      }  /* End actions relating to the starting opcode */
5660
5661
    /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
5662
    and return the MATCH_KETRPOS. This makes it possible to do the repeats one
5663
    at a time from the outer level. This must precede the empty string test -
5664
    in this case that test is done at the outer level. */
5665
5666
61.3M
    if (*Fecode == OP_KETRPOS)
5667
16
      {
5668
16
      memcpy((char *)P + offsetof(heapframe, eptr),
5669
16
             (char *)F + offsetof(heapframe, eptr),
5670
16
             frame_copy_size);
5671
16
      RRETURN(MATCH_KETRPOS);
5672
0
      }
5673
5674
    /* Handle the different kinds of closing brackets. A non-repeating ket
5675
    needs no special action, just continuing at this level. This also happens
5676
    for the repeating kets if the group matched no characters, in order to
5677
    forcibly break infinite loops. Otherwise, the repeating kets try the rest
5678
    of the pattern or restart from the preceding bracket, in the appropriate
5679
    order. */
5680
5681
61.3M
    if (Fop != OP_KET && (P == NULL || Feptr != P->eptr))
5682
1.39M
      {
5683
1.39M
      if (Fop == OP_KETRMIN)
5684
5.77k
        {
5685
5.77k
        RMATCH(Fecode + 1 + LINK_SIZE, RM6);
5686
5.77k
        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5687
5.77k
        Fecode -= GET(Fecode, 1);
5688
5.77k
        break;   /* End of ket processing */
5689
5.77k
        }
5690
5691
      /* Repeat the maximum number of times (KETRMAX) */
5692
5693
1.38M
      RMATCH(bracode, RM7);
5694
1.38M
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5695
1.38M
      }
5696
5697
    /* Carry on at this level for a non-repeating ket, or after matching an
5698
    empty string, or after repeating for a maximum number of times. */
5699
5700
61.3M
    Fecode += 1 + LINK_SIZE;
5701
61.3M
    break;
5702
5703
5704
    /* ===================================================================== */
5705
    /* Start and end of line assertions, not multiline mode. */
5706
5707
2.85M
    case OP_CIRC:   /* Start of line, unless PCRE2_NOTBOL is set. */
5708
2.85M
    if (Feptr != mb->start_subject || (mb->moptions & PCRE2_NOTBOL) != 0)
5709
1.74M
      RRETURN(MATCH_NOMATCH);
5710
1.74M
    Fecode++;
5711
1.74M
    break;
5712
5713
145k
    case OP_SOD:    /* Unconditional start of subject */
5714
145k
    if (Feptr != mb->start_subject) RRETURN(MATCH_NOMATCH);
5715
680
    Fecode++;
5716
680
    break;
5717
5718
    /* When PCRE2_NOTEOL is unset, assert before the subject end, or a
5719
    terminating newline unless PCRE2_DOLLAR_ENDONLY is set. */
5720
5721
177M
    case OP_DOLL:
5722
177M
    if ((mb->moptions & PCRE2_NOTEOL) != 0) RRETURN(MATCH_NOMATCH);
5723
177M
    if ((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0) goto ASSERT_NL_OR_EOS;
5724
5725
    /* Fall through */
5726
    /* Unconditional end of subject assertion (\z) */
5727
5728
6.63k
    case OP_EOD:
5729
6.63k
    if (Feptr < mb->end_subject) RRETURN(MATCH_NOMATCH);
5730
1.13k
    if (mb->partial != 0)
5731
0
      {
5732
0
      mb->hitend = TRUE;
5733
0
      if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
5734
0
      }
5735
1.13k
    Fecode++;
5736
1.13k
    break;
5737
5738
    /* End of subject or ending \n assertion (\Z) */
5739
5740
61.1k
    case OP_EODN:
5741
177M
    ASSERT_NL_OR_EOS:
5742
177M
    if (Feptr < mb->end_subject &&
5743
177M
        (!IS_NEWLINE(Feptr) || Feptr != mb->end_subject - mb->nllen))
5744
176M
      {
5745
176M
      if (mb->partial != 0 &&
5746
176M
          Feptr + 1 >= mb->end_subject &&
5747
176M
          NLBLOCK->nltype == NLTYPE_FIXED &&
5748
176M
          NLBLOCK->nllen == 2 &&
5749
176M
          UCHAR21TEST(Feptr) == NLBLOCK->nl[0])
5750
0
        {
5751
0
        mb->hitend = TRUE;
5752
0
        if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
5753
0
        }
5754
176M
      RRETURN(MATCH_NOMATCH);
5755
0
      }
5756
5757
    /* Either at end of string or \n before end. */
5758
5759
1.27M
    if (mb->partial != 0)
5760
0
      {
5761
0
      mb->hitend = TRUE;
5762
0
      if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
5763
0
      }
5764
1.27M
    Fecode++;
5765
1.27M
    break;
5766
5767
5768
    /* ===================================================================== */
5769
    /* Start and end of line assertions, multiline mode. */
5770
5771
    /* Start of subject unless notbol, or after any newline except for one at
5772
    the very end, unless PCRE2_ALT_CIRCUMFLEX is set. */
5773
5774
8.87M
    case OP_CIRCM:
5775
8.87M
    if ((mb->moptions & PCRE2_NOTBOL) != 0 && Feptr == mb->start_subject)
5776
8.87M
      RRETURN(MATCH_NOMATCH);
5777
8.87M
    if (Feptr != mb->start_subject &&
5778
8.87M
        ((Feptr == mb->end_subject &&
5779
8.80M
           (mb->poptions & PCRE2_ALT_CIRCUMFLEX) == 0) ||
5780
8.80M
         !WAS_NEWLINE(Feptr)))
5781
8.63M
      RRETURN(MATCH_NOMATCH);
5782
233k
    Fecode++;
5783
233k
    break;
5784
5785
    /* Assert before any newline, or before end of subject unless noteol is
5786
    set. */
5787
5788
121k
    case OP_DOLLM:
5789
121k
    if (Feptr < mb->end_subject)
5790
120k
      {
5791
120k
      if (!IS_NEWLINE(Feptr))
5792
118k
        {
5793
118k
        if (mb->partial != 0 &&
5794
118k
            Feptr + 1 >= mb->end_subject &&
5795
118k
            NLBLOCK->nltype == NLTYPE_FIXED &&
5796
118k
            NLBLOCK->nllen == 2 &&
5797
118k
            UCHAR21TEST(Feptr) == NLBLOCK->nl[0])
5798
0
          {
5799
0
          mb->hitend = TRUE;
5800
0
          if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
5801
0
          }
5802
118k
        RRETURN(MATCH_NOMATCH);
5803
0
        }
5804
120k
      }
5805
1.60k
    else
5806
1.60k
      {
5807
1.60k
      if ((mb->moptions & PCRE2_NOTEOL) != 0) RRETURN(MATCH_NOMATCH);
5808
1.60k
      SCHECK_PARTIAL();
5809
1.60k
      }
5810
3.73k
    Fecode++;
5811
3.73k
    break;
5812
5813
5814
    /* ===================================================================== */
5815
    /* Start of match assertion */
5816
5817
19.1M
    case OP_SOM:
5818
19.1M
    if (Feptr != mb->start_subject + mb->start_offset) RRETURN(MATCH_NOMATCH);
5819
22.2k
    Fecode++;
5820
22.2k
    break;
5821
5822
5823
    /* ===================================================================== */
5824
    /* Reset the start of match point */
5825
5826
334k
    case OP_SET_SOM:
5827
334k
    Fstart_match = Feptr;
5828
334k
    Fecode++;
5829
334k
    break;
5830
5831
5832
    /* ===================================================================== */
5833
    /* Word boundary assertions. Find out if the previous and current
5834
    characters are "word" characters. It takes a bit more work in UTF mode.
5835
    Characters > 255 are assumed to be "non-word" characters when PCRE2_UCP is
5836
    not set. When it is set, use Unicode properties if available, even when not
5837
    in UTF mode. Remember the earliest and latest consulted characters. */
5838
5839
157k
    case OP_NOT_WORD_BOUNDARY:
5840
5.85M
    case OP_WORD_BOUNDARY:
5841
5.85M
    if (Feptr == mb->check_subject) prev_is_word = FALSE; else
5842
5.84M
      {
5843
5.84M
      PCRE2_SPTR lastptr = Feptr - 1;
5844
5.84M
#ifdef SUPPORT_UNICODE
5845
5.84M
      if (utf)
5846
0
        {
5847
0
        BACKCHAR(lastptr);
5848
0
        GETCHAR(fc, lastptr);
5849
0
        }
5850
5.84M
      else
5851
5.84M
#endif  /* SUPPORT_UNICODE */
5852
5.84M
      fc = *lastptr;
5853
5.84M
      if (lastptr < mb->start_used_ptr) mb->start_used_ptr = lastptr;
5854
5.84M
#ifdef SUPPORT_UNICODE
5855
5.84M
      if ((mb->poptions & PCRE2_UCP) != 0)
5856
0
        {
5857
0
        if (fc == '_') prev_is_word = TRUE; else
5858
0
          {
5859
0
          int cat = UCD_CATEGORY(fc);
5860
0
          prev_is_word = (cat == ucp_L || cat == ucp_N);
5861
0
          }
5862
0
        }
5863
5.84M
      else
5864
5.84M
#endif  /* SUPPORT_UNICODE */
5865
5.84M
      prev_is_word = CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0;
5866
5.84M
      }
5867
5868
    /* Get status of next character */
5869
5870
5.85M
    if (Feptr >= mb->end_subject)
5871
36.0k
      {
5872
36.0k
      SCHECK_PARTIAL();
5873
36.0k
      cur_is_word = FALSE;
5874
36.0k
      }
5875
5.81M
    else
5876
5.81M
      {
5877
5.81M
      PCRE2_SPTR nextptr = Feptr + 1;
5878
5.81M
#ifdef SUPPORT_UNICODE
5879
5.81M
      if (utf)
5880
0
        {
5881
0
        FORWARDCHARTEST(nextptr, mb->end_subject);
5882
0
        GETCHAR(fc, Feptr);
5883
0
        }
5884
5.81M
      else
5885
5.81M
#endif  /* SUPPORT_UNICODE */
5886
5.81M
      fc = *Feptr;
5887
5.81M
      if (nextptr > mb->last_used_ptr) mb->last_used_ptr = nextptr;
5888
5.81M
#ifdef SUPPORT_UNICODE
5889
5.81M
      if ((mb->poptions & PCRE2_UCP) != 0)
5890
0
        {
5891
0
        if (fc == '_') cur_is_word = TRUE; else
5892
0
          {
5893
0
          int cat = UCD_CATEGORY(fc);
5894
0
          cur_is_word = (cat == ucp_L || cat == ucp_N);
5895
0
          }
5896
0
        }
5897
5.81M
      else
5898
5.81M
#endif  /* SUPPORT_UNICODE */
5899
5.81M
      cur_is_word = CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0;
5900
5.81M
      }
5901
5902
    /* Now see if the situation is what we want */
5903
5904
5.85M
    if ((*Fecode++ == OP_WORD_BOUNDARY)?
5905
5.69M
         cur_is_word == prev_is_word : cur_is_word != prev_is_word)
5906
4.08M
      RRETURN(MATCH_NOMATCH);
5907
1.76M
    break;
5908
5909
5910
    /* ===================================================================== */
5911
    /* Backtracking (*VERB)s, with and without arguments. Note that if the
5912
    pattern is successfully matched, we do not come back from RMATCH. */
5913
5914
0
    case OP_MARK:
5915
0
    Fmark = mb->nomatch_mark = Fecode + 2;
5916
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM12);
5917
5918
    /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
5919
    argument, and we must check whether that argument matches this MARK's
5920
    argument. It is passed back in mb->verb_skip_ptr. If it does match, we
5921
    return MATCH_SKIP with mb->verb_skip_ptr now pointing to the subject
5922
    position that corresponds to this mark. Otherwise, pass back the return
5923
    code unaltered. */
5924
5925
0
    if (rrc == MATCH_SKIP_ARG &&
5926
0
             PRIV(strcmp)(Fecode + 2, mb->verb_skip_ptr) == 0)
5927
0
      {
5928
0
      mb->verb_skip_ptr = Feptr;   /* Pass back current position */
5929
0
      RRETURN(MATCH_SKIP);
5930
0
      }
5931
0
    RRETURN(rrc);
5932
5933
0
    case OP_FAIL:
5934
0
    RRETURN(MATCH_NOMATCH);
5935
5936
    /* Record the current recursing group number in mb->verb_current_recurse
5937
    when a backtracking return such as MATCH_COMMIT is given. This enables the
5938
    recurse processing to catch verbs from within the recursion. */
5939
5940
0
    case OP_COMMIT:
5941
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM13);
5942
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5943
0
    mb->verb_current_recurse = Fcurrent_recurse;
5944
0
    RRETURN(MATCH_COMMIT);
5945
5946
0
    case OP_COMMIT_ARG:
5947
0
    Fmark = mb->nomatch_mark = Fecode + 2;
5948
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM36);
5949
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5950
0
    mb->verb_current_recurse = Fcurrent_recurse;
5951
0
    RRETURN(MATCH_COMMIT);
5952
5953
0
    case OP_PRUNE:
5954
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM14);
5955
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5956
0
    mb->verb_current_recurse = Fcurrent_recurse;
5957
0
    RRETURN(MATCH_PRUNE);
5958
5959
0
    case OP_PRUNE_ARG:
5960
0
    Fmark = mb->nomatch_mark = Fecode + 2;
5961
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM15);
5962
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5963
0
    mb->verb_current_recurse = Fcurrent_recurse;
5964
0
    RRETURN(MATCH_PRUNE);
5965
5966
0
    case OP_SKIP:
5967
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM16);
5968
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5969
0
    mb->verb_skip_ptr = Feptr;   /* Pass back current position */
5970
0
    mb->verb_current_recurse = Fcurrent_recurse;
5971
0
    RRETURN(MATCH_SKIP);
5972
5973
    /* Note that, for Perl compatibility, SKIP with an argument does NOT set
5974
    nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was
5975
    not a matching mark, we have to re-run the match, ignoring the SKIP_ARG
5976
    that failed and any that precede it (either they also failed, or were not
5977
    triggered). To do this, we maintain a count of executed SKIP_ARGs. If a
5978
    SKIP_ARG gets to top level, the match is re-run with mb->ignore_skip_arg
5979
    set to the count of the one that failed. */
5980
5981
0
    case OP_SKIP_ARG:
5982
0
    mb->skip_arg_count++;
5983
0
    if (mb->skip_arg_count <= mb->ignore_skip_arg)
5984
0
      {
5985
0
      Fecode += PRIV(OP_lengths)[*Fecode] + Fecode[1];
5986
0
      break;
5987
0
      }
5988
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM17);
5989
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5990
5991
    /* Pass back the current skip name and return the special MATCH_SKIP_ARG
5992
    return code. This will either be caught by a matching MARK, or get to the
5993
    top, where it causes a rematch with mb->ignore_skip_arg set to the value of
5994
    mb->skip_arg_count. */
5995
5996
0
    mb->verb_skip_ptr = Fecode + 2;
5997
0
    mb->verb_current_recurse = Fcurrent_recurse;
5998
0
    RRETURN(MATCH_SKIP_ARG);
5999
6000
    /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
6001
    the branch in which it occurs can be determined. */
6002
6003
0
    case OP_THEN:
6004
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM18);
6005
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6006
0
    mb->verb_ecode_ptr = Fecode;
6007
0
    mb->verb_current_recurse = Fcurrent_recurse;
6008
0
    RRETURN(MATCH_THEN);
6009
6010
0
    case OP_THEN_ARG:
6011
0
    Fmark = mb->nomatch_mark = Fecode + 2;
6012
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM19);
6013
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6014
0
    mb->verb_ecode_ptr = Fecode;
6015
0
    mb->verb_current_recurse = Fcurrent_recurse;
6016
0
    RRETURN(MATCH_THEN);
6017
6018
6019
    /* ===================================================================== */
6020
    /* There's been some horrible disaster. Arrival here can only mean there is
6021
    something seriously wrong in the code above or the OP_xxx definitions. */
6022
6023
0
    default:
6024
0
    return PCRE2_ERROR_INTERNAL;
6025
1.77G
    }
6026
6027
  /* Do not insert any code in here without much thought; it is assumed
6028
  that "continue" in the code above comes out to here to repeat the main
6029
  loop. */
6030
6031
1.77G
  }  /* End of main loop */
6032
/* Control never reaches here */
6033
6034
6035
/* ========================================================================= */
6036
/* The RRETURN() macro jumps here. The number that is saved in Freturn_id
6037
indicates which label we actually want to return to. The value in Frdepth is
6038
the index number of the frame in the vector. The return value has been placed
6039
in rrc. */
6040
6041
1.08G
#define LBL(val) case val: goto L_RM##val;
6042
6043
1.13G
RETURN_SWITCH:
6044
1.13G
if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
6045
1.13G
if (Frdepth == 0) return rrc;                     /* Exit from the top level */
6046
1.08G
F = (heapframe *)((char *)F - Fback_frame);       /* Backtrack */
6047
1.08G
mb->cb->callout_flags |= PCRE2_CALLOUT_BACKTRACK; /* Note for callouts */
6048
6049
#ifdef DEBUG_SHOW_RMATCH
6050
fprintf(stderr, "++ RETURN %d to %d\n", rrc, Freturn_id);
6051
#endif
6052
6053
1.08G
switch (Freturn_id)
6054
1.08G
  {
6055
155M
  LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
6056
15.3M
  LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)
6057
2.57M
  LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)
6058
143k
  LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)
6059
628M
  LBL(33) LBL(34) LBL(35) LBL(36)
6060
6061
0
#ifdef SUPPORT_WIDE_CHARS
6062
615
  LBL(100) LBL(101)
6063
0
#endif
6064
6065
0
#ifdef SUPPORT_UNICODE
6066
0
  LBL(200) LBL(201) LBL(202) LBL(203) LBL(204) LBL(205) LBL(206)
6067
134k
  LBL(207) LBL(208) LBL(209) LBL(210) LBL(211) LBL(212) LBL(213)
6068
72.5M
  LBL(214) LBL(215) LBL(216) LBL(217) LBL(218) LBL(219) LBL(220)
6069
2.78M
  LBL(221) LBL(222)
6070
0
#endif
6071
6072
0
  default:
6073
0
  return PCRE2_ERROR_INTERNAL;
6074
1.08G
  }
6075
1.08G
#undef LBL
6076
1.08G
}
6077
6078
6079
/*************************************************
6080
*           Match a Regular Expression           *
6081
*************************************************/
6082
6083
/* This function applies a compiled pattern to a subject string and picks out
6084
portions of the string if it matches. Two elements in the vector are set for
6085
each substring: the offsets to the start and end of the substring.
6086
6087
Arguments:
6088
  code            points to the compiled expression
6089
  subject         points to the subject string
6090
  length          length of subject string (may contain binary zeros)
6091
  start_offset    where to start in the subject string
6092
  options         option bits
6093
  match_data      points to a match_data block
6094
  mcontext        points to a PCRE2 match context
6095
6096
Returns:          > 0 => success; value is the number of ovector pairs filled
6097
                  = 0 => success, but ovector is not big enough
6098
                  = -1 => failed to match (PCRE2_ERROR_NOMATCH)
6099
                  = -2 => partial match (PCRE2_ERROR_PARTIAL)
6100
                  < -2 => some kind of unexpected problem
6101
*/
6102
6103
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
6104
pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
6105
  PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
6106
  pcre2_match_context *mcontext)
6107
3.71M
{
6108
3.71M
int rc;
6109
3.71M
int was_zero_terminated = 0;
6110
3.71M
const uint8_t *start_bits = NULL;
6111
3.71M
const pcre2_real_code *re = (const pcre2_real_code *)code;
6112
6113
3.71M
BOOL anchored;
6114
3.71M
BOOL firstline;
6115
3.71M
BOOL has_first_cu = FALSE;
6116
3.71M
BOOL has_req_cu = FALSE;
6117
3.71M
BOOL startline;
6118
6119
3.71M
#if PCRE2_CODE_UNIT_WIDTH == 8
6120
3.71M
PCRE2_SPTR memchr_found_first_cu;
6121
3.71M
PCRE2_SPTR memchr_found_first_cu2;
6122
3.71M
#endif
6123
6124
3.71M
PCRE2_UCHAR first_cu = 0;
6125
3.71M
PCRE2_UCHAR first_cu2 = 0;
6126
3.71M
PCRE2_UCHAR req_cu = 0;
6127
3.71M
PCRE2_UCHAR req_cu2 = 0;
6128
6129
3.71M
PCRE2_SPTR bumpalong_limit;
6130
3.71M
PCRE2_SPTR end_subject;
6131
3.71M
PCRE2_SPTR true_end_subject;
6132
3.71M
PCRE2_SPTR start_match = subject + start_offset;
6133
3.71M
PCRE2_SPTR req_cu_ptr = start_match - 1;
6134
3.71M
PCRE2_SPTR start_partial;
6135
3.71M
PCRE2_SPTR match_partial;
6136
6137
#ifdef SUPPORT_JIT
6138
BOOL use_jit;
6139
#endif
6140
6141
/* This flag is needed even when Unicode is not supported for convenience
6142
(it is used by the IS_NEWLINE macro). */
6143
6144
3.71M
BOOL utf = FALSE;
6145
6146
3.71M
#ifdef SUPPORT_UNICODE
6147
3.71M
BOOL ucp = FALSE;
6148
3.71M
BOOL allow_invalid;
6149
3.71M
uint32_t fragment_options = 0;
6150
#ifdef SUPPORT_JIT
6151
BOOL jit_checked_utf = FALSE;
6152
#endif
6153
3.71M
#endif  /* SUPPORT_UNICODE */
6154
6155
3.71M
PCRE2_SIZE frame_size;
6156
6157
/* We need to have mb as a pointer to a match block, because the IS_NEWLINE
6158
macro is used below, and it expects NLBLOCK to be defined as a pointer. */
6159
6160
3.71M
pcre2_callout_block cb;
6161
3.71M
match_block actual_match_block;
6162
3.71M
match_block *mb = &actual_match_block;
6163
6164
/* Allocate an initial vector of backtracking frames on the stack. If this
6165
proves to be too small, it is replaced by a larger one on the heap. To get a
6166
vector of the size required that is aligned for pointers, allocate it as a
6167
vector of pointers. */
6168
6169
3.71M
PCRE2_SPTR stack_frames_vector[START_FRAMES_SIZE/sizeof(PCRE2_SPTR)]
6170
3.71M
    PCRE2_KEEP_UNINITIALIZED;
6171
3.71M
mb->stack_frames = (heapframe *)stack_frames_vector;
6172
6173
/* A length equal to PCRE2_ZERO_TERMINATED implies a zero-terminated
6174
subject string. */
6175
6176
3.71M
if (length == PCRE2_ZERO_TERMINATED)
6177
0
  {
6178
0
  length = PRIV(strlen)(subject);
6179
0
  was_zero_terminated = 1;
6180
0
  }
6181
3.71M
true_end_subject = end_subject = subject + length;
6182
6183
/* Plausibility checks */
6184
6185
3.71M
if ((options & ~PUBLIC_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION;
6186
3.71M
if (code == NULL || subject == NULL || match_data == NULL)
6187
0
  return PCRE2_ERROR_NULL;
6188
3.71M
if (start_offset > length) return PCRE2_ERROR_BADOFFSET;
6189
6190
/* Check that the first field in the block is the magic number. */
6191
6192
3.71M
if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
6193
6194
/* Check the code unit width. */
6195
6196
3.71M
if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8)
6197
0
  return PCRE2_ERROR_BADMODE;
6198
6199
/* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the
6200
options variable for this function. Users of PCRE2 who are not calling the
6201
function directly would like to have a way of setting these flags, in the same
6202
way that they can set pcre2_compile() flags like PCRE2_NO_AUTOPOSSESS with
6203
constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and
6204
(*NOTEMPTY_ATSTART) set bits in the pattern's "flags" field which we now
6205
transfer to the options for this function. The bits are guaranteed to be
6206
adjacent, but do not have the same values. This bit of Boolean trickery assumes
6207
that the match-time bits are not more significant than the flag bits. If by
6208
accident this is not the case, a compile-time division by zero error will
6209
occur. */
6210
6211
11.1M
#define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
6212
7.42M
#define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART)
6213
3.71M
options |= (re->flags & FF) / ((FF & (~FF+1)) / (OO & (~OO+1)));
6214
3.71M
#undef FF
6215
3.71M
#undef OO
6216
6217
/* If the pattern was successfully studied with JIT support, we will run the
6218
JIT executable instead of the rest of this function. Most options must be set
6219
at compile time for the JIT code to be usable. */
6220
6221
#ifdef SUPPORT_JIT
6222
use_jit = (re->executable_jit != NULL &&
6223
          (options & ~PUBLIC_JIT_MATCH_OPTIONS) == 0);
6224
#endif
6225
6226
/* Initialize UTF/UCP parameters. */
6227
6228
3.71M
#ifdef SUPPORT_UNICODE
6229
3.71M
utf = (re->overall_options & PCRE2_UTF) != 0;
6230
3.71M
allow_invalid = (re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0;
6231
3.71M
ucp = (re->overall_options & PCRE2_UCP) != 0;
6232
3.71M
#endif  /* SUPPORT_UNICODE */
6233
6234
/* Convert the partial matching flags into an integer. */
6235
6236
3.71M
mb->partial = ((options & PCRE2_PARTIAL_HARD) != 0)? 2 :
6237
3.71M
              ((options & PCRE2_PARTIAL_SOFT) != 0)? 1 : 0;
6238
6239
/* Partial matching and PCRE2_ENDANCHORED are currently not allowed at the same
6240
time. */
6241
6242
3.71M
if (mb->partial != 0 &&
6243
3.71M
   ((re->overall_options | options) & PCRE2_ENDANCHORED) != 0)
6244
0
  return PCRE2_ERROR_BADOPTION;
6245
6246
/* It is an error to set an offset limit without setting the flag at compile
6247
time. */
6248
6249
3.71M
if (mcontext != NULL && mcontext->offset_limit != PCRE2_UNSET &&
6250
3.71M
     (re->overall_options & PCRE2_USE_OFFSET_LIMIT) == 0)
6251
0
  return PCRE2_ERROR_BADOFFSETLIMIT;
6252
6253
/* If the match data block was previously used with PCRE2_COPY_MATCHED_SUBJECT,
6254
free the memory that was obtained. Set the field to NULL for no match cases. */
6255
6256
3.71M
if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)
6257
0
  {
6258
0
  match_data->memctl.free((void *)match_data->subject,
6259
0
    match_data->memctl.memory_data);
6260
0
  match_data->flags &= ~PCRE2_MD_COPIED_SUBJECT;
6261
0
  }
6262
3.71M
match_data->subject = NULL;
6263
6264
/* Zero the error offset in case the first code unit is invalid UTF. */
6265
6266
3.71M
match_data->startchar = 0;
6267
6268
6269
/* ============================= JIT matching ============================== */
6270
6271
/* Prepare for JIT matching. Check a UTF string for validity unless no check is
6272
requested or invalid UTF can be handled. We check only the portion of the
6273
subject that might be inspected during matching - from the offset minus the
6274
maximum lookbehind to the given length. This saves time when a small part of a
6275
large subject is being matched by the use of a starting offset. Note that the
6276
maximum lookbehind is a number of characters, not code units. */
6277
6278
#ifdef SUPPORT_JIT
6279
if (use_jit)
6280
  {
6281
#ifdef SUPPORT_UNICODE
6282
  if (utf && (options & PCRE2_NO_UTF_CHECK) == 0 && !allow_invalid)
6283
    {
6284
#if PCRE2_CODE_UNIT_WIDTH != 32
6285
    unsigned int i;
6286
#endif
6287
6288
    /* For 8-bit and 16-bit UTF, check that the first code unit is a valid
6289
    character start. */
6290
6291
#if PCRE2_CODE_UNIT_WIDTH != 32
6292
    if (start_match < end_subject && NOT_FIRSTCU(*start_match))
6293
      {
6294
      if (start_offset > 0) return PCRE2_ERROR_BADUTFOFFSET;
6295
#if PCRE2_CODE_UNIT_WIDTH == 8
6296
      return PCRE2_ERROR_UTF8_ERR20;  /* Isolated 0x80 byte */
6297
#else
6298
      return PCRE2_ERROR_UTF16_ERR3;  /* Isolated low surrogate */
6299
#endif
6300
      }
6301
#endif  /* WIDTH != 32 */
6302
6303
    /* Move back by the maximum lookbehind, just in case it happens at the very
6304
    start of matching. */
6305
6306
#if PCRE2_CODE_UNIT_WIDTH != 32
6307
    for (i = re->max_lookbehind; i > 0 && start_match > subject; i--)
6308
      {
6309
      start_match--;
6310
      while (start_match > subject &&
6311
#if PCRE2_CODE_UNIT_WIDTH == 8
6312
      (*start_match & 0xc0) == 0x80)
6313
#else  /* 16-bit */
6314
      (*start_match & 0xfc00) == 0xdc00)
6315
#endif
6316
        start_match--;
6317
      }
6318
#else  /* PCRE2_CODE_UNIT_WIDTH != 32 */
6319
6320
    /* In the 32-bit library, one code unit equals one character. However,
6321
    we cannot just subtract the lookbehind and then compare pointers, because
6322
    a very large lookbehind could create an invalid pointer. */
6323
6324
    if (start_offset >= re->max_lookbehind)
6325
      start_match -= re->max_lookbehind;
6326
    else
6327
      start_match = subject;
6328
#endif  /* PCRE2_CODE_UNIT_WIDTH != 32 */
6329
6330
    /* Validate the relevant portion of the subject. Adjust the offset of an
6331
    invalid code point to be an absolute offset in the whole string. */
6332
6333
    match_data->rc = PRIV(valid_utf)(start_match,
6334
      length - (start_match - subject), &(match_data->startchar));
6335
    if (match_data->rc != 0)
6336
      {
6337
      match_data->startchar += start_match - subject;
6338
      return match_data->rc;
6339
      }
6340
    jit_checked_utf = TRUE;
6341
    }
6342
#endif  /* SUPPORT_UNICODE */
6343
6344
  /* If JIT returns BADOPTION, which means that the selected complete or
6345
  partial matching mode was not compiled, fall through to the interpreter. */
6346
6347
  rc = pcre2_jit_match(code, subject, length, start_offset, options,
6348
    match_data, mcontext);
6349
  if (rc != PCRE2_ERROR_JIT_BADOPTION)
6350
    {
6351
    if (rc >= 0 && (options & PCRE2_COPY_MATCHED_SUBJECT) != 0)
6352
      {
6353
      length = CU2BYTES(length + was_zero_terminated);
6354
      match_data->subject = match_data->memctl.malloc(length,
6355
        match_data->memctl.memory_data);
6356
      if (match_data->subject == NULL) return PCRE2_ERROR_NOMEMORY;
6357
      memcpy((void *)match_data->subject, subject, length);
6358
      match_data->flags |= PCRE2_MD_COPIED_SUBJECT;
6359
      }
6360
    return rc;
6361
    }
6362
  }
6363
#endif  /* SUPPORT_JIT */
6364
6365
/* ========================= End of JIT matching ========================== */
6366
6367
6368
/* Proceed with non-JIT matching. The default is to allow lookbehinds to the
6369
start of the subject. A UTF check when there is a non-zero offset may change
6370
this. */
6371
6372
3.71M
mb->check_subject = subject;
6373
6374
/* If a UTF subject string was not checked for validity in the JIT code above,
6375
check it here, and handle support for invalid UTF strings. The check above
6376
happens only when invalid UTF is not supported and PCRE2_NO_UTF_CHECK is unset.
6377
If we get here in those circumstances, it means the subject string is valid,
6378
but for some reason JIT matching was not successful. There is no need to check
6379
the subject again.
6380
6381
We check only the portion of the subject that might be inspected during
6382
matching - from the offset minus the maximum lookbehind to the given length.
6383
This saves time when a small part of a large subject is being matched by the
6384
use of a starting offset. Note that the maximum lookbehind is a number of
6385
characters, not code units.
6386
6387
Note also that support for invalid UTF forces a check, overriding the setting
6388
of PCRE2_NO_UTF_CHECK. */
6389
6390
3.71M
#ifdef SUPPORT_UNICODE
6391
3.71M
if (utf &&
6392
#ifdef SUPPORT_JIT
6393
    !jit_checked_utf &&
6394
#endif
6395
3.71M
    ((options & PCRE2_NO_UTF_CHECK) == 0 || allow_invalid))
6396
0
  {
6397
0
#if PCRE2_CODE_UNIT_WIDTH != 32
6398
0
  BOOL skipped_bad_start = FALSE;
6399
0
#endif
6400
6401
  /* For 8-bit and 16-bit UTF, check that the first code unit is a valid
6402
  character start. If we are handling invalid UTF, just skip over such code
6403
  units. Otherwise, give an appropriate error. */
6404
6405
0
#if PCRE2_CODE_UNIT_WIDTH != 32
6406
0
  if (allow_invalid)
6407
0
    {
6408
0
    while (start_match < end_subject && NOT_FIRSTCU(*start_match))
6409
0
      {
6410
0
      start_match++;
6411
0
      skipped_bad_start = TRUE;
6412
0
      }
6413
0
    }
6414
0
  else if (start_match < end_subject && NOT_FIRSTCU(*start_match))
6415
0
    {
6416
0
    if (start_offset > 0) return PCRE2_ERROR_BADUTFOFFSET;
6417
0
#if PCRE2_CODE_UNIT_WIDTH == 8
6418
0
    return PCRE2_ERROR_UTF8_ERR20;  /* Isolated 0x80 byte */
6419
#else
6420
    return PCRE2_ERROR_UTF16_ERR3;  /* Isolated low surrogate */
6421
#endif
6422
0
    }
6423
0
#endif  /* WIDTH != 32 */
6424
6425
  /* The mb->check_subject field points to the start of UTF checking;
6426
  lookbehinds can go back no further than this. */
6427
6428
0
  mb->check_subject = start_match;
6429
6430
  /* Move back by the maximum lookbehind, just in case it happens at the very
6431
  start of matching, but don't do this if we skipped bad 8-bit or 16-bit code
6432
  units above. */
6433
6434
0
#if PCRE2_CODE_UNIT_WIDTH != 32
6435
0
  if (!skipped_bad_start)
6436
0
    {
6437
0
    unsigned int i;
6438
0
    for (i = re->max_lookbehind; i > 0 && mb->check_subject > subject; i--)
6439
0
      {
6440
0
      mb->check_subject--;
6441
0
      while (mb->check_subject > subject &&
6442
0
#if PCRE2_CODE_UNIT_WIDTH == 8
6443
0
      (*mb->check_subject & 0xc0) == 0x80)
6444
#else  /* 16-bit */
6445
      (*mb->check_subject & 0xfc00) == 0xdc00)
6446
#endif
6447
0
        mb->check_subject--;
6448
0
      }
6449
0
    }
6450
#else  /* PCRE2_CODE_UNIT_WIDTH != 32 */
6451
6452
  /* In the 32-bit library, one code unit equals one character. However,
6453
  we cannot just subtract the lookbehind and then compare pointers, because
6454
  a very large lookbehind could create an invalid pointer. */
6455
6456
  if (start_offset >= re->max_lookbehind)
6457
    mb->check_subject -= re->max_lookbehind;
6458
  else
6459
    mb->check_subject = subject;
6460
#endif  /* PCRE2_CODE_UNIT_WIDTH != 32 */
6461
6462
  /* Validate the relevant portion of the subject. There's a loop in case we
6463
  encounter bad UTF in the characters preceding start_match which we are
6464
  scanning because of a lookbehind. */
6465
6466
0
  for (;;)
6467
0
    {
6468
0
    match_data->rc = PRIV(valid_utf)(mb->check_subject,
6469
0
      length - (mb->check_subject - subject), &(match_data->startchar));
6470
6471
0
    if (match_data->rc == 0) break;   /* Valid UTF string */
6472
6473
    /* Invalid UTF string. Adjust the offset to be an absolute offset in the
6474
    whole string. If we are handling invalid UTF strings, set end_subject to
6475
    stop before the bad code unit, and set the options to "not end of line".
6476
    Otherwise return the error. */
6477
6478
0
    match_data->startchar += mb->check_subject - subject;
6479
0
    if (!allow_invalid || match_data->rc > 0) return match_data->rc;
6480
0
    end_subject = subject + match_data->startchar;
6481
6482
    /* If the end precedes start_match, it means there is invalid UTF in the
6483
    extra code units we reversed over because of a lookbehind. Advance past the
6484
    first bad code unit, and then skip invalid character starting code units in
6485
    8-bit and 16-bit modes, and try again. */
6486
6487
0
    if (end_subject < start_match)
6488
0
      {
6489
0
      mb->check_subject = end_subject + 1;
6490
0
#if PCRE2_CODE_UNIT_WIDTH != 32
6491
0
      while (mb->check_subject < start_match && NOT_FIRSTCU(*mb->check_subject))
6492
0
        mb->check_subject++;
6493
0
#endif
6494
0
      }
6495
6496
    /* Otherwise, set the not end of line option, and do the match. */
6497
6498
0
    else
6499
0
      {
6500
0
      fragment_options = PCRE2_NOTEOL;
6501
0
      break;
6502
0
      }
6503
0
    }
6504
0
  }
6505
3.71M
#endif  /* SUPPORT_UNICODE */
6506
6507
/* A NULL match context means "use a default context", but we take the memory
6508
control functions from the pattern. */
6509
6510
3.71M
if (mcontext == NULL)
6511
1.51M
  {
6512
1.51M
  mcontext = (pcre2_match_context *)(&PRIV(default_match_context));
6513
1.51M
  mb->memctl = re->memctl;
6514
1.51M
  }
6515
2.19M
else mb->memctl = mcontext->memctl;
6516
6517
3.71M
anchored = ((re->overall_options | options) & PCRE2_ANCHORED) != 0;
6518
3.71M
firstline = (re->overall_options & PCRE2_FIRSTLINE) != 0;
6519
3.71M
startline = (re->flags & PCRE2_STARTLINE) != 0;
6520
3.71M
bumpalong_limit = (mcontext->offset_limit == PCRE2_UNSET)?
6521
3.71M
  true_end_subject : subject + mcontext->offset_limit;
6522
6523
/* Initialize and set up the fixed fields in the callout block, with a pointer
6524
in the match block. */
6525
6526
3.71M
mb->cb = &cb;
6527
3.71M
cb.version = 2;
6528
3.71M
cb.subject = subject;
6529
3.71M
cb.subject_length = (PCRE2_SIZE)(end_subject - subject);
6530
3.71M
cb.callout_flags = 0;
6531
6532
/* Fill in the remaining fields in the match block, except for moptions, which
6533
gets set later. */
6534
6535
3.71M
mb->callout = mcontext->callout;
6536
3.71M
mb->callout_data = mcontext->callout_data;
6537
6538
3.71M
mb->start_subject = subject;
6539
3.71M
mb->start_offset = start_offset;
6540
3.71M
mb->end_subject = end_subject;
6541
3.71M
mb->hasthen = (re->flags & PCRE2_HASTHEN) != 0;
6542
3.71M
mb->allowemptypartial = (re->max_lookbehind > 0) ||
6543
3.71M
    (re->flags & PCRE2_MATCH_EMPTY) != 0;
6544
3.71M
mb->poptions = re->overall_options;          /* Pattern options */
6545
3.71M
mb->ignore_skip_arg = 0;
6546
3.71M
mb->mark = mb->nomatch_mark = NULL;          /* In case never set */
6547
6548
/* The name table is needed for finding all the numbers associated with a
6549
given name, for condition testing. The code follows the name table. */
6550
6551
3.71M
mb->name_table = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code));
6552
3.71M
mb->name_count = re->name_count;
6553
3.71M
mb->name_entry_size = re->name_entry_size;
6554
3.71M
mb->start_code = mb->name_table + re->name_count * re->name_entry_size;
6555
6556
/* Process the \R and newline settings. */
6557
6558
3.71M
mb->bsr_convention = re->bsr_convention;
6559
3.71M
mb->nltype = NLTYPE_FIXED;
6560
3.71M
switch(re->newline_convention)
6561
3.71M
  {
6562
0
  case PCRE2_NEWLINE_CR:
6563
0
  mb->nllen = 1;
6564
0
  mb->nl[0] = CHAR_CR;
6565
0
  break;
6566
6567
3.71M
  case PCRE2_NEWLINE_LF:
6568
3.71M
  mb->nllen = 1;
6569
3.71M
  mb->nl[0] = CHAR_NL;
6570
3.71M
  break;
6571
6572
0
  case PCRE2_NEWLINE_NUL:
6573
0
  mb->nllen = 1;
6574
0
  mb->nl[0] = CHAR_NUL;
6575
0
  break;
6576
6577
0
  case PCRE2_NEWLINE_CRLF:
6578
0
  mb->nllen = 2;
6579
0
  mb->nl[0] = CHAR_CR;
6580
0
  mb->nl[1] = CHAR_NL;
6581
0
  break;
6582
6583
0
  case PCRE2_NEWLINE_ANY:
6584
0
  mb->nltype = NLTYPE_ANY;
6585
0
  break;
6586
6587
0
  case PCRE2_NEWLINE_ANYCRLF:
6588
0
  mb->nltype = NLTYPE_ANYCRLF;
6589
0
  break;
6590
6591
0
  default: return PCRE2_ERROR_INTERNAL;
6592
3.71M
  }
6593
6594
/* The backtracking frames have fixed data at the front, and a PCRE2_SIZE
6595
vector at the end, whose size depends on the number of capturing parentheses in
6596
the pattern. It is not used at all if there are no capturing parentheses.
6597
6598
  frame_size             is the total size of each frame
6599
  mb->frame_vector_size  is the total usable size of the vector (rounded down
6600
                           to a whole number of frames)
6601
6602
The last of these is changed within the match() function if the frame vector
6603
has to be expanded. We therefore put it into the match block so that it is
6604
correct when calling match() more than once for non-anchored patterns. */
6605
6606
3.71M
frame_size = offsetof(heapframe, ovector) +
6607
3.71M
  re->top_bracket * 2 * sizeof(PCRE2_SIZE);
6608
6609
/* Limits set in the pattern override the match context only if they are
6610
smaller. */
6611
6612
3.71M
mb->heap_limit = (mcontext->heap_limit < re->limit_heap)?
6613
3.71M
  mcontext->heap_limit : re->limit_heap;
6614
6615
3.71M
mb->match_limit = (mcontext->match_limit < re->limit_match)?
6616
3.71M
  mcontext->match_limit : re->limit_match;
6617
6618
3.71M
mb->match_limit_depth = (mcontext->depth_limit < re->limit_depth)?
6619
3.71M
  mcontext->depth_limit : re->limit_depth;
6620
6621
/* If a pattern has very many capturing parentheses, the frame size may be very
6622
large. Ensure that there are at least 10 available frames by getting an initial
6623
vector on the heap if necessary, except when the heap limit prevents this. Get
6624
fewer if possible. (The heap limit is in kibibytes.) */
6625
6626
3.71M
if (frame_size <= START_FRAMES_SIZE/10)
6627
3.71M
  {
6628
3.71M
  mb->match_frames = mb->stack_frames;   /* Initial frame vector on the stack */
6629
3.71M
  mb->frame_vector_size = ((START_FRAMES_SIZE/frame_size) * frame_size);
6630
3.71M
  }
6631
0
else
6632
0
  {
6633
0
  mb->frame_vector_size = frame_size * 10;
6634
0
  if ((mb->frame_vector_size / 1024) > mb->heap_limit)
6635
0
    {
6636
0
    if (frame_size > mb->heap_limit * 1024) return PCRE2_ERROR_HEAPLIMIT;
6637
0
    mb->frame_vector_size = ((mb->heap_limit * 1024)/frame_size) * frame_size;
6638
0
    }
6639
0
  mb->match_frames = mb->memctl.malloc(mb->frame_vector_size,
6640
0
    mb->memctl.memory_data);
6641
0
  if (mb->match_frames == NULL) return PCRE2_ERROR_NOMEMORY;
6642
0
  }
6643
6644
3.71M
mb->match_frames_top =
6645
3.71M
  (heapframe *)((char *)mb->match_frames + mb->frame_vector_size);
6646
6647
/* Write to the ovector within the first frame to mark every capture unset and
6648
to avoid uninitialized memory read errors when it is copied to a new frame. */
6649
6650
3.71M
memset((char *)(mb->match_frames) + offsetof(heapframe, ovector), 0xff,
6651
3.71M
  re->top_bracket * 2 * sizeof(PCRE2_SIZE));
6652
6653
/* Pointers to the individual character tables */
6654
6655
3.71M
mb->lcc = re->tables + lcc_offset;
6656
3.71M
mb->fcc = re->tables + fcc_offset;
6657
3.71M
mb->ctypes = re->tables + ctypes_offset;
6658
6659
/* Set up the first code unit to match, if available. If there's no first code
6660
unit there may be a bitmap of possible first characters. */
6661
6662
3.71M
if ((re->flags & PCRE2_FIRSTSET) != 0)
6663
1.47M
  {
6664
1.47M
  has_first_cu = TRUE;
6665
1.47M
  first_cu = first_cu2 = (PCRE2_UCHAR)(re->first_codeunit);
6666
1.47M
  if ((re->flags & PCRE2_FIRSTCASELESS) != 0)
6667
7.20k
    {
6668
7.20k
    first_cu2 = TABLE_GET(first_cu, mb->fcc, first_cu);
6669
7.20k
#ifdef SUPPORT_UNICODE
6670
7.20k
#if PCRE2_CODE_UNIT_WIDTH == 8
6671
7.20k
    if (first_cu > 127 && ucp && !utf) first_cu2 = UCD_OTHERCASE(first_cu);
6672
#else
6673
    if (first_cu > 127 && (utf || ucp)) first_cu2 = UCD_OTHERCASE(first_cu);
6674
#endif
6675
7.20k
#endif  /* SUPPORT_UNICODE */
6676
7.20k
    }
6677
1.47M
  }
6678
2.23M
else
6679
2.23M
  if (!startline && (re->flags & PCRE2_FIRSTMAPSET) != 0)
6680
1.84M
    start_bits = re->start_bitmap;
6681
6682
/* There may also be a "last known required character" set. */
6683
6684
3.71M
if ((re->flags & PCRE2_LASTSET) != 0)
6685
2.47M
  {
6686
2.47M
  has_req_cu = TRUE;
6687
2.47M
  req_cu = req_cu2 = (PCRE2_UCHAR)(re->last_codeunit);
6688
2.47M
  if ((re->flags & PCRE2_LASTCASELESS) != 0)
6689
41.0k
    {
6690
41.0k
    req_cu2 = TABLE_GET(req_cu, mb->fcc, req_cu);
6691
41.0k
#ifdef SUPPORT_UNICODE
6692
41.0k
#if PCRE2_CODE_UNIT_WIDTH == 8
6693
41.0k
    if (req_cu > 127 && ucp && !utf) req_cu2 = UCD_OTHERCASE(req_cu);
6694
#else
6695
    if (req_cu > 127 && (utf || ucp)) req_cu2 = UCD_OTHERCASE(req_cu);
6696
#endif
6697
41.0k
#endif  /* SUPPORT_UNICODE */
6698
41.0k
    }
6699
2.47M
  }
6700
6701
6702
/* ==========================================================================*/
6703
6704
/* Loop for handling unanchored repeated matching attempts; for anchored regexes
6705
the loop runs just once. */
6706
6707
3.71M
#ifdef SUPPORT_UNICODE
6708
3.71M
FRAGMENT_RESTART:
6709
3.71M
#endif
6710
6711
3.71M
start_partial = match_partial = NULL;
6712
3.71M
mb->hitend = FALSE;
6713
6714
3.71M
#if PCRE2_CODE_UNIT_WIDTH == 8
6715
3.71M
memchr_found_first_cu = NULL;
6716
3.71M
memchr_found_first_cu2 = NULL;
6717
3.71M
#endif
6718
6719
3.71M
for(;;)
6720
52.9M
  {
6721
52.9M
  PCRE2_SPTR new_start_match;
6722
6723
  /* ----------------- Start of match optimizations ---------------- */
6724
6725
  /* There are some optimizations that avoid running the match if a known
6726
  starting point is not found, or if a known later code unit is not present.
6727
  However, there is an option (settable at compile time) that disables these,
6728
  for testing and for ensuring that all callouts do actually occur. */
6729
6730
52.9M
  if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
6731
52.9M
    {
6732
    /* If firstline is TRUE, the start of the match is constrained to the first
6733
    line of a multiline string. That is, the match must be before or at the
6734
    first newline following the start of matching. Temporarily adjust
6735
    end_subject so that we stop the scans for a first code unit at a newline.
6736
    If the match fails at the newline, later code breaks the loop. */
6737
6738
52.9M
    if (firstline)
6739
0
      {
6740
0
      PCRE2_SPTR t = start_match;
6741
0
#ifdef SUPPORT_UNICODE
6742
0
      if (utf)
6743
0
        {
6744
0
        while (t < end_subject && !IS_NEWLINE(t))
6745
0
          {
6746
0
          t++;
6747
0
          ACROSSCHAR(t < end_subject, t, t++);
6748
0
          }
6749
0
        }
6750
0
      else
6751
0
#endif
6752
0
      while (t < end_subject && !IS_NEWLINE(t)) t++;
6753
0
      end_subject = t;
6754
0
      }
6755
6756
    /* Anchored: check the first code unit if one is recorded. This may seem
6757
    pointless but it can help in detecting a no match case without scanning for
6758
    the required code unit. */
6759
6760
52.9M
    if (anchored)
6761
1.73M
      {
6762
1.73M
      if (has_first_cu || start_bits != NULL)
6763
1.71M
        {
6764
1.71M
        BOOL ok = start_match < end_subject;
6765
1.71M
        if (ok)
6766
1.71M
          {
6767
1.71M
          PCRE2_UCHAR c = UCHAR21TEST(start_match);
6768
1.71M
          ok = has_first_cu && (c == first_cu || c == first_cu2);
6769
1.71M
          if (!ok && start_bits != NULL)
6770
1.71M
            {
6771
#if PCRE2_CODE_UNIT_WIDTH != 8
6772
            if (c > 255) c = 255;
6773
#endif
6774
1.71M
            ok = (start_bits[c/8] & (1u << (c&7))) != 0;
6775
1.71M
            }
6776
1.71M
          }
6777
1.71M
        if (!ok)
6778
17.2k
          {
6779
17.2k
          rc = MATCH_NOMATCH;
6780
17.2k
          break;
6781
17.2k
          }
6782
1.71M
        }
6783
1.73M
      }
6784
6785
    /* Not anchored. Advance to a unique first code unit if there is one. */
6786
6787
51.2M
    else
6788
51.2M
      {
6789
51.2M
      if (has_first_cu)
6790
2.25M
        {
6791
2.25M
        if (first_cu != first_cu2)  /* Caseless */
6792
194k
          {
6793
          /* In 16-bit and 32-bit modes we have to do our own search, so can
6794
          look for both cases at once. */
6795
6796
#if PCRE2_CODE_UNIT_WIDTH != 8
6797
          PCRE2_UCHAR smc;
6798
          while (start_match < end_subject &&
6799
                (smc = UCHAR21TEST(start_match)) != first_cu &&
6800
                 smc != first_cu2)
6801
            start_match++;
6802
#else
6803
          /* In 8-bit mode, the use of memchr() gives a big speed up, even
6804
          though we have to call it twice in order to find the earliest
6805
          occurrence of the code unit in either of its cases. Caching is used
6806
          to remember the positions of previously found code units. This can
6807
          make a huge difference when the strings are very long and only one
6808
          case is actually present. */
6809
6810
194k
          PCRE2_SPTR pp1 = NULL;
6811
194k
          PCRE2_SPTR pp2 = NULL;
6812
194k
          PCRE2_SIZE searchlength = end_subject - start_match;
6813
6814
          /* If we haven't got a previously found position for first_cu, or if
6815
          the current starting position is later, we need to do a search. If
6816
          the code unit is not found, set it to the end. */
6817
6818
194k
          if (memchr_found_first_cu == NULL ||
6819
194k
              start_match > memchr_found_first_cu)
6820
110k
            {
6821
110k
            pp1 = memchr(start_match, first_cu, searchlength);
6822
110k
            memchr_found_first_cu = (pp1 == NULL)? end_subject : pp1;
6823
110k
            }
6824
6825
          /* If the start is before a previously found position, use the
6826
          previous position, or NULL if a previous search failed. */
6827
6828
84.1k
          else pp1 = (memchr_found_first_cu == end_subject)? NULL :
6829
84.1k
            memchr_found_first_cu;
6830
6831
          /* Do the same thing for the other case. */
6832
6833
194k
          if (memchr_found_first_cu2 == NULL ||
6834
194k
              start_match > memchr_found_first_cu2)
6835
90.6k
            {
6836
90.6k
            pp2 = memchr(start_match, first_cu2, searchlength);
6837
90.6k
            memchr_found_first_cu2 = (pp2 == NULL)? end_subject : pp2;
6838
90.6k
            }
6839
6840
103k
          else pp2 = (memchr_found_first_cu2 == end_subject)? NULL :
6841
103k
            memchr_found_first_cu2;
6842
6843
          /* Set the start to the end of the subject if neither case was found.
6844
          Otherwise, use the earlier found point. */
6845
6846
194k
          if (pp1 == NULL)
6847
16.3k
            start_match = (pp2 == NULL)? end_subject : pp2;
6848
178k
          else
6849
178k
            start_match = (pp2 == NULL || pp1 < pp2)? pp1 : pp2;
6850
6851
194k
#endif  /* 8-bit handling */
6852
194k
          }
6853
6854
        /* The caseful case is much simpler. */
6855
6856
2.06M
        else
6857
2.06M
          {
6858
#if PCRE2_CODE_UNIT_WIDTH != 8
6859
          while (start_match < end_subject && UCHAR21TEST(start_match) !=
6860
                 first_cu)
6861
            start_match++;
6862
#else
6863
2.06M
          start_match = memchr(start_match, first_cu, end_subject - start_match);
6864
2.06M
          if (start_match == NULL) start_match = end_subject;
6865
2.06M
#endif
6866
2.06M
          }
6867
6868
        /* If we can't find the required first code unit, having reached the
6869
        true end of the subject, break the bumpalong loop, to force a match
6870
        failure, except when doing partial matching, when we let the next cycle
6871
        run at the end of the subject. To see why, consider the pattern
6872
        /(?<=abc)def/, which partially matches "abc", even though the string
6873
        does not contain the starting character "d". If we have not reached the
6874
        true end of the subject (PCRE2_FIRSTLINE caused end_subject to be
6875
        temporarily modified) we also let the cycle run, because the matching
6876
        string is legitimately allowed to start with the first code unit of a
6877
        newline. */
6878
6879
2.25M
        if (mb->partial == 0 && start_match >= mb->end_subject)
6880
245k
          {
6881
245k
          rc = MATCH_NOMATCH;
6882
245k
          break;
6883
245k
          }
6884
2.25M
        }
6885
6886
      /* If there's no first code unit, advance to just after a linebreak for a
6887
      multiline match if required. */
6888
6889
48.9M
      else if (startline)
6890
1.05M
        {
6891
1.05M
        if (start_match > mb->start_subject + start_offset)
6892
922k
          {
6893
922k
#ifdef SUPPORT_UNICODE
6894
922k
          if (utf)
6895
0
            {
6896
0
            while (start_match < end_subject && !WAS_NEWLINE(start_match))
6897
0
              {
6898
0
              start_match++;
6899
0
              ACROSSCHAR(start_match < end_subject, start_match, start_match++);
6900
0
              }
6901
0
            }
6902
922k
          else
6903
922k
#endif
6904
34.8M
          while (start_match < end_subject && !WAS_NEWLINE(start_match))
6905
33.9M
            start_match++;
6906
6907
          /* If we have just passed a CR and the newline option is ANY or
6908
          ANYCRLF, and we are now at a LF, advance the match position by one
6909
          more code unit. */
6910
6911
922k
          if (start_match[-1] == CHAR_CR &&
6912
922k
               (mb->nltype == NLTYPE_ANY || mb->nltype == NLTYPE_ANYCRLF) &&
6913
922k
               start_match < end_subject &&
6914
922k
               UCHAR21TEST(start_match) == CHAR_NL)
6915
0
            start_match++;
6916
922k
          }
6917
1.05M
        }
6918
6919
      /* If there's no first code unit or a requirement for a multiline line
6920
      start, advance to a non-unique first code unit if any have been
6921
      identified. The bitmap contains only 256 bits. When code units are 16 or
6922
      32 bits wide, all code units greater than 254 set the 255 bit. */
6923
6924
47.9M
      else if (start_bits != NULL)
6925
15.9M
        {
6926
106M
        while (start_match < end_subject)
6927
106M
          {
6928
106M
          uint32_t c = UCHAR21TEST(start_match);
6929
#if PCRE2_CODE_UNIT_WIDTH != 8
6930
          if (c > 255) c = 255;
6931
#endif
6932
106M
          if ((start_bits[c/8] & (1u << (c&7))) != 0) break;
6933
90.3M
          start_match++;
6934
90.3M
          }
6935
6936
        /* See comment above in first_cu checking about the next few lines. */
6937
6938
15.9M
        if (mb->partial == 0 && start_match >= mb->end_subject)
6939
22.7k
          {
6940
22.7k
          rc = MATCH_NOMATCH;
6941
22.7k
          break;
6942
22.7k
          }
6943
15.9M
        }
6944
51.2M
      }   /* End first code unit handling */
6945
6946
    /* Restore fudged end_subject */
6947
6948
52.6M
    end_subject = mb->end_subject;
6949
6950
    /* The following two optimizations must be disabled for partial matching. */
6951
6952
52.6M
    if (mb->partial == 0)
6953
52.6M
      {
6954
52.6M
      PCRE2_SPTR p;
6955
6956
      /* The minimum matching length is a lower bound; no string of that length
6957
      may actually match the pattern. Although the value is, strictly, in
6958
      characters, we treat it as code units to avoid spending too much time in
6959
      this optimization. */
6960
6961
52.6M
      if (end_subject - start_match < re->minlength)
6962
271k
        {
6963
271k
        rc = MATCH_NOMATCH;
6964
271k
        break;
6965
271k
        }
6966
6967
      /* If req_cu is set, we know that that code unit must appear in the
6968
      subject for the (non-partial) match to succeed. If the first code unit is
6969
      set, req_cu must be later in the subject; otherwise the test starts at
6970
      the match point. This optimization can save a huge amount of backtracking
6971
      in patterns with nested unlimited repeats that aren't going to match.
6972
      Writing separate code for caseful/caseless versions makes it go faster,
6973
      as does using an autoincrement and backing off on a match. As in the case
6974
      of the first code unit, using memchr() in the 8-bit library gives a big
6975
      speed up. Unlike the first_cu check above, we do not need to call
6976
      memchr() twice in the caseless case because we only need to check for the
6977
      presence of the character in either case, not find the first occurrence.
6978
6979
      The search can be skipped if the code unit was found later than the
6980
      current starting point in a previous iteration of the bumpalong loop.
6981
6982
      HOWEVER: when the subject string is very, very long, searching to its end
6983
      can take a long time, and give bad performance on quite ordinary
6984
      anchored patterns. This showed up when somebody was matching something
6985
      like /^\d+C/ on a 32-megabyte string... so we don't do this when the
6986
      string is sufficiently long, but it's worth searching a lot more for
6987
      unanchored patterns. */
6988
6989
52.3M
      p = start_match + (has_first_cu? 1:0);
6990
52.3M
      if (has_req_cu && p > req_cu_ptr)
6991
2.80M
        {
6992
2.80M
        PCRE2_SIZE check_length = end_subject - start_match;
6993
6994
2.80M
        if (check_length < REQ_CU_MAX ||
6995
2.80M
              (!anchored && check_length < REQ_CU_MAX * 1000))
6996
2.80M
          {
6997
2.80M
          if (req_cu != req_cu2)  /* Caseless */
6998
138k
            {
6999
#if PCRE2_CODE_UNIT_WIDTH != 8
7000
            while (p < end_subject)
7001
              {
7002
              uint32_t pp = UCHAR21INCTEST(p);
7003
              if (pp == req_cu || pp == req_cu2) { p--; break; }
7004
              }
7005
#else  /* 8-bit code units */
7006
138k
            PCRE2_SPTR pp = p;
7007
138k
            p = memchr(pp, req_cu, end_subject - pp);
7008
138k
            if (p == NULL)
7009
39.8k
              {
7010
39.8k
              p = memchr(pp, req_cu2, end_subject - pp);
7011
39.8k
              if (p == NULL) p = end_subject;
7012
39.8k
              }
7013
138k
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
7014
138k
            }
7015
7016
          /* The caseful case */
7017
7018
2.66M
          else
7019
2.66M
            {
7020
#if PCRE2_CODE_UNIT_WIDTH != 8
7021
            while (p < end_subject)
7022
              {
7023
              if (UCHAR21INCTEST(p) == req_cu) { p--; break; }
7024
              }
7025
7026
#else  /* 8-bit code units */
7027
2.66M
            p = memchr(p, req_cu, end_subject - p);
7028
2.66M
            if (p == NULL) p = end_subject;
7029
2.66M
#endif
7030
2.66M
            }
7031
7032
          /* If we can't find the required code unit, break the bumpalong loop,
7033
          forcing a match failure. */
7034
7035
2.80M
          if (p >= end_subject)
7036
504k
            {
7037
504k
            rc = MATCH_NOMATCH;
7038
504k
            break;
7039
504k
            }
7040
7041
          /* If we have found the required code unit, save the point where we
7042
          found it, so that we don't search again next time round the bumpalong
7043
          loop if the start hasn't yet passed this code unit. */
7044
7045
2.29M
          req_cu_ptr = p;
7046
2.29M
          }
7047
2.80M
        }
7048
52.3M
      }
7049
52.6M
    }
7050
7051
  /* ------------ End of start of match optimizations ------------ */
7052
7053
  /* Give no match if we have passed the bumpalong limit. */
7054
7055
51.8M
  if (start_match > bumpalong_limit)
7056
0
    {
7057
0
    rc = MATCH_NOMATCH;
7058
0
    break;
7059
0
    }
7060
7061
  /* OK, we can now run the match. If "hitend" is set afterwards, remember the
7062
  first starting point for which a partial match was found. */
7063
7064
51.8M
  cb.start_match = (PCRE2_SIZE)(start_match - subject);
7065
51.8M
  cb.callout_flags |= PCRE2_CALLOUT_STARTMATCH;
7066
7067
51.8M
  mb->start_used_ptr = start_match;
7068
51.8M
  mb->last_used_ptr = start_match;
7069
51.8M
#ifdef SUPPORT_UNICODE
7070
51.8M
  mb->moptions = options | fragment_options;
7071
#else
7072
  mb->moptions = options;
7073
#endif
7074
51.8M
  mb->match_call_count = 0;
7075
51.8M
  mb->end_offset_top = 0;
7076
51.8M
  mb->skip_arg_count = 0;
7077
7078
51.8M
  rc = match(start_match, mb->start_code, match_data->ovector,
7079
51.8M
    match_data->oveccount, re->top_bracket, frame_size, mb);
7080
7081
51.8M
  if (mb->hitend && start_partial == NULL)
7082
0
    {
7083
0
    start_partial = mb->start_used_ptr;
7084
0
    match_partial = start_match;
7085
0
    }
7086
7087
51.8M
  switch(rc)
7088
51.8M
    {
7089
    /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
7090
    the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP
7091
    entirely. The only way we can do that is to re-do the match at the same
7092
    point, with a flag to force SKIP with an argument to be ignored. Just
7093
    treating this case as NOMATCH does not work because it does not check other
7094
    alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. */
7095
7096
0
    case MATCH_SKIP_ARG:
7097
0
    new_start_match = start_match;
7098
0
    mb->ignore_skip_arg = mb->skip_arg_count;
7099
0
    break;
7100
7101
    /* SKIP passes back the next starting point explicitly, but if it is no
7102
    greater than the match we have just done, treat it as NOMATCH. */
7103
7104
0
    case MATCH_SKIP:
7105
0
    if (mb->verb_skip_ptr > start_match)
7106
0
      {
7107
0
      new_start_match = mb->verb_skip_ptr;
7108
0
      break;
7109
0
      }
7110
    /* Fall through */
7111
7112
    /* NOMATCH and PRUNE advance by one character. THEN at this level acts
7113
    exactly like PRUNE. Unset ignore SKIP-with-argument. */
7114
7115
49.3M
    case MATCH_NOMATCH:
7116
49.3M
    case MATCH_PRUNE:
7117
49.3M
    case MATCH_THEN:
7118
49.3M
    mb->ignore_skip_arg = 0;
7119
49.3M
    new_start_match = start_match + 1;
7120
49.3M
#ifdef SUPPORT_UNICODE
7121
49.3M
    if (utf)
7122
0
      ACROSSCHAR(new_start_match < end_subject, new_start_match,
7123
49.3M
        new_start_match++);
7124
49.3M
#endif
7125
49.3M
    break;
7126
7127
    /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
7128
7129
0
    case MATCH_COMMIT:
7130
0
    rc = MATCH_NOMATCH;
7131
0
    goto ENDLOOP;
7132
7133
    /* Any other return is either a match, or some kind of error. */
7134
7135
2.58M
    default:
7136
2.58M
    goto ENDLOOP;
7137
51.8M
    }
7138
7139
  /* Control reaches here for the various types of "no match at this point"
7140
  result. Reset the code to MATCH_NOMATCH for subsequent checking. */
7141
7142
49.3M
  rc = MATCH_NOMATCH;
7143
7144
  /* If PCRE2_FIRSTLINE is set, the match must happen before or at the first
7145
  newline in the subject (though it may continue over the newline). Therefore,
7146
  if we have just failed to match, starting at a newline, do not continue. */
7147
7148
49.3M
  if (firstline && IS_NEWLINE(start_match)) break;
7149
7150
  /* Advance to new matching position */
7151
7152
49.3M
  start_match = new_start_match;
7153
7154
  /* Break the loop if the pattern is anchored or if we have passed the end of
7155
  the subject. */
7156
7157
49.3M
  if (anchored || start_match > end_subject) break;
7158
7159
  /* If we have just passed a CR and we are now at a LF, and the pattern does
7160
  not contain any explicit matches for \r or \n, and the newline option is CRLF
7161
  or ANY or ANYCRLF, advance the match position by one more code unit. In
7162
  normal matching start_match will always be greater than the first position at
7163
  this stage, but a failed *SKIP can cause a return at the same point, which is
7164
  why the first test exists. */
7165
7166
49.2M
  if (start_match > subject + start_offset &&
7167
49.2M
      start_match[-1] == CHAR_CR &&
7168
49.2M
      start_match < end_subject &&
7169
49.2M
      *start_match == CHAR_NL &&
7170
49.2M
      (re->flags & PCRE2_HASCRORLF) == 0 &&
7171
49.2M
        (mb->nltype == NLTYPE_ANY ||
7172
373k
         mb->nltype == NLTYPE_ANYCRLF ||
7173
373k
         mb->nllen == 2))
7174
0
    start_match++;
7175
7176
49.2M
  mb->mark = NULL;   /* Reset for start of next match attempt */
7177
49.2M
  }                  /* End of for(;;) "bumpalong" loop */
7178
7179
/* ==========================================================================*/
7180
7181
/* When we reach here, one of the following stopping conditions is true:
7182
7183
(1) The match succeeded, either completely, or partially;
7184
7185
(2) The pattern is anchored or the match was failed after (*COMMIT);
7186
7187
(3) We are past the end of the subject or the bumpalong limit;
7188
7189
(4) PCRE2_FIRSTLINE is set and we have failed to match at a newline, because
7190
    this option requests that a match occur at or before the first newline in
7191
    the subject.
7192
7193
(5) Some kind of error occurred.
7194
7195
*/
7196
7197
3.71M
ENDLOOP:
7198
7199
/* If end_subject != true_end_subject, it means we are handling invalid UTF,
7200
and have just processed a non-terminal fragment. If this resulted in no match
7201
or a partial match we must carry on to the next fragment (a partial match is
7202
returned to the caller only at the very end of the subject). A loop is used to
7203
avoid trying to match against empty fragments; if the pattern can match an
7204
empty string it would have done so already. */
7205
7206
3.71M
#ifdef SUPPORT_UNICODE
7207
3.71M
if (utf && end_subject != true_end_subject &&
7208
3.71M
    (rc == MATCH_NOMATCH || rc == PCRE2_ERROR_PARTIAL))
7209
0
  {
7210
0
  for (;;)
7211
0
    {
7212
    /* Advance past the first bad code unit, and then skip invalid character
7213
    starting code units in 8-bit and 16-bit modes. */
7214
7215
0
    start_match = end_subject + 1;
7216
7217
0
#if PCRE2_CODE_UNIT_WIDTH != 32
7218
0
    while (start_match < true_end_subject && NOT_FIRSTCU(*start_match))
7219
0
      start_match++;
7220
0
#endif
7221
7222
    /* If we have hit the end of the subject, there isn't another non-empty
7223
    fragment, so give up. */
7224
7225
0
    if (start_match >= true_end_subject)
7226
0
      {
7227
0
      rc = MATCH_NOMATCH;  /* In case it was partial */
7228
0
      break;
7229
0
      }
7230
7231
    /* Check the rest of the subject */
7232
7233
0
    mb->check_subject = start_match;
7234
0
    rc = PRIV(valid_utf)(start_match, length - (start_match - subject),
7235
0
      &(match_data->startchar));
7236
7237
    /* The rest of the subject is valid UTF. */
7238
7239
0
    if (rc == 0)
7240
0
      {
7241
0
      mb->end_subject = end_subject = true_end_subject;
7242
0
      fragment_options = PCRE2_NOTBOL;
7243
0
      goto FRAGMENT_RESTART;
7244
0
      }
7245
7246
    /* A subsequent UTF error has been found; if the next fragment is
7247
    non-empty, set up to process it. Otherwise, let the loop advance. */
7248
7249
0
    else if (rc < 0)
7250
0
      {
7251
0
      mb->end_subject = end_subject = start_match + match_data->startchar;
7252
0
      if (end_subject > start_match)
7253
0
        {
7254
0
        fragment_options = PCRE2_NOTBOL|PCRE2_NOTEOL;
7255
0
        goto FRAGMENT_RESTART;
7256
0
        }
7257
0
      }
7258
0
    }
7259
0
  }
7260
3.71M
#endif  /* SUPPORT_UNICODE */
7261
7262
/* Release an enlarged frame vector that is on the heap. */
7263
7264
3.71M
if (mb->match_frames != mb->stack_frames)
7265
6
  mb->memctl.free(mb->match_frames, mb->memctl.memory_data);
7266
7267
/* Fill in fields that are always returned in the match data. */
7268
7269
3.71M
match_data->code = re;
7270
3.71M
match_data->mark = mb->mark;
7271
3.71M
match_data->matchedby = PCRE2_MATCHEDBY_INTERPRETER;
7272
7273
/* Handle a fully successful match. Set the return code to the number of
7274
captured strings, or 0 if there were too many to fit into the ovector, and then
7275
set the remaining returned values before returning. Make a copy of the subject
7276
string if requested. */
7277
7278
3.71M
if (rc == MATCH_MATCH)
7279
2.56M
  {
7280
2.56M
  match_data->rc = ((int)mb->end_offset_top >= 2 * match_data->oveccount)?
7281
2.56M
    0 : (int)mb->end_offset_top/2 + 1;
7282
2.56M
  match_data->startchar = start_match - subject;
7283
2.56M
  match_data->leftchar = mb->start_used_ptr - subject;
7284
2.56M
  match_data->rightchar = ((mb->last_used_ptr > mb->end_match_ptr)?
7285
2.14M
    mb->last_used_ptr : mb->end_match_ptr) - subject;
7286
2.56M
  if ((options & PCRE2_COPY_MATCHED_SUBJECT) != 0)
7287
0
    {
7288
0
    length = CU2BYTES(length + was_zero_terminated);
7289
0
    match_data->subject = match_data->memctl.malloc(length,
7290
0
      match_data->memctl.memory_data);
7291
0
    if (match_data->subject == NULL) return PCRE2_ERROR_NOMEMORY;
7292
0
    memcpy((void *)match_data->subject, subject, length);
7293
0
    match_data->flags |= PCRE2_MD_COPIED_SUBJECT;
7294
0
    }
7295
2.56M
  else match_data->subject = subject;
7296
2.56M
  return match_data->rc;
7297
2.56M
  }
7298
7299
/* Control gets here if there has been a partial match, an error, or if the
7300
overall match attempt has failed at all permitted starting positions. Any mark
7301
data is in the nomatch_mark field. */
7302
7303
1.14M
match_data->mark = mb->nomatch_mark;
7304
7305
/* For anything other than nomatch or partial match, just return the code. */
7306
7307
1.14M
if (rc != MATCH_NOMATCH && rc != PCRE2_ERROR_PARTIAL) match_data->rc = rc;
7308
7309
/* Handle a partial match. If a "soft" partial match was requested, searching
7310
for a complete match will have continued, and the value of rc at this point
7311
will be MATCH_NOMATCH. For a "hard" partial match, it will already be
7312
PCRE2_ERROR_PARTIAL. */
7313
7314
1.12M
else if (match_partial != NULL)
7315
0
  {
7316
0
  match_data->subject = subject;
7317
0
  match_data->ovector[0] = match_partial - subject;
7318
0
  match_data->ovector[1] = end_subject - subject;
7319
0
  match_data->startchar = match_partial - subject;
7320
0
  match_data->leftchar = start_partial - subject;
7321
0
  match_data->rightchar = end_subject - subject;
7322
0
  match_data->rc = PCRE2_ERROR_PARTIAL;
7323
0
  }
7324
7325
/* Else this is the classic nomatch case. */
7326
7327
1.12M
else match_data->rc = PCRE2_ERROR_NOMATCH;
7328
7329
1.14M
return match_data->rc;
7330
3.71M
}
7331
7332
/* End of pcre2_match.c */