Coverage Report

Created: 2026-06-10 06:21

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libgit2/deps/pcre2/pcre2_match.c
Line
Count
Source
1
/*************************************************
2
*      Perl-Compatible Regular Expressions       *
3
*************************************************/
4
5
/* PCRE is a library of functions to support regular expressions whose syntax
6
and semantics are as close as possible to those of the Perl 5 language.
7
8
                       Written by Philip Hazel
9
     Original API code Copyright (c) 1997-2012 University of Cambridge
10
          New API code Copyright (c) 2015-2024 University of Cambridge
11
12
-----------------------------------------------------------------------------
13
Redistribution and use in source and binary forms, with or without
14
modification, are permitted provided that the following conditions are met:
15
16
    * Redistributions of source code must retain the above copyright notice,
17
      this list of conditions and the following disclaimer.
18
19
    * Redistributions in binary form must reproduce the above copyright
20
      notice, this list of conditions and the following disclaimer in the
21
      documentation and/or other materials provided with the distribution.
22
23
    * Neither the name of the University of Cambridge nor the names of its
24
      contributors may be used to endorse or promote products derived from
25
      this software without specific prior written permission.
26
27
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37
POSSIBILITY OF SUCH DAMAGE.
38
-----------------------------------------------------------------------------
39
*/
40
41
42
#include "pcre2_internal.h"
43
44
45
46
/* These defines enable debugging code */
47
48
/* #define DEBUG_FRAMES_DISPLAY */
49
/* #define DEBUG_SHOW_OPS */
50
/* #define DEBUG_SHOW_RMATCH */
51
52
#ifdef DEBUG_FRAMES_DISPLAY
53
#include <stdarg.h>
54
#endif
55
56
#ifdef DEBUG_SHOW_OPS
57
static const char *OP_names[] = { OP_NAME_LIST };
58
#endif
59
60
/* These defines identify the name of the block containing "static"
61
information, and fields within it. */
62
63
0
#define NLBLOCK mb              /* Block containing newline information */
64
0
#define PSSTART start_subject   /* Field containing processed string start */
65
0
#define PSEND   end_subject     /* Field containing processed string end */
66
67
0
#define RECURSE_UNSET 0xffffffffu  /* Bigger than max group number */
68
69
/* Masks for identifying the public options that are permitted at match time. */
70
71
#define PUBLIC_MATCH_OPTIONS \
72
0
  (PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
73
0
   PCRE2_NOTEMPTY_ATSTART|PCRE2_NO_UTF_CHECK|PCRE2_PARTIAL_HARD| \
74
0
   PCRE2_PARTIAL_SOFT|PCRE2_NO_JIT|PCRE2_COPY_MATCHED_SUBJECT| \
75
0
   PCRE2_DISABLE_RECURSELOOP_CHECK)
76
77
#define PUBLIC_JIT_MATCH_OPTIONS \
78
   (PCRE2_NO_UTF_CHECK|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY|\
79
    PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_SOFT|PCRE2_PARTIAL_HARD|\
80
    PCRE2_COPY_MATCHED_SUBJECT)
81
82
/* Non-error returns from and within the match() function. Error returns are
83
externally defined PCRE2_ERROR_xxx codes, which are all negative. */
84
85
0
#define MATCH_MATCH        1
86
0
#define MATCH_NOMATCH      0
87
88
/* Special internal returns used in the match() function. Make them
89
sufficiently negative to avoid the external error codes. */
90
91
0
#define MATCH_ACCEPT       (-999)
92
0
#define MATCH_KETRPOS      (-998)
93
/* The next 5 must be kept together and in sequence so that a test that checks
94
for any one of them can use a range. */
95
0
#define MATCH_COMMIT       (-997)
96
0
#define MATCH_PRUNE        (-996)
97
0
#define MATCH_SKIP         (-995)
98
0
#define MATCH_SKIP_ARG     (-994)
99
0
#define MATCH_THEN         (-993)
100
0
#define MATCH_BACKTRACK_MAX MATCH_THEN
101
0
#define MATCH_BACKTRACK_MIN MATCH_COMMIT
102
103
/* Group frame type values. Zero means the frame is not a group frame. The
104
lower 16 bits are used for data (e.g. the capture number). Group frames are
105
used for most groups so that information about the start is easily available at
106
the end without having to scan back through intermediate frames (backtrack
107
points). */
108
109
0
#define GF_CAPTURE     0x00010000u
110
0
#define GF_NOCAPTURE   0x00020000u
111
0
#define GF_CONDASSERT  0x00030000u
112
0
#define GF_RECURSE     0x00040000u
113
114
/* Masks for the identity and data parts of the group frame type. */
115
116
0
#define GF_IDMASK(a)   ((a) & 0xffff0000u)
117
0
#define GF_DATAMASK(a) ((a) & 0x0000ffffu)
118
119
/* Repetition types */
120
121
enum { REPTYPE_MIN, REPTYPE_MAX, REPTYPE_POS };
122
123
/* Min and max values for the common repeats; a maximum of UINT32_MAX =>
124
infinity. */
125
126
static const uint32_t rep_min[] = {
127
  0, 0,       /* * and *? */
128
  1, 1,       /* + and +? */
129
  0, 0,       /* ? and ?? */
130
  0, 0,       /* dummy placefillers for OP_CR[MIN]RANGE */
131
  0, 1, 0 };  /* OP_CRPOS{STAR, PLUS, QUERY} */
132
133
static const uint32_t rep_max[] = {
134
  UINT32_MAX, UINT32_MAX,      /* * and *? */
135
  UINT32_MAX, UINT32_MAX,      /* + and +? */
136
  1, 1,                        /* ? and ?? */
137
  0, 0,                        /* dummy placefillers for OP_CR[MIN]RANGE */
138
  UINT32_MAX, UINT32_MAX, 1 }; /* OP_CRPOS{STAR, PLUS, QUERY} */
139
140
/* Repetition types - must include OP_CRPOSRANGE (not needed above) */
141
142
static const uint32_t rep_typ[] = {
143
  REPTYPE_MAX, REPTYPE_MIN,    /* * and *? */
144
  REPTYPE_MAX, REPTYPE_MIN,    /* + and +? */
145
  REPTYPE_MAX, REPTYPE_MIN,    /* ? and ?? */
146
  REPTYPE_MAX, REPTYPE_MIN,    /* OP_CRRANGE and OP_CRMINRANGE */
147
  REPTYPE_POS, REPTYPE_POS,    /* OP_CRPOSSTAR, OP_CRPOSPLUS */
148
  REPTYPE_POS, REPTYPE_POS };  /* OP_CRPOSQUERY, OP_CRPOSRANGE */
149
150
/* Numbers for RMATCH calls at backtracking points. When these lists are
151
changed, the code at RETURN_SWITCH below must be updated in sync.  */
152
153
enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
154
       RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
155
       RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
156
       RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39 };
157
158
#ifdef SUPPORT_WIDE_CHARS
159
enum { RM100=100, RM101, RM102, RM103 };
160
#endif
161
162
#ifdef SUPPORT_UNICODE
163
enum { RM200=200, RM201, RM202, RM203, RM204, RM205, RM206, RM207,
164
       RM208,     RM209, RM210, RM211, RM212, RM213, RM214, RM215,
165
       RM216,     RM217, RM218, RM219, RM220, RM221, RM222, RM223,
166
       RM224 };
167
#endif
168
169
/* Define short names for general fields in the current backtrack frame, which
170
is always pointed to by the F variable. Occasional references to fields in
171
other frames are written out explicitly. There are also some fields in the
172
current frame whose names start with "temp" that are used for short-term,
173
localised backtracking memory. These are #defined with Lxxx names at the point
174
of use and undefined afterwards. */
175
176
0
#define Fback_frame        F->back_frame
177
0
#define Fcapture_last      F->capture_last
178
0
#define Fcurrent_recurse   F->current_recurse
179
0
#define Fecode             F->ecode
180
0
#define Feptr              F->eptr
181
0
#define Fgroup_frame_type  F->group_frame_type
182
0
#define Flast_group_offset F->last_group_offset
183
0
#define Flength            F->length
184
0
#define Fmark              F->mark
185
0
#define Frdepth            F->rdepth
186
0
#define Fstart_match       F->start_match
187
0
#define Foffset_top        F->offset_top
188
0
#define Foccu              F->occu
189
0
#define Fop                F->op
190
0
#define Fovector           F->ovector
191
0
#define Freturn_id         F->return_id
192
193
194
#ifdef DEBUG_FRAMES_DISPLAY
195
/*************************************************
196
*      Display current frames and contents       *
197
*************************************************/
198
199
/* This debugging function displays the current set of frames and their
200
contents. It is not called automatically from anywhere, the intention being
201
that calls can be inserted where necessary when debugging frame-related
202
problems.
203
204
Arguments:
205
  f           the file to write to
206
  F           the current top frame
207
  P           a previous frame of interest
208
  frame_size  the frame size
209
  mb          points to the match block
210
  match_data  points to the match data block
211
  s           identification text
212
213
Returns:    nothing
214
*/
215
216
static void
217
display_frames(FILE *f, heapframe *F, heapframe *P, PCRE2_SIZE frame_size,
218
  match_block *mb, pcre2_match_data *match_data, const char *s, ...)
219
{
220
uint32_t i;
221
heapframe *Q;
222
va_list ap;
223
va_start(ap, s);
224
225
fprintf(f, "FRAMES ");
226
vfprintf(f, s, ap);
227
va_end(ap);
228
229
if (P != NULL) fprintf(f, " P=%lu",
230
  ((char *)P - (char *)(match_data->heapframes))/frame_size);
231
fprintf(f, "\n");
232
233
for (i = 0, Q = match_data->heapframes;
234
     Q <= F;
235
     i++, Q = (heapframe *)((char *)Q + frame_size))
236
  {
237
  fprintf(f, "Frame %d type=%x subj=%lu code=%d back=%lu id=%d",
238
    i, Q->group_frame_type, Q->eptr - mb->start_subject, *(Q->ecode),
239
    Q->back_frame, Q->return_id);
240
241
  if (Q->last_group_offset == PCRE2_UNSET)
242
    fprintf(f, " lgoffset=unset\n");
243
  else
244
    fprintf(f, " lgoffset=%lu\n",  Q->last_group_offset/frame_size);
245
  }
246
}
247
248
#endif
249
250
251
252
/*************************************************
253
*                Process a callout               *
254
*************************************************/
255
256
/* This function is called for all callouts, whether "standalone" or at the
257
start of a conditional group. Fecode will be pointing to either OP_CALLOUT or
258
OP_CALLOUT_STR. A callout block is allocated in pcre2_match() and initialized
259
with fixed values.
260
261
Arguments:
262
  F          points to the current backtracking frame
263
  mb         points to the match block
264
  lengthptr  where to return the length of the callout item
265
266
Returns:     the return from the callout
267
             or 0 if no callout function exists
268
*/
269
270
static int
271
do_callout(heapframe *F, match_block *mb, PCRE2_SIZE *lengthptr)
272
0
{
273
0
int rc;
274
0
PCRE2_SIZE save0, save1;
275
0
PCRE2_SIZE *callout_ovector;
276
0
pcre2_callout_block *cb;
277
278
0
*lengthptr = (*Fecode == OP_CALLOUT)?
279
0
  PRIV(OP_lengths)[OP_CALLOUT] : GET(Fecode, 1 + 2*LINK_SIZE);
280
281
0
if (mb->callout == NULL) return 0;   /* No callout function provided */
282
283
/* The original matching code (pre 10.30) worked directly with the ovector
284
passed by the user, and this was passed to callouts. Now that the working
285
ovector is in the backtracking frame, it no longer needs to reserve space for
286
the overall match offsets (which would waste space in the frame). For backward
287
compatibility, however, we pass capture_top and offset_vector to the callout as
288
if for the extended ovector, and we ensure that the first two slots are unset
289
by preserving and restoring their current contents. Picky compilers complain if
290
references such as Fovector[-2] are use directly, so we set up a separate
291
pointer. */
292
293
0
callout_ovector = (PCRE2_SIZE *)(Fovector) - 2;
294
295
/* The cb->version, cb->subject, cb->subject_length, and cb->start_match fields
296
are set externally. The first 3 never change; the last is updated for each
297
bumpalong. */
298
299
0
cb = mb->cb;
300
0
cb->capture_top      = (uint32_t)Foffset_top/2 + 1;
301
0
cb->capture_last     = Fcapture_last;
302
0
cb->offset_vector    = callout_ovector;
303
0
cb->mark             = mb->nomatch_mark;
304
0
cb->current_position = (PCRE2_SIZE)(Feptr - mb->start_subject);
305
0
cb->pattern_position = GET(Fecode, 1);
306
0
cb->next_item_length = GET(Fecode, 1 + LINK_SIZE);
307
308
0
if (*Fecode == OP_CALLOUT)  /* Numerical callout */
309
0
  {
310
0
  cb->callout_number = Fecode[1 + 2*LINK_SIZE];
311
0
  cb->callout_string_offset = 0;
312
0
  cb->callout_string = NULL;
313
0
  cb->callout_string_length = 0;
314
0
  }
315
0
else  /* String callout */
316
0
  {
317
0
  cb->callout_number = 0;
318
0
  cb->callout_string_offset = GET(Fecode, 1 + 3*LINK_SIZE);
319
0
  cb->callout_string = Fecode + (1 + 4*LINK_SIZE) + 1;
320
0
  cb->callout_string_length =
321
0
    *lengthptr - (1 + 4*LINK_SIZE) - 2;
322
0
  }
323
324
0
save0 = callout_ovector[0];
325
0
save1 = callout_ovector[1];
326
0
callout_ovector[0] = callout_ovector[1] = PCRE2_UNSET;
327
0
rc = mb->callout(cb, mb->callout_data);
328
0
callout_ovector[0] = save0;
329
0
callout_ovector[1] = save1;
330
0
cb->callout_flags = 0;
331
0
return rc;
332
0
}
333
334
335
336
/*************************************************
337
*          Match a back-reference                *
338
*************************************************/
339
340
/* This function is called only when it is known that the offset lies within
341
the offsets that have so far been used in the match. Note that in caseless
342
UTF-8 mode, the number of subject bytes matched may be different to the number
343
of reference bytes. (In theory this could also happen in UTF-16 mode, but it
344
seems unlikely.)
345
346
Arguments:
347
  offset      index into the offset vector
348
  caseless    TRUE if caseless
349
  caseopts    bitmask of REFI_FLAG_XYZ values
350
  F           the current backtracking frame pointer
351
  mb          points to match block
352
  lengthptr   pointer for returning the length matched
353
354
Returns:      = 0 successful match; number of code units matched is set
355
              < 0 no match
356
              > 0 partial match
357
*/
358
359
static int
360
match_ref(PCRE2_SIZE offset, BOOL caseless, int caseopts, heapframe *F,
361
  match_block *mb, PCRE2_SIZE *lengthptr)
362
0
{
363
0
PCRE2_SPTR p;
364
0
PCRE2_SIZE length;
365
0
PCRE2_SPTR eptr;
366
0
PCRE2_SPTR eptr_start;
367
368
#ifndef SUPPORT_UNICODE
369
(void)caseopts; /* Avoid compiler warning. */
370
#endif
371
372
/* Deal with an unset group. The default is no match, but there is an option to
373
match an empty string. */
374
375
0
if (offset >= Foffset_top || Fovector[offset] == PCRE2_UNSET)
376
0
  {
377
0
  if ((mb->poptions & PCRE2_MATCH_UNSET_BACKREF) != 0)
378
0
    {
379
0
    *lengthptr = 0;
380
0
    return 0;      /* Match */
381
0
    }
382
0
  else return -1;  /* No match */
383
0
  }
384
385
/* Separate the caseless and UTF cases for speed. */
386
387
0
eptr = eptr_start = Feptr;
388
0
p = mb->start_subject + Fovector[offset];
389
0
length = Fovector[offset+1] - Fovector[offset];
390
0
PCRE2_ASSERT(eptr <= mb->end_subject);
391
392
0
if (caseless)
393
0
  {
394
0
#if defined SUPPORT_UNICODE
395
0
  BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
396
0
  BOOL caseless_restrict = (caseopts & REFI_FLAG_CASELESS_RESTRICT) != 0;
397
0
  BOOL turkish_casing = !caseless_restrict && (caseopts & REFI_FLAG_TURKISH_CASING) != 0;
398
399
0
  if (utf || (mb->poptions & PCRE2_UCP) != 0)
400
0
    {
401
0
    PCRE2_SPTR endptr = p + length;
402
403
    /* Match characters up to the end of the reference. NOTE: the number of
404
    code units matched may differ, because in UTF-8 there are some characters
405
    whose upper and lower case codes have different numbers of bytes. For
406
    example, U+023A (2 bytes in UTF-8) is the upper case version of U+2C65 (3
407
    bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a
408
    sequence of two of the latter. It is important, therefore, to check the
409
    length along the reference, not along the subject (earlier code did this
410
    wrong). UCP uses Unicode properties but without UTF encoding. */
411
412
0
    while (p < endptr)
413
0
      {
414
0
      uint32_t c, d;
415
0
      const ucd_record *ur;
416
0
      if (eptr >= mb->end_subject) return 1;   /* Partial match */
417
418
0
      if (utf)
419
0
        {
420
0
        GETCHARINC(c, eptr);
421
0
        GETCHARINC(d, p);
422
0
        }
423
0
      else
424
0
        {
425
0
        c = *eptr++;
426
0
        d = *p++;
427
0
        }
428
429
0
      if (turkish_casing && UCD_ANY_I(d))
430
0
        {
431
0
        c = UCD_FOLD_I_TURKISH(c);
432
0
        d = UCD_FOLD_I_TURKISH(d);
433
0
        if (c != d) return -1;  /* No match */
434
0
        }
435
0
      else if (c != d && c != (uint32_t)((int)d + (ur = GET_UCD(d))->other_case))
436
0
        {
437
0
        const uint32_t *pp = PRIV(ucd_caseless_sets) + ur->caseset;
438
439
        /* When PCRE2_EXTRA_CASELESS_RESTRICT is set, ignore any caseless sets
440
        that start with an ASCII character. */
441
0
        if (caseless_restrict && *pp < 128) return -1;  /* No match */
442
443
0
        for (;;)
444
0
          {
445
0
          if (c < *pp) return -1;  /* No match */
446
0
          if (c == *pp++) break;
447
0
          }
448
0
        }
449
0
      }
450
0
    }
451
0
  else
452
0
#endif
453
454
  /* Not in UTF or UCP mode */
455
0
    {
456
0
    for (; length > 0; length--)
457
0
      {
458
0
      uint32_t cc, cp;
459
0
      if (eptr >= mb->end_subject) return 1;   /* Partial match */
460
0
      cc = UCHAR21TEST(eptr);
461
0
      cp = UCHAR21TEST(p);
462
0
      if (TABLE_GET(cp, mb->lcc, cp) != TABLE_GET(cc, mb->lcc, cc))
463
0
        return -1;  /* No match */
464
0
      p++;
465
0
      eptr++;
466
0
      }
467
0
    }
468
0
  }
469
470
/* In the caseful case, we can just compare the code units, whether or not we
471
are in UTF and/or UCP mode. When partial matching, we have to do this unit by
472
unit. */
473
474
0
else
475
0
  {
476
0
  if (mb->partial != 0)
477
0
    {
478
0
    for (; length > 0; length--)
479
0
      {
480
0
      if (eptr >= mb->end_subject) return 1;   /* Partial match */
481
0
      if (UCHAR21INCTEST(p) != UCHAR21INCTEST(eptr)) return -1;  /* No match */
482
0
      }
483
0
    }
484
485
  /* Not partial matching */
486
487
0
  else
488
0
    {
489
0
    if ((PCRE2_SIZE)(mb->end_subject - eptr) < length ||
490
0
        memcmp(p, eptr, CU2BYTES(length)) != 0) return -1;  /* No match */
491
0
    eptr += length;
492
0
    }
493
0
  }
494
495
0
*lengthptr = eptr - eptr_start;
496
0
return 0;  /* Match */
497
0
}
498
499
500
501
/*************************************************
502
*     Restore offsets after a recurse            *
503
*************************************************/
504
505
/* This function restores the ovector values when
506
a recursive block reaches its end, and the triggering
507
recurse has an argument list.
508
509
Arguments:
510
  F           the current backtracking frame pointer
511
  P           the previous backtracking frame pointer
512
*/
513
514
static void
515
recurse_update_offsets(heapframe *F, heapframe *P)
516
0
{
517
0
PCRE2_SIZE *dst = F->ovector;
518
0
PCRE2_SIZE *src = P->ovector;
519
/* The first bracket has offset 2, because
520
offset 0 is reserved for the full match. */
521
0
PCRE2_SIZE offset = 2;
522
0
PCRE2_SIZE offset_top = Foffset_top + 2;
523
0
PCRE2_SIZE diff;
524
0
PCRE2_SPTR ecode = Fecode;
525
526
0
do
527
0
  {
528
0
  diff = (GET2(ecode, 1) << 1) - offset;
529
0
  ecode += 1 + IMM2_SIZE;
530
531
0
  if (offset + diff >= offset_top)
532
0
    {
533
    /* Some OP_CREF opcodes are not
534
    processed, they must be skipped. */
535
0
    while (*ecode == OP_CREF) ecode += 1 + IMM2_SIZE;
536
0
    break;
537
0
    }
538
539
0
  if (diff == 2)
540
0
    {
541
0
    dst[0] = src[0];
542
0
    dst[1] = src[1];
543
0
    }
544
0
  else if (diff >= 4)
545
0
    memcpy(dst, src, diff * sizeof(PCRE2_SIZE));
546
547
  /* Skip the unmodified entry. */
548
0
  diff += 2;
549
0
  offset += diff;
550
0
  dst += diff;
551
0
  src += diff;
552
0
  }
553
0
while (*ecode == OP_CREF);
554
555
0
diff = offset_top - offset;
556
0
if (diff == 2)
557
0
  {
558
0
  dst[0] = src[0];
559
0
  dst[1] = src[1];
560
0
  }
561
0
else if (diff >= 4)
562
0
  memcpy(dst, src, diff * sizeof(PCRE2_SIZE));
563
564
0
Fecode = ecode;
565
0
Foffset_top = (offset <= P->offset_top) ? P->offset_top : (offset - 2);
566
0
}
567
568
569
570
/******************************************************************************
571
*******************************************************************************
572
                   "Recursion" in the match() function
573
574
The original match() function was highly recursive, but this proved to be the
575
source of a number of problems over the years, mostly because of the relatively
576
small system stacks that are commonly found. As new features were added to
577
patterns, various kludges were invented to reduce the amount of stack used,
578
making the code hard to understand in places.
579
580
A version did exist that used individual frames on the heap instead of calling
581
match() recursively, but this ran substantially slower. The current version is
582
a refactoring that uses a vector of frames to remember backtracking points.
583
This runs no slower, and possibly even a bit faster than the original recursive
584
implementation.
585
586
At first, an initial vector of size START_FRAMES_SIZE (enough for maybe 50
587
frames) was allocated on the system stack. If this was not big enough, the heap
588
was used for a larger vector. However, it turns out that there are environments
589
where taking as little as 20KiB from the system stack is an embarrassment.
590
After another refactoring, the heap is used exclusively, but a pointer to the
591
frames vector and its size are cached in the match_data block, so that there is
592
no new memory allocation if the same match_data block is used for multiple
593
matches (unless the frames vector has to be extended).
594
*******************************************************************************
595
******************************************************************************/
596
597
598
599
600
/*************************************************
601
*       Macros for the match() function          *
602
*************************************************/
603
604
/* These macros pack up tests that are used for partial matching several times
605
in the code. The second one is used when we already know we are past the end of
606
the subject. We set the "hit end" flag if the pointer is at the end of the
607
subject and either (a) the pointer is past the earliest inspected character
608
(i.e. something has been matched, even if not part of the actual matched
609
string), or (b) the pattern contains a lookbehind. These are the conditions for
610
which adding more characters may allow the current match to continue.
611
612
For hard partial matching, we immediately return a partial match. Otherwise,
613
carrying on means that a complete match on the current subject will be sought.
614
A partial match is returned only if no complete match can be found. */
615
616
#define CHECK_PARTIAL() \
617
0
  do { \
618
0
     if (Feptr >= mb->end_subject) \
619
0
       { \
620
0
       SCHECK_PARTIAL(); \
621
0
       } \
622
0
     } \
623
0
  while (0)
624
625
#define SCHECK_PARTIAL() \
626
0
  do { \
627
0
     if (mb->partial != 0 && \
628
0
         (Feptr > mb->start_used_ptr || mb->allowemptypartial)) \
629
0
       { \
630
0
       mb->hitend = TRUE; \
631
0
       if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; \
632
0
       } \
633
0
     } \
634
0
  while (0)
635
636
637
/* These macros are used to implement backtracking. They simulate a recursive
638
call to the match() function by means of a local vector of frames which
639
remember the backtracking points. */
640
641
#define RMATCH(ra,rb) \
642
0
  do { \
643
0
     start_ecode = ra; \
644
0
     Freturn_id = rb; \
645
0
     goto MATCH_RECURSE; \
646
0
     L_##rb:; \
647
0
     } \
648
0
  while (0)
649
650
#define RRETURN(ra) \
651
0
  do { \
652
0
     rrc = ra; \
653
0
     goto RETURN_SWITCH; \
654
0
     } \
655
0
  while (0)
656
657
658
659
/*************************************************
660
*         Match from current position            *
661
*************************************************/
662
663
/* This function is called to run one match attempt at a single starting point
664
in the subject.
665
666
Performance note: It might be tempting to extract commonly used fields from the
667
mb structure (e.g. end_subject) into individual variables to improve
668
performance. Tests using gcc on a SPARC disproved this; in the first case, it
669
made performance worse.
670
671
Arguments:
672
   start_eptr   starting character in subject
673
   start_ecode  starting position in compiled code
674
   top_bracket  number of capturing parentheses in the pattern
675
   frame_size   size of each backtracking frame
676
   match_data   pointer to the match_data block
677
   mb           pointer to "static" variables block
678
679
Returns:        MATCH_MATCH if matched            )  these values are >= 0
680
                MATCH_NOMATCH if failed to match  )
681
                negative MATCH_xxx value for PRUNE, SKIP, etc
682
                negative PCRE2_ERROR_xxx value if aborted by an error condition
683
                (e.g. stopped by repeated call or depth limit)
684
*/
685
686
static int
687
match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket,
688
  PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb)
689
0
{
690
/* Frame-handling variables */
691
692
0
heapframe *F;           /* Current frame pointer */
693
0
heapframe *N = NULL;    /* Temporary frame pointers */
694
0
heapframe *P = NULL;
695
696
0
heapframe *frames_top;  /* End of frames vector */
697
0
heapframe *assert_accept_frame = NULL;  /* For passing back a frame with captures */
698
0
PCRE2_SIZE frame_copy_size;   /* Amount to copy when creating a new frame */
699
700
/* Local variables that do not need to be preserved over calls to RMATCH(). */
701
702
0
PCRE2_SPTR branch_end = NULL;
703
0
PCRE2_SPTR branch_start;
704
0
PCRE2_SPTR bracode;     /* Temp pointer to start of group */
705
0
PCRE2_SIZE offset;      /* Used for group offsets */
706
0
PCRE2_SIZE length;      /* Used for various length calculations */
707
708
0
int rrc;                /* Return from functions & backtracking "recursions" */
709
0
#ifdef SUPPORT_UNICODE
710
0
int proptype;           /* Type of character property */
711
0
#endif
712
713
0
uint32_t i;             /* Used for local loops */
714
0
uint32_t fc;            /* Character values */
715
0
uint32_t number;        /* Used for group and other numbers */
716
0
uint32_t reptype = 0;   /* Type of repetition (0 to avoid compiler warning) */
717
0
uint32_t group_frame_type;  /* Specifies type for new group frames */
718
719
0
BOOL condition;         /* Used in conditional groups */
720
0
BOOL cur_is_word;       /* Used in "word" tests */
721
0
BOOL prev_is_word;      /* Used in "word" tests */
722
723
/* UTF and UCP flags */
724
725
0
#ifdef SUPPORT_UNICODE
726
0
BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
727
0
BOOL ucp = (mb->poptions & PCRE2_UCP) != 0;
728
#else
729
BOOL utf = FALSE;  /* Required for convenience even when no Unicode support */
730
#endif
731
732
/* This is the length of the last part of a backtracking frame that must be
733
copied when a new frame is created. */
734
735
0
frame_copy_size = frame_size - offsetof(heapframe, eptr);
736
737
/* Set up the first frame and the end of the frames vector. */
738
739
0
F = match_data->heapframes;
740
0
frames_top = (heapframe *)((char *)F + match_data->heapframes_size);
741
742
0
Frdepth = 0;                        /* "Recursion" depth */
743
0
Fcapture_last = 0;                  /* Number of most recent capture */
744
0
Fcurrent_recurse = RECURSE_UNSET;   /* Not pattern recursing. */
745
0
Fstart_match = Feptr = start_eptr;  /* Current data pointer and start match */
746
0
Fmark = NULL;                       /* Most recent mark */
747
0
Foffset_top = 0;                    /* End of captures within the frame */
748
0
Flast_group_offset = PCRE2_UNSET;   /* Saved frame of most recent group */
749
0
group_frame_type = 0;               /* Not a start of group frame */
750
0
goto NEW_FRAME;                     /* Start processing with this frame */
751
752
/* Come back here when we want to create a new frame for remembering a
753
backtracking point. */
754
755
0
MATCH_RECURSE:
756
757
/* Set up a new backtracking frame. If the vector is full, get a new one,
758
doubling the size, but constrained by the heap limit (which is in KiB). */
759
760
0
N = (heapframe *)((char *)F + frame_size);
761
0
if ((heapframe *)((char *)N + frame_size) >= frames_top)
762
0
  {
763
0
  heapframe *new;
764
0
  PCRE2_SIZE newsize;
765
0
  PCRE2_SIZE usedsize = (char *)N - (char *)(match_data->heapframes);
766
767
0
  if (match_data->heapframes_size >= PCRE2_SIZE_MAX / 2)
768
0
    {
769
0
    if (match_data->heapframes_size == PCRE2_SIZE_MAX - 1)
770
0
      return PCRE2_ERROR_NOMEMORY;
771
0
    newsize = PCRE2_SIZE_MAX - 1;
772
0
    }
773
0
  else
774
0
    newsize = match_data->heapframes_size * 2;
775
776
0
  if (newsize / 1024 >= mb->heap_limit)
777
0
    {
778
0
    PCRE2_SIZE old_size = match_data->heapframes_size / 1024;
779
0
    if (mb->heap_limit <= old_size)
780
0
      return PCRE2_ERROR_HEAPLIMIT;
781
0
    else
782
0
      {
783
0
      PCRE2_SIZE max_delta = 1024 * (mb->heap_limit - old_size);
784
0
      int over_bytes = match_data->heapframes_size % 1024;
785
0
      if (over_bytes) max_delta -= (1024 - over_bytes);
786
0
      newsize = match_data->heapframes_size + max_delta;
787
0
      }
788
0
    }
789
790
  /* With a heap limit set, the permitted additional size may not be enough for
791
  another frame, so do a final check. */
792
793
0
  if (newsize - usedsize < frame_size) return PCRE2_ERROR_HEAPLIMIT;
794
0
  new = match_data->memctl.malloc(newsize, match_data->memctl.memory_data);
795
0
  if (new == NULL) return PCRE2_ERROR_NOMEMORY;
796
0
  memcpy(new, match_data->heapframes, usedsize);
797
798
0
  N = (heapframe *)((char *)new + usedsize);
799
0
  F = (heapframe *)((char *)N - frame_size);
800
801
0
  match_data->memctl.free(match_data->heapframes, match_data->memctl.memory_data);
802
0
  match_data->heapframes = new;
803
0
  match_data->heapframes_size = newsize;
804
0
  frames_top = (heapframe *)((char *)new + newsize);
805
0
  }
806
807
#ifdef DEBUG_SHOW_RMATCH
808
fprintf(stderr, "++ RMATCH %d frame=%d", Freturn_id, Frdepth + 1);
809
if (group_frame_type != 0)
810
  {
811
  fprintf(stderr, " type=%x ", group_frame_type);
812
  switch (GF_IDMASK(group_frame_type))
813
    {
814
    case GF_CAPTURE:
815
    fprintf(stderr, "capture=%d", GF_DATAMASK(group_frame_type));
816
    break;
817
818
    case GF_NOCAPTURE:
819
    fprintf(stderr, "nocapture op=%d", GF_DATAMASK(group_frame_type));
820
    break;
821
822
    case GF_CONDASSERT:
823
    fprintf(stderr, "condassert op=%d", GF_DATAMASK(group_frame_type));
824
    break;
825
826
    case GF_RECURSE:
827
    fprintf(stderr, "recurse=%d", GF_DATAMASK(group_frame_type));
828
    break;
829
830
    default:
831
    fprintf(stderr, "*** unknown ***");
832
    break;
833
    }
834
  }
835
fprintf(stderr, "\n");
836
#endif
837
838
/* Copy those fields that must be copied into the new frame, increase the
839
"recursion" depth (i.e. the new frame's index) and then make the new frame
840
current. */
841
842
0
memcpy((char *)N + offsetof(heapframe, eptr),
843
0
       (char *)F + offsetof(heapframe, eptr),
844
0
       frame_copy_size);
845
846
0
N->rdepth = Frdepth + 1;
847
0
F = N;
848
849
/* Carry on processing with a new frame. */
850
851
0
NEW_FRAME:
852
0
Fgroup_frame_type = group_frame_type;
853
0
Fecode = start_ecode;      /* Starting code pointer */
854
0
Fback_frame = frame_size;  /* Default is go back one frame */
855
856
/* If this is a special type of group frame, remember its offset for quick
857
access at the end of the group. If this is a recursion, set a new current
858
recursion value. */
859
860
0
if (group_frame_type != 0)
861
0
  {
862
0
  Flast_group_offset = (char *)F - (char *)match_data->heapframes;
863
0
  if (GF_IDMASK(group_frame_type) == GF_RECURSE)
864
0
    Fcurrent_recurse = GF_DATAMASK(group_frame_type);
865
0
  group_frame_type = 0;
866
0
  }
867
868
869
/* ========================================================================= */
870
/* This is the main processing loop. First check that we haven't recorded too
871
many backtracks (search tree is too large), or that we haven't exceeded the
872
recursive depth limit (used too many backtracking frames). If not, process the
873
opcodes. */
874
875
0
if (mb->match_call_count++ >= mb->match_limit) return PCRE2_ERROR_MATCHLIMIT;
876
0
if (Frdepth >= mb->match_limit_depth) return PCRE2_ERROR_DEPTHLIMIT;
877
878
#ifdef DEBUG_SHOW_OPS
879
fprintf(stderr, "\n++ New frame: type=0x%x subject offset %ld\n",
880
  GF_IDMASK(Fgroup_frame_type), Feptr - mb->start_subject);
881
#endif
882
883
0
for (;;)
884
0
  {
885
#ifdef DEBUG_SHOW_OPS
886
fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
887
  OP_names[*Fecode]);
888
#endif
889
890
0
  Fop = (uint8_t)(*Fecode);  /* Cast needed for 16-bit and 32-bit modes */
891
0
  switch(Fop)
892
0
    {
893
    /* ===================================================================== */
894
    /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes, to close
895
    any currently open capturing brackets. Unlike reaching the end of a group,
896
    where we know the starting frame is at the top of the chained frames, in
897
    this case we have to search back for the relevant frame in case other types
898
    of group that use chained frames have intervened. Multiple OP_CLOSEs always
899
    come innermost first, which matches the chain order. We can ignore this in
900
    a recursion, because captures are not passed out of recursions. */
901
902
0
    case OP_CLOSE:
903
0
    if (Fcurrent_recurse == RECURSE_UNSET)
904
0
      {
905
0
      number = GET2(Fecode, 1);
906
0
      offset = Flast_group_offset;
907
0
      for(;;)
908
0
        {
909
        /* Corrupted heapframes? Trigger an assert and return an error. */
910
0
        PCRE2_ASSERT(offset != PCRE2_UNSET);
911
0
        if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL;
912
913
0
        N = (heapframe *)((char *)match_data->heapframes + offset);
914
0
        P = (heapframe *)((char *)N - frame_size);
915
0
        if (N->group_frame_type == (GF_CAPTURE | number)) break;
916
0
        offset = P->last_group_offset;
917
0
        }
918
0
      offset = (number << 1) - 2;
919
0
      Fcapture_last = number;
920
0
      Fovector[offset] = P->eptr - mb->start_subject;
921
0
      Fovector[offset+1] = Feptr - mb->start_subject;
922
0
      if (offset >= Foffset_top) Foffset_top = offset + 2;
923
0
      }
924
0
    Fecode += PRIV(OP_lengths)[*Fecode];
925
0
    break;
926
927
928
    /* ===================================================================== */
929
    /* Real or forced end of the pattern, assertion, or recursion. In an
930
    assertion ACCEPT, update the last used pointer and remember the current
931
    frame so that the captures and mark can be fished out of it. */
932
933
0
    case OP_ASSERT_ACCEPT:
934
0
    if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
935
0
    assert_accept_frame = F;
936
0
    RRETURN(MATCH_ACCEPT);
937
938
    /* For ACCEPT within a recursion, we have to find the most recent
939
    recursion. If not in a recursion, fall through to code that is common with
940
    OP_END. */
941
942
0
    case OP_ACCEPT:
943
0
    if (Fcurrent_recurse != RECURSE_UNSET)
944
0
      {
945
#ifdef DEBUG_SHOW_OPS
946
      fprintf(stderr, "++ Accept within recursion\n");
947
#endif
948
0
      offset = Flast_group_offset;
949
0
      for(;;)
950
0
        {
951
        /* Corrupted heapframes? Trigger an assert and return an error. */
952
0
        PCRE2_ASSERT(offset != PCRE2_UNSET);
953
0
        if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL;
954
955
0
        N = (heapframe *)((char *)match_data->heapframes + offset);
956
0
        P = (heapframe *)((char *)N - frame_size);
957
0
        if (GF_IDMASK(N->group_frame_type) == GF_RECURSE) break;
958
0
        offset = P->last_group_offset;
959
0
        }
960
961
      /* N is now the frame of the recursion; the previous frame is at the
962
      OP_RECURSE position. Go back there, copying the current subject position
963
      and mark, and the start_match position (\K might have changed it), and
964
      then move on past the OP_RECURSE. */
965
966
0
      P->eptr = Feptr;
967
0
      P->mark = Fmark;
968
0
      P->start_match = Fstart_match;
969
0
      F = P;
970
0
      Fecode += 1 + LINK_SIZE;
971
0
      continue;
972
0
      }
973
0
    PCRE2_FALLTHROUGH /* Fall through */
974
0
975
0
    /* OP_END itself can never be reached within a recursion because that is
976
0
    picked up when the OP_KET that always precedes OP_END is reached. */
977
0
978
0
    case OP_END:
979
980
    /* Fail for an empty string match if either PCRE2_NOTEMPTY is set, or if
981
    PCRE2_NOTEMPTY_ATSTART is set and we have matched at the start of the
982
    subject. In both cases, backtracking will then try other alternatives, if
983
    any. */
984
985
0
    if (Feptr == Fstart_match &&
986
0
         ((mb->moptions & PCRE2_NOTEMPTY) != 0 ||
987
0
           ((mb->moptions & PCRE2_NOTEMPTY_ATSTART) != 0 &&
988
0
             Fstart_match == mb->start_subject + mb->start_offset)))
989
0
      {
990
#ifdef DEBUG_SHOW_OPS
991
      fprintf(stderr, "++ Backtrack because empty string\n");
992
#endif
993
0
      RRETURN(MATCH_NOMATCH);
994
0
      }
995
996
    /* Fail if PCRE2_ENDANCHORED is set and the end of the match is not
997
    the end of the subject. After (*ACCEPT) we fail the entire match (at this
998
    position) but backtrack if we've reached the end of the pattern. This
999
    applies whether or not we are in a recursion. */
1000
1001
0
    if (Feptr < mb->end_subject &&
1002
0
        ((mb->moptions | mb->poptions) & PCRE2_ENDANCHORED) != 0)
1003
0
      {
1004
0
      if (Fop == OP_END)
1005
0
        {
1006
#ifdef DEBUG_SHOW_OPS
1007
        fprintf(stderr, "++ Backtrack because not at end (endanchored set)\n");
1008
#endif
1009
0
        RRETURN(MATCH_NOMATCH);
1010
0
        }
1011
1012
#ifdef DEBUG_SHOW_OPS
1013
      fprintf(stderr, "++ Failed ACCEPT not at end (endanchored set)\n");
1014
#endif
1015
0
      return MATCH_NOMATCH;   /* (*ACCEPT) */
1016
0
      }
1017
1018
    /* Fail if we detect that the start position was moved to be either after
1019
    the end position (\K in lookahead) or before the start offset (\K in
1020
    lookbehind). If this occurs, the pattern must have used \K in a somewhat
1021
    sneaky way (e.g. by pattern recursion), because if the \K is actually
1022
    syntactically inside the lookaround, it's blocked at compile-time. */
1023
1024
0
    if (Fstart_match < mb->start_subject + mb->start_offset ||
1025
0
        Fstart_match > Feptr)
1026
0
      {
1027
      /* The \K expression is fairly rare. We assert it was used so that we
1028
      catch any unexpected invalid data in start_match. */
1029
0
      PCRE2_ASSERT(mb->hasbsk);
1030
1031
0
      if (!mb->allowlookaroundbsk)
1032
0
        return PCRE2_ERROR_BAD_BACKSLASH_K;
1033
0
      }
1034
1035
    /* We have a successful match of the whole pattern. Record the result and
1036
    then do a direct return from the function. If there is space in the offset
1037
    vector, set any pairs that follow the highest-numbered captured string but
1038
    are less than the number of capturing groups in the pattern to PCRE2_UNSET.
1039
    It is documented that this happens. "Gaps" are set to PCRE2_UNSET
1040
    dynamically. It is only those at the end that need setting here. */
1041
1042
0
    mb->end_match_ptr = Feptr;           /* Record where we ended */
1043
0
    mb->end_offset_top = Foffset_top;    /* and how many extracts were taken */
1044
0
    mb->mark = Fmark;                    /* and the last success mark */
1045
0
    if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
1046
1047
0
    match_data->ovector[0] = Fstart_match - mb->start_subject;
1048
0
    match_data->ovector[1] = Feptr - mb->start_subject;
1049
1050
    /* Set i to the smaller of the sizes of the external and frame ovectors. */
1051
1052
0
    i = 2 * ((top_bracket + 1 > match_data->oveccount)?
1053
0
      match_data->oveccount : top_bracket + 1);
1054
0
    memcpy(match_data->ovector + 2, Fovector, (i - 2) * sizeof(PCRE2_SIZE));
1055
0
    while (--i >= Foffset_top + 2) match_data->ovector[i] = PCRE2_UNSET;
1056
0
    return MATCH_MATCH;  /* Note: NOT RRETURN */
1057
1058
1059
    /*===================================================================== */
1060
    /* Match any single character type except newline; have to take care with
1061
    CRLF newlines and partial matching. */
1062
1063
0
    case OP_ANY:
1064
0
    if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
1065
0
    if (mb->partial != 0 &&
1066
0
        Feptr == mb->end_subject - 1 &&
1067
0
        NLBLOCK->nltype == NLTYPE_FIXED &&
1068
0
        NLBLOCK->nllen == 2 &&
1069
0
        UCHAR21TEST(Feptr) == NLBLOCK->nl[0])
1070
0
      {
1071
0
      mb->hitend = TRUE;
1072
0
      if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
1073
0
      }
1074
0
    PCRE2_FALLTHROUGH /* Fall through */
1075
0
1076
0
    /* Match any single character whatsoever. */
1077
0
1078
0
    case OP_ALLANY:
1079
0
    if (Feptr >= mb->end_subject)  /* DO NOT merge the Feptr++ here; it must */
1080
0
      {                            /* not be updated before SCHECK_PARTIAL. */
1081
0
      SCHECK_PARTIAL();
1082
0
      RRETURN(MATCH_NOMATCH);
1083
0
      }
1084
0
    Feptr++;
1085
0
#ifdef SUPPORT_UNICODE
1086
0
    if (utf) ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
1087
0
#endif
1088
0
    Fecode++;
1089
0
    break;
1090
1091
1092
    /* ===================================================================== */
1093
    /* Match a single code unit, even in UTF mode. This opcode really does
1094
    match any code unit, even newline. (It really should be called ANYCODEUNIT,
1095
    of course - the byte name is from pre-16 bit days.) */
1096
1097
0
    case OP_ANYBYTE:
1098
0
    if (Feptr >= mb->end_subject)   /* DO NOT merge the Feptr++ here; it must */
1099
0
      {                             /* not be updated before SCHECK_PARTIAL. */
1100
0
      SCHECK_PARTIAL();
1101
0
      RRETURN(MATCH_NOMATCH);
1102
0
      }
1103
0
    Feptr++;
1104
0
    Fecode++;
1105
0
    break;
1106
1107
1108
    /* ===================================================================== */
1109
    /* Match a single character, casefully */
1110
1111
0
    case OP_CHAR:
1112
0
#ifdef SUPPORT_UNICODE
1113
0
    if (utf)
1114
0
      {
1115
0
      Flength = 1;
1116
0
      Fecode++;
1117
0
      GETCHARLEN(fc, Fecode, Flength);
1118
0
      if (Flength > (PCRE2_SIZE)(mb->end_subject - Feptr))
1119
0
        {
1120
0
        CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
1121
0
        RRETURN(MATCH_NOMATCH);
1122
0
        }
1123
0
      for (; Flength > 0; Flength--)
1124
0
        {
1125
0
        if (*Fecode++ != UCHAR21INC(Feptr)) RRETURN(MATCH_NOMATCH);
1126
0
        }
1127
0
      }
1128
0
    else
1129
0
#endif
1130
1131
    /* Not UTF mode */
1132
0
      {
1133
0
      if (mb->end_subject - Feptr < 1)
1134
0
        {
1135
0
        SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
1136
0
        RRETURN(MATCH_NOMATCH);
1137
0
        }
1138
0
      if (Fecode[1] != *Feptr++) RRETURN(MATCH_NOMATCH);
1139
0
      Fecode += 2;
1140
0
      }
1141
0
    break;
1142
1143
1144
    /* ===================================================================== */
1145
    /* Match a single character, caselessly. If we are at the end of the
1146
    subject, give up immediately. We get here only when the pattern character
1147
    has at most one other case. Characters with more than two cases are coded
1148
    as OP_PROP with the pseudo-property PT_CLIST. */
1149
1150
0
    case OP_CHARI:
1151
0
    if (Feptr >= mb->end_subject)
1152
0
      {
1153
0
      SCHECK_PARTIAL();
1154
0
      RRETURN(MATCH_NOMATCH);
1155
0
      }
1156
1157
0
#ifdef SUPPORT_UNICODE
1158
0
    if (utf)
1159
0
      {
1160
0
      Flength = 1;
1161
0
      Fecode++;
1162
0
      GETCHARLEN(fc, Fecode, Flength);
1163
1164
      /* If the pattern character's value is < 128, we know that its other case
1165
      (if any) is also < 128 (and therefore only one code unit long in all
1166
      code-unit widths), so we can use the fast lookup table. We checked above
1167
      that there is at least one character left in the subject. */
1168
1169
0
      if (fc < 128)
1170
0
        {
1171
0
        uint32_t cc = UCHAR21(Feptr);
1172
0
        if (mb->lcc[fc] != TABLE_GET(cc, mb->lcc, cc)) RRETURN(MATCH_NOMATCH);
1173
0
        Fecode++;
1174
0
        Feptr++;
1175
0
        }
1176
1177
      /* Otherwise we must pick up the subject character and use Unicode
1178
      property support to test its other case. Note that we cannot use the
1179
      value of "Flength" to check for sufficient bytes left, because the other
1180
      case of the character may have more or fewer code units. */
1181
1182
0
      else
1183
0
        {
1184
0
        uint32_t dc;
1185
0
        GETCHARINC(dc, Feptr);
1186
0
        Fecode += Flength;
1187
0
        if (dc != fc && dc != UCD_OTHERCASE(fc)) RRETURN(MATCH_NOMATCH);
1188
0
        }
1189
0
      }
1190
1191
    /* If UCP is set without UTF we must do the same as above, but with one
1192
    character per code unit. */
1193
1194
0
    else if (ucp)
1195
0
      {
1196
0
      uint32_t cc = UCHAR21(Feptr);
1197
0
      fc = Fecode[1];
1198
0
      if (fc < 128)
1199
0
        {
1200
0
        if (mb->lcc[fc] != TABLE_GET(cc, mb->lcc, cc)) RRETURN(MATCH_NOMATCH);
1201
0
        }
1202
0
      else
1203
0
        {
1204
0
        if (cc != fc && cc != UCD_OTHERCASE(fc)) RRETURN(MATCH_NOMATCH);
1205
0
        }
1206
0
      Feptr++;
1207
0
      Fecode += 2;
1208
0
      }
1209
1210
0
    else
1211
0
#endif   /* SUPPORT_UNICODE */
1212
1213
    /* Not UTF or UCP mode; use the table for characters < 256. */
1214
0
      {
1215
0
      if (TABLE_GET(Fecode[1], mb->lcc, Fecode[1])
1216
0
          != TABLE_GET(*Feptr, mb->lcc, *Feptr)) RRETURN(MATCH_NOMATCH);
1217
0
      Feptr++;
1218
0
      Fecode += 2;
1219
0
      }
1220
0
    break;
1221
1222
1223
    /* ===================================================================== */
1224
    /* Match not a single character. */
1225
1226
0
    case OP_NOT:
1227
0
    case OP_NOTI:
1228
0
    if (Feptr >= mb->end_subject)
1229
0
      {
1230
0
      SCHECK_PARTIAL();
1231
0
      RRETURN(MATCH_NOMATCH);
1232
0
      }
1233
1234
0
#ifdef SUPPORT_UNICODE
1235
0
    if (utf)
1236
0
      {
1237
0
      uint32_t ch;
1238
0
      Fecode++;
1239
0
      GETCHARINC(ch, Fecode);
1240
0
      GETCHARINC(fc, Feptr);
1241
0
      if (ch == fc)
1242
0
        {
1243
0
        RRETURN(MATCH_NOMATCH);  /* Caseful match */
1244
0
        }
1245
0
      else if (Fop == OP_NOTI)   /* If caseless */
1246
0
        {
1247
0
        if (ch > 127)
1248
0
          ch = UCD_OTHERCASE(ch);
1249
0
        else
1250
0
          ch = (mb->fcc)[ch];
1251
0
        if (ch == fc) RRETURN(MATCH_NOMATCH);
1252
0
        }
1253
0
      }
1254
1255
    /* UCP without UTF is as above, but with one character per code unit. */
1256
1257
0
    else if (ucp)
1258
0
      {
1259
0
      uint32_t ch;
1260
0
      fc = UCHAR21INC(Feptr);
1261
0
      ch = Fecode[1];
1262
0
      Fecode += 2;
1263
1264
0
      if (ch == fc)
1265
0
        {
1266
0
        RRETURN(MATCH_NOMATCH);  /* Caseful match */
1267
0
        }
1268
0
      else if (Fop == OP_NOTI)   /* If caseless */
1269
0
        {
1270
0
        if (ch > 127)
1271
0
          ch = UCD_OTHERCASE(ch);
1272
0
        else
1273
0
          ch = (mb->fcc)[ch];
1274
0
        if (ch == fc) RRETURN(MATCH_NOMATCH);
1275
0
        }
1276
0
      }
1277
1278
0
    else
1279
0
#endif  /* SUPPORT_UNICODE */
1280
1281
    /* Neither UTF nor UCP is set */
1282
1283
0
      {
1284
0
      uint32_t ch = Fecode[1];
1285
0
      fc = UCHAR21INC(Feptr);
1286
0
      if (ch == fc || (Fop == OP_NOTI && TABLE_GET(ch, mb->fcc, ch) == fc))
1287
0
        RRETURN(MATCH_NOMATCH);
1288
0
      Fecode += 2;
1289
0
      }
1290
0
    break;
1291
1292
1293
    /* ===================================================================== */
1294
    /* Match a single character repeatedly. */
1295
1296
0
#define Loclength    F->temp_size
1297
0
#define Lstart_eptr  F->temp_sptr[0]
1298
0
#define Lcharptr     F->temp_sptr[1]
1299
0
#define Lmin         F->temp_32[0]
1300
0
#define Lmax         F->temp_32[1]
1301
0
#define Lc           F->temp_32[2]
1302
0
#define Loc          F->temp_32[3]
1303
1304
0
    case OP_EXACT:
1305
0
    case OP_EXACTI:
1306
0
    Lmin = Lmax = GET2(Fecode, 1);
1307
0
    Fecode += 1 + IMM2_SIZE;
1308
0
    goto REPEATCHAR;
1309
1310
0
    case OP_POSUPTO:
1311
0
    case OP_POSUPTOI:
1312
0
    reptype = REPTYPE_POS;
1313
0
    Lmin = 0;
1314
0
    Lmax = GET2(Fecode, 1);
1315
0
    Fecode += 1 + IMM2_SIZE;
1316
0
    goto REPEATCHAR;
1317
1318
0
    case OP_UPTO:
1319
0
    case OP_UPTOI:
1320
0
    reptype = REPTYPE_MAX;
1321
0
    Lmin = 0;
1322
0
    Lmax = GET2(Fecode, 1);
1323
0
    Fecode += 1 + IMM2_SIZE;
1324
0
    goto REPEATCHAR;
1325
1326
0
    case OP_MINUPTO:
1327
0
    case OP_MINUPTOI:
1328
0
    reptype = REPTYPE_MIN;
1329
0
    Lmin = 0;
1330
0
    Lmax = GET2(Fecode, 1);
1331
0
    Fecode += 1 + IMM2_SIZE;
1332
0
    goto REPEATCHAR;
1333
1334
0
    case OP_POSSTAR:
1335
0
    case OP_POSSTARI:
1336
0
    reptype = REPTYPE_POS;
1337
0
    Lmin = 0;
1338
0
    Lmax = UINT32_MAX;
1339
0
    Fecode++;
1340
0
    goto REPEATCHAR;
1341
1342
0
    case OP_POSPLUS:
1343
0
    case OP_POSPLUSI:
1344
0
    reptype = REPTYPE_POS;
1345
0
    Lmin = 1;
1346
0
    Lmax = UINT32_MAX;
1347
0
    Fecode++;
1348
0
    goto REPEATCHAR;
1349
1350
0
    case OP_POSQUERY:
1351
0
    case OP_POSQUERYI:
1352
0
    reptype = REPTYPE_POS;
1353
0
    Lmin = 0;
1354
0
    Lmax = 1;
1355
0
    Fecode++;
1356
0
    goto REPEATCHAR;
1357
1358
0
    case OP_STAR:
1359
0
    case OP_STARI:
1360
0
    case OP_MINSTAR:
1361
0
    case OP_MINSTARI:
1362
0
    case OP_PLUS:
1363
0
    case OP_PLUSI:
1364
0
    case OP_MINPLUS:
1365
0
    case OP_MINPLUSI:
1366
0
    case OP_QUERY:
1367
0
    case OP_QUERYI:
1368
0
    case OP_MINQUERY:
1369
0
    case OP_MINQUERYI:
1370
0
    fc = *Fecode++ - ((Fop < OP_STARI)? OP_STAR : OP_STARI);
1371
0
    Lmin = rep_min[fc];
1372
0
    Lmax = rep_max[fc];
1373
0
    reptype = rep_typ[fc];
1374
1375
    /* Common code for all repeated single-character matches. We first check
1376
    for the minimum number of characters. If the minimum equals the maximum, we
1377
    are done. Otherwise, if minimizing, check the rest of the pattern for a
1378
    match; if there isn't one, advance up to the maximum, one character at a
1379
    time.
1380
1381
    If maximizing, advance up to the maximum number of matching characters,
1382
    until Feptr is past the end of the maximum run. If possessive, we are
1383
    then done (no backing up). Otherwise, match at this position; anything
1384
    other than no match is immediately returned. For nomatch, back up one
1385
    character, unless we are matching \R and the last thing matched was
1386
    \r\n, in which case, back up two code units until we reach the first
1387
    optional character position.
1388
1389
    The various UTF/non-UTF and caseful/caseless cases are handled separately,
1390
    for speed. */
1391
1392
0
    REPEATCHAR:
1393
0
#ifdef SUPPORT_UNICODE
1394
0
    if (utf)
1395
0
      {
1396
0
      Flength = 1;
1397
0
      Lcharptr = Fecode;
1398
0
      GETCHARLEN(fc, Fecode, Flength);
1399
0
      Fecode += Flength;
1400
1401
      /* Handle multi-code-unit character matching, caseful and caseless. */
1402
1403
0
      if (Flength > 1)
1404
0
        {
1405
0
        uint32_t othercase;
1406
1407
0
        if (Fop >= OP_STARI &&     /* Caseless */
1408
0
            (othercase = UCD_OTHERCASE(fc)) != fc)
1409
0
          Loclength = PRIV(ord2utf)(othercase, Foccu);
1410
0
        else Loclength = 0;
1411
1412
0
        for (i = 1; i <= Lmin; i++)
1413
0
          {
1414
0
          if (Feptr <= mb->end_subject - Flength &&
1415
0
            memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0) Feptr += Flength;
1416
0
          else if (Loclength > 0 &&
1417
0
                   Feptr <= mb->end_subject - Loclength &&
1418
0
                   memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0)
1419
0
            Feptr += Loclength;
1420
0
          else
1421
0
            {
1422
0
            CHECK_PARTIAL();
1423
0
            RRETURN(MATCH_NOMATCH);
1424
0
            }
1425
0
          }
1426
1427
0
        if (Lmin == Lmax) continue;
1428
1429
0
        if (reptype == REPTYPE_MIN)
1430
0
          {
1431
0
          for (;;)
1432
0
            {
1433
0
            RMATCH(Fecode, RM202);
1434
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1435
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1436
0
            if (Feptr <= mb->end_subject - Flength &&
1437
0
              memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0) Feptr += Flength;
1438
0
            else if (Loclength > 0 &&
1439
0
                     Feptr <= mb->end_subject - Loclength &&
1440
0
                     memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0)
1441
0
              Feptr += Loclength;
1442
0
            else
1443
0
              {
1444
0
              CHECK_PARTIAL();
1445
0
              RRETURN(MATCH_NOMATCH);
1446
0
              }
1447
0
            }
1448
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
1449
0
          }
1450
1451
0
        else  /* Maximize */
1452
0
          {
1453
0
          Lstart_eptr = Feptr;
1454
0
          for (i = Lmin; i < Lmax; i++)
1455
0
            {
1456
0
            if (Feptr <= mb->end_subject - Flength &&
1457
0
                memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0)
1458
0
              Feptr += Flength;
1459
0
            else if (Loclength > 0 &&
1460
0
                     Feptr <= mb->end_subject - Loclength &&
1461
0
                     memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0)
1462
0
              Feptr += Loclength;
1463
0
            else
1464
0
              {
1465
0
              CHECK_PARTIAL();
1466
0
              break;
1467
0
              }
1468
0
            }
1469
1470
          /* After \C in UTF mode, Lstart_eptr might be in the middle of a
1471
          Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
1472
          go too far. */
1473
1474
0
          if (reptype != REPTYPE_POS) for(;;)
1475
0
            {
1476
0
            if (Feptr <= Lstart_eptr) break;
1477
0
            RMATCH(Fecode, RM203);
1478
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1479
0
            Feptr--;
1480
0
            BACKCHAR(Feptr);
1481
0
            }
1482
0
          }
1483
0
        break;   /* End of repeated wide character handling */
1484
0
        }
1485
1486
      /* Length of UTF character is 1. Put it into the preserved variable and
1487
      fall through to the non-UTF code. */
1488
1489
0
      Lc = fc;
1490
0
      }
1491
0
    else
1492
0
#endif  /* SUPPORT_UNICODE */
1493
1494
    /* When not in UTF mode, load a single-code-unit character. Then proceed as
1495
    above, using Unicode casing if either UTF or UCP is set. */
1496
1497
0
    Lc = *Fecode++;
1498
1499
    /* Caseless comparison */
1500
1501
0
    if (Fop >= OP_STARI)
1502
0
      {
1503
0
#if PCRE2_CODE_UNIT_WIDTH == 8
1504
0
#ifdef SUPPORT_UNICODE
1505
0
      if (ucp && !utf && Lc > 127) Loc = UCD_OTHERCASE(Lc);
1506
0
      else
1507
0
#endif  /* SUPPORT_UNICODE */
1508
      /* Lc will be < 128 in UTF-8 mode. */
1509
0
      Loc = mb->fcc[Lc];
1510
#else /* 16-bit & 32-bit */
1511
#ifdef SUPPORT_UNICODE
1512
      if ((utf || ucp) && Lc > 127) Loc = UCD_OTHERCASE(Lc);
1513
      else
1514
#endif  /* SUPPORT_UNICODE */
1515
      Loc = TABLE_GET(Lc, mb->fcc, Lc);
1516
#endif  /* PCRE2_CODE_UNIT_WIDTH == 8 */
1517
1518
0
      for (i = 1; i <= Lmin; i++)
1519
0
        {
1520
0
        uint32_t cc;                 /* Faster than PCRE2_UCHAR */
1521
0
        if (Feptr >= mb->end_subject)
1522
0
          {
1523
0
          SCHECK_PARTIAL();
1524
0
          RRETURN(MATCH_NOMATCH);
1525
0
          }
1526
0
        cc = UCHAR21TEST(Feptr);
1527
0
        if (Lc != cc && Loc != cc) RRETURN(MATCH_NOMATCH);
1528
0
        Feptr++;
1529
0
        }
1530
0
      if (Lmin == Lmax) continue;
1531
1532
0
      if (reptype == REPTYPE_MIN)
1533
0
        {
1534
0
        for (;;)
1535
0
          {
1536
0
          uint32_t cc;               /* Faster than PCRE2_UCHAR */
1537
0
          RMATCH(Fecode, RM25);
1538
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1539
0
          if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1540
0
          if (Feptr >= mb->end_subject)
1541
0
            {
1542
0
            SCHECK_PARTIAL();
1543
0
            RRETURN(MATCH_NOMATCH);
1544
0
            }
1545
0
          cc = UCHAR21TEST(Feptr);
1546
0
          if (Lc != cc && Loc != cc) RRETURN(MATCH_NOMATCH);
1547
0
          Feptr++;
1548
0
          }
1549
0
        PCRE2_UNREACHABLE(); /* Control never reaches here */
1550
0
        }
1551
1552
0
      else  /* Maximize */
1553
0
        {
1554
0
        Lstart_eptr = Feptr;
1555
0
        for (i = Lmin; i < Lmax; i++)
1556
0
          {
1557
0
          uint32_t cc;               /* Faster than PCRE2_UCHAR */
1558
0
          if (Feptr >= mb->end_subject)
1559
0
            {
1560
0
            SCHECK_PARTIAL();
1561
0
            break;
1562
0
            }
1563
0
          cc = UCHAR21TEST(Feptr);
1564
0
          if (Lc != cc && Loc != cc) break;
1565
0
          Feptr++;
1566
0
          }
1567
0
        if (reptype != REPTYPE_POS) for (;;)
1568
0
          {
1569
0
          if (Feptr == Lstart_eptr) break;
1570
0
          RMATCH(Fecode, RM26);
1571
0
          Feptr--;
1572
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1573
0
          }
1574
0
        }
1575
0
      }
1576
1577
    /* Caseful comparisons (includes all multi-byte characters) */
1578
1579
0
    else
1580
0
      {
1581
0
      for (i = 1; i <= Lmin; i++)
1582
0
        {
1583
0
        if (Feptr >= mb->end_subject)
1584
0
          {
1585
0
          SCHECK_PARTIAL();
1586
0
          RRETURN(MATCH_NOMATCH);
1587
0
          }
1588
0
        if (Lc != UCHAR21INCTEST(Feptr)) RRETURN(MATCH_NOMATCH);
1589
0
        }
1590
1591
0
      if (Lmin == Lmax) continue;
1592
1593
0
      if (reptype == REPTYPE_MIN)
1594
0
        {
1595
0
        for (;;)
1596
0
          {
1597
0
          RMATCH(Fecode, RM27);
1598
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1599
0
          if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1600
0
          if (Feptr >= mb->end_subject)
1601
0
            {
1602
0
            SCHECK_PARTIAL();
1603
0
            RRETURN(MATCH_NOMATCH);
1604
0
            }
1605
0
          if (Lc != UCHAR21INCTEST(Feptr)) RRETURN(MATCH_NOMATCH);
1606
0
          }
1607
0
        PCRE2_UNREACHABLE(); /* Control never reaches here */
1608
0
        }
1609
0
      else  /* Maximize */
1610
0
        {
1611
0
        Lstart_eptr = Feptr;
1612
0
        for (i = Lmin; i < Lmax; i++)
1613
0
          {
1614
0
          if (Feptr >= mb->end_subject)
1615
0
            {
1616
0
            SCHECK_PARTIAL();
1617
0
            break;
1618
0
            }
1619
1620
0
          if (Lc != UCHAR21TEST(Feptr)) break;
1621
0
          Feptr++;
1622
0
          }
1623
1624
0
        if (reptype != REPTYPE_POS) for (;;)
1625
0
          {
1626
0
          if (Feptr <= Lstart_eptr) break;
1627
0
          RMATCH(Fecode, RM28);
1628
0
          Feptr--;
1629
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1630
0
          }
1631
0
        }
1632
0
      }
1633
0
    break;
1634
1635
0
#undef Loclength
1636
0
#undef Lstart_eptr
1637
0
#undef Lcharptr
1638
0
#undef Lmin
1639
0
#undef Lmax
1640
0
#undef Lc
1641
0
#undef Loc
1642
1643
1644
    /* ===================================================================== */
1645
    /* Match a negated single one-byte character repeatedly. This is almost a
1646
    repeat of the code for a repeated single character, but I haven't found a
1647
    nice way of commoning these up that doesn't require a test of the
1648
    positive/negative option for each character match. Maybe that wouldn't add
1649
    very much to the time taken, but character matching *is* what this is all
1650
    about... */
1651
1652
0
#define Lstart_eptr  F->temp_sptr[0]
1653
0
#define Lmin         F->temp_32[0]
1654
0
#define Lmax         F->temp_32[1]
1655
0
#define Lc           F->temp_32[2]
1656
0
#define Loc          F->temp_32[3]
1657
1658
0
    case OP_NOTEXACT:
1659
0
    case OP_NOTEXACTI:
1660
0
    Lmin = Lmax = GET2(Fecode, 1);
1661
0
    Fecode += 1 + IMM2_SIZE;
1662
0
    goto REPEATNOTCHAR;
1663
1664
0
    case OP_NOTUPTO:
1665
0
    case OP_NOTUPTOI:
1666
0
    Lmin = 0;
1667
0
    Lmax = GET2(Fecode, 1);
1668
0
    reptype = REPTYPE_MAX;
1669
0
    Fecode += 1 + IMM2_SIZE;
1670
0
    goto REPEATNOTCHAR;
1671
1672
0
    case OP_NOTMINUPTO:
1673
0
    case OP_NOTMINUPTOI:
1674
0
    Lmin = 0;
1675
0
    Lmax = GET2(Fecode, 1);
1676
0
    reptype = REPTYPE_MIN;
1677
0
    Fecode += 1 + IMM2_SIZE;
1678
0
    goto REPEATNOTCHAR;
1679
1680
0
    case OP_NOTPOSSTAR:
1681
0
    case OP_NOTPOSSTARI:
1682
0
    reptype = REPTYPE_POS;
1683
0
    Lmin = 0;
1684
0
    Lmax = UINT32_MAX;
1685
0
    Fecode++;
1686
0
    goto REPEATNOTCHAR;
1687
1688
0
    case OP_NOTPOSPLUS:
1689
0
    case OP_NOTPOSPLUSI:
1690
0
    reptype = REPTYPE_POS;
1691
0
    Lmin = 1;
1692
0
    Lmax = UINT32_MAX;
1693
0
    Fecode++;
1694
0
    goto REPEATNOTCHAR;
1695
1696
0
    case OP_NOTPOSQUERY:
1697
0
    case OP_NOTPOSQUERYI:
1698
0
    reptype = REPTYPE_POS;
1699
0
    Lmin = 0;
1700
0
    Lmax = 1;
1701
0
    Fecode++;
1702
0
    goto REPEATNOTCHAR;
1703
1704
0
    case OP_NOTPOSUPTO:
1705
0
    case OP_NOTPOSUPTOI:
1706
0
    reptype = REPTYPE_POS;
1707
0
    Lmin = 0;
1708
0
    Lmax = GET2(Fecode, 1);
1709
0
    Fecode += 1 + IMM2_SIZE;
1710
0
    goto REPEATNOTCHAR;
1711
1712
0
    case OP_NOTSTAR:
1713
0
    case OP_NOTSTARI:
1714
0
    case OP_NOTMINSTAR:
1715
0
    case OP_NOTMINSTARI:
1716
0
    case OP_NOTPLUS:
1717
0
    case OP_NOTPLUSI:
1718
0
    case OP_NOTMINPLUS:
1719
0
    case OP_NOTMINPLUSI:
1720
0
    case OP_NOTQUERY:
1721
0
    case OP_NOTQUERYI:
1722
0
    case OP_NOTMINQUERY:
1723
0
    case OP_NOTMINQUERYI:
1724
0
    fc = *Fecode++ - ((Fop >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
1725
0
    Lmin = rep_min[fc];
1726
0
    Lmax = rep_max[fc];
1727
0
    reptype = rep_typ[fc];
1728
1729
    /* Common code for all repeated single-character non-matches. */
1730
1731
0
    REPEATNOTCHAR:
1732
0
    GETCHARINCTEST(Lc, Fecode);
1733
1734
    /* The code is duplicated for the caseless and caseful cases, for speed,
1735
    since matching characters is likely to be quite common. First, ensure the
1736
    minimum number of matches are present. If Lmin = Lmax, we are done.
1737
    Otherwise, if minimizing, keep trying the rest of the expression and
1738
    advancing one matching character if failing, up to the maximum.
1739
    Alternatively, if maximizing, find the maximum number of characters and
1740
    work backwards. */
1741
1742
0
    if (Fop >= OP_NOTSTARI)     /* Caseless */
1743
0
      {
1744
0
#ifdef SUPPORT_UNICODE
1745
0
      if ((utf || ucp) && Lc > 127)
1746
0
        Loc = UCD_OTHERCASE(Lc);
1747
0
      else
1748
0
#endif /* SUPPORT_UNICODE */
1749
1750
0
      Loc = TABLE_GET(Lc, mb->fcc, Lc);  /* Other case from table */
1751
1752
0
#ifdef SUPPORT_UNICODE
1753
0
      if (utf)
1754
0
        {
1755
0
        uint32_t d;
1756
0
        for (i = 1; i <= Lmin; i++)
1757
0
          {
1758
0
          if (Feptr >= mb->end_subject)
1759
0
            {
1760
0
            SCHECK_PARTIAL();
1761
0
            RRETURN(MATCH_NOMATCH);
1762
0
            }
1763
0
          GETCHARINC(d, Feptr);
1764
0
          if (Lc == d || Loc == d) RRETURN(MATCH_NOMATCH);
1765
0
          }
1766
0
        }
1767
0
      else
1768
0
#endif  /* SUPPORT_UNICODE */
1769
1770
      /* Not UTF mode */
1771
0
        {
1772
0
        for (i = 1; i <= Lmin; i++)
1773
0
          {
1774
0
          if (Feptr >= mb->end_subject)
1775
0
            {
1776
0
            SCHECK_PARTIAL();
1777
0
            RRETURN(MATCH_NOMATCH);
1778
0
            }
1779
0
          if (Lc == *Feptr || Loc == *Feptr) RRETURN(MATCH_NOMATCH);
1780
0
          Feptr++;
1781
0
          }
1782
0
        }
1783
1784
0
      if (Lmin == Lmax) continue;  /* Finished for exact count */
1785
1786
0
      if (reptype == REPTYPE_MIN)
1787
0
        {
1788
0
#ifdef SUPPORT_UNICODE
1789
0
        if (utf)
1790
0
          {
1791
0
          uint32_t d;
1792
0
          for (;;)
1793
0
            {
1794
0
            RMATCH(Fecode, RM204);
1795
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1796
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1797
0
            if (Feptr >= mb->end_subject)
1798
0
              {
1799
0
              SCHECK_PARTIAL();
1800
0
              RRETURN(MATCH_NOMATCH);
1801
0
              }
1802
0
            GETCHARINC(d, Feptr);
1803
0
            if (Lc == d || Loc == d) RRETURN(MATCH_NOMATCH);
1804
0
            }
1805
0
          }
1806
0
        else
1807
0
#endif  /*SUPPORT_UNICODE */
1808
1809
        /* Not UTF mode */
1810
0
          {
1811
0
          for (;;)
1812
0
            {
1813
0
            RMATCH(Fecode, RM29);
1814
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1815
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1816
0
            if (Feptr >= mb->end_subject)
1817
0
              {
1818
0
              SCHECK_PARTIAL();
1819
0
              RRETURN(MATCH_NOMATCH);
1820
0
              }
1821
0
            if (Lc == *Feptr || Loc == *Feptr) RRETURN(MATCH_NOMATCH);
1822
0
            Feptr++;
1823
0
            }
1824
0
          }
1825
0
        PCRE2_UNREACHABLE(); /* Control never reaches here */
1826
0
        }
1827
1828
      /* Maximize case */
1829
1830
0
      else
1831
0
        {
1832
0
        Lstart_eptr = Feptr;
1833
1834
0
#ifdef SUPPORT_UNICODE
1835
0
        if (utf)
1836
0
          {
1837
0
          uint32_t d;
1838
0
          for (i = Lmin; i < Lmax; i++)
1839
0
            {
1840
0
            int len = 1;
1841
0
            if (Feptr >= mb->end_subject)
1842
0
              {
1843
0
              SCHECK_PARTIAL();
1844
0
              break;
1845
0
              }
1846
0
            GETCHARLEN(d, Feptr, len);
1847
0
            if (Lc == d || Loc == d) break;
1848
0
            Feptr += len;
1849
0
            }
1850
1851
          /* After \C in UTF mode, Lstart_eptr might be in the middle of a
1852
          Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
1853
          go too far. */
1854
1855
0
          if (reptype != REPTYPE_POS) for(;;)
1856
0
            {
1857
0
            if (Feptr <= Lstart_eptr) break;
1858
0
            RMATCH(Fecode, RM205);
1859
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1860
0
            Feptr--;
1861
0
            BACKCHAR(Feptr);
1862
0
            }
1863
0
          }
1864
0
        else
1865
0
#endif  /* SUPPORT_UNICODE */
1866
1867
        /* Not UTF mode */
1868
0
          {
1869
0
          for (i = Lmin; i < Lmax; i++)
1870
0
            {
1871
0
            if (Feptr >= mb->end_subject)
1872
0
              {
1873
0
              SCHECK_PARTIAL();
1874
0
              break;
1875
0
              }
1876
0
            if (Lc == *Feptr || Loc == *Feptr) break;
1877
0
            Feptr++;
1878
0
            }
1879
0
          if (reptype != REPTYPE_POS) for (;;)
1880
0
            {
1881
0
            if (Feptr == Lstart_eptr) break;
1882
0
            RMATCH(Fecode, RM30);
1883
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1884
0
            Feptr--;
1885
0
            }
1886
0
          }
1887
0
        }
1888
0
      }
1889
1890
    /* Caseful comparisons */
1891
1892
0
    else
1893
0
      {
1894
0
#ifdef SUPPORT_UNICODE
1895
0
      if (utf)
1896
0
        {
1897
0
        uint32_t d;
1898
0
        for (i = 1; i <= Lmin; i++)
1899
0
          {
1900
0
          if (Feptr >= mb->end_subject)
1901
0
            {
1902
0
            SCHECK_PARTIAL();
1903
0
            RRETURN(MATCH_NOMATCH);
1904
0
            }
1905
0
          GETCHARINC(d, Feptr);
1906
0
          if (Lc == d) RRETURN(MATCH_NOMATCH);
1907
0
          }
1908
0
        }
1909
0
      else
1910
0
#endif
1911
      /* Not UTF mode */
1912
0
        {
1913
0
        for (i = 1; i <= Lmin; i++)
1914
0
          {
1915
0
          if (Feptr >= mb->end_subject)
1916
0
            {
1917
0
            SCHECK_PARTIAL();
1918
0
            RRETURN(MATCH_NOMATCH);
1919
0
            }
1920
0
          if (Lc == *Feptr++) RRETURN(MATCH_NOMATCH);
1921
0
          }
1922
0
        }
1923
1924
0
      if (Lmin == Lmax) continue;
1925
1926
0
      if (reptype == REPTYPE_MIN)
1927
0
        {
1928
0
#ifdef SUPPORT_UNICODE
1929
0
        if (utf)
1930
0
          {
1931
0
          uint32_t d;
1932
0
          for (;;)
1933
0
            {
1934
0
            RMATCH(Fecode, RM206);
1935
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1936
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1937
0
            if (Feptr >= mb->end_subject)
1938
0
              {
1939
0
              SCHECK_PARTIAL();
1940
0
              RRETURN(MATCH_NOMATCH);
1941
0
              }
1942
0
            GETCHARINC(d, Feptr);
1943
0
            if (Lc == d) RRETURN(MATCH_NOMATCH);
1944
0
            }
1945
0
          }
1946
0
        else
1947
0
#endif
1948
        /* Not UTF mode */
1949
0
          {
1950
0
          for (;;)
1951
0
            {
1952
0
            RMATCH(Fecode, RM31);
1953
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1954
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1955
0
            if (Feptr >= mb->end_subject)
1956
0
              {
1957
0
              SCHECK_PARTIAL();
1958
0
              RRETURN(MATCH_NOMATCH);
1959
0
              }
1960
0
            if (Lc == *Feptr++) RRETURN(MATCH_NOMATCH);
1961
0
            }
1962
0
          }
1963
0
        PCRE2_UNREACHABLE(); /* Control never reaches here */
1964
0
        }
1965
1966
      /* Maximize case */
1967
1968
0
      else
1969
0
        {
1970
0
        Lstart_eptr = Feptr;
1971
1972
0
#ifdef SUPPORT_UNICODE
1973
0
        if (utf)
1974
0
          {
1975
0
          uint32_t d;
1976
0
          for (i = Lmin; i < Lmax; i++)
1977
0
            {
1978
0
            int len = 1;
1979
0
            if (Feptr >= mb->end_subject)
1980
0
              {
1981
0
              SCHECK_PARTIAL();
1982
0
              break;
1983
0
              }
1984
0
            GETCHARLEN(d, Feptr, len);
1985
0
            if (Lc == d) break;
1986
0
            Feptr += len;
1987
0
            }
1988
1989
          /* After \C in UTF mode, Lstart_eptr might be in the middle of a
1990
          Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
1991
          go too far. */
1992
1993
0
          if (reptype != REPTYPE_POS) for(;;)
1994
0
            {
1995
0
            if (Feptr <= Lstart_eptr) break;
1996
0
            RMATCH(Fecode, RM207);
1997
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1998
0
            Feptr--;
1999
0
            BACKCHAR(Feptr);
2000
0
            }
2001
0
          }
2002
0
        else
2003
0
#endif
2004
        /* Not UTF mode */
2005
0
          {
2006
0
          for (i = Lmin; i < Lmax; i++)
2007
0
            {
2008
0
            if (Feptr >= mb->end_subject)
2009
0
              {
2010
0
              SCHECK_PARTIAL();
2011
0
              break;
2012
0
              }
2013
0
            if (Lc == *Feptr) break;
2014
0
            Feptr++;
2015
0
            }
2016
0
          if (reptype != REPTYPE_POS) for (;;)
2017
0
            {
2018
0
            if (Feptr == Lstart_eptr) break;
2019
0
            RMATCH(Fecode, RM32);
2020
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2021
0
            Feptr--;
2022
0
            }
2023
0
          }
2024
0
        }
2025
0
      }
2026
0
    break;
2027
2028
0
#undef Lstart_eptr
2029
0
#undef Lmin
2030
0
#undef Lmax
2031
0
#undef Lc
2032
0
#undef Loc
2033
2034
2035
    /* ===================================================================== */
2036
    /* Match a bit-mapped character class, possibly repeatedly. These opcodes
2037
    are used when all the characters in the class have values in the range
2038
    0-255, and either the matching is caseful, or the characters are in the
2039
    range 0-127 when UTF processing is enabled. The only difference between
2040
    OP_CLASS and OP_NCLASS occurs when a data character outside the range is
2041
    encountered. */
2042
2043
0
#define Lmin               F->temp_32[0]
2044
0
#define Lmax               F->temp_32[1]
2045
0
#define Lstart_eptr        F->temp_sptr[0]
2046
0
#define Lbyte_map_address  F->temp_sptr[1]
2047
0
#define Lbyte_map          ((const unsigned char *)Lbyte_map_address)
2048
2049
0
    case OP_NCLASS:
2050
0
    case OP_CLASS:
2051
0
      {
2052
0
      Lbyte_map_address = Fecode + 1;           /* Save for matching */
2053
0
      Fecode += 1 + (32 / sizeof(PCRE2_UCHAR)); /* Advance past the item */
2054
2055
      /* Look past the end of the item to see if there is repeat information
2056
      following. Then obey similar code to character type repeats. */
2057
2058
0
      switch (*Fecode)
2059
0
        {
2060
0
        case OP_CRSTAR:
2061
0
        case OP_CRMINSTAR:
2062
0
        case OP_CRPLUS:
2063
0
        case OP_CRMINPLUS:
2064
0
        case OP_CRQUERY:
2065
0
        case OP_CRMINQUERY:
2066
0
        case OP_CRPOSSTAR:
2067
0
        case OP_CRPOSPLUS:
2068
0
        case OP_CRPOSQUERY:
2069
0
        fc = *Fecode++ - OP_CRSTAR;
2070
0
        Lmin = rep_min[fc];
2071
0
        Lmax = rep_max[fc];
2072
0
        reptype = rep_typ[fc];
2073
0
        break;
2074
2075
0
        case OP_CRRANGE:
2076
0
        case OP_CRMINRANGE:
2077
0
        case OP_CRPOSRANGE:
2078
0
        Lmin = GET2(Fecode, 1);
2079
0
        Lmax = GET2(Fecode, 1 + IMM2_SIZE);
2080
0
        if (Lmax == 0) Lmax = UINT32_MAX;       /* Max 0 => infinity */
2081
0
        reptype = rep_typ[*Fecode - OP_CRSTAR];
2082
0
        Fecode += 1 + 2 * IMM2_SIZE;
2083
0
        break;
2084
2085
0
        default:               /* No repeat follows */
2086
0
        Lmin = Lmax = 1;
2087
0
        break;
2088
0
        }
2089
2090
      /* First, ensure the minimum number of matches are present. */
2091
2092
0
#ifdef SUPPORT_UNICODE
2093
0
      if (utf)
2094
0
        {
2095
0
        for (i = 1; i <= Lmin; i++)
2096
0
          {
2097
0
          if (Feptr >= mb->end_subject)
2098
0
            {
2099
0
            SCHECK_PARTIAL();
2100
0
            RRETURN(MATCH_NOMATCH);
2101
0
            }
2102
0
          GETCHARINC(fc, Feptr);
2103
0
          if (fc > 255)
2104
0
            {
2105
0
            if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
2106
0
            }
2107
0
          else
2108
0
            if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
2109
0
          }
2110
0
        }
2111
0
      else
2112
0
#endif
2113
      /* Not UTF mode */
2114
0
        {
2115
0
        for (i = 1; i <= Lmin; i++)
2116
0
          {
2117
0
          if (Feptr >= mb->end_subject)
2118
0
            {
2119
0
            SCHECK_PARTIAL();
2120
0
            RRETURN(MATCH_NOMATCH);
2121
0
            }
2122
0
          fc = *Feptr++;
2123
#if PCRE2_CODE_UNIT_WIDTH != 8
2124
          if (fc > 255)
2125
            {
2126
            if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
2127
            }
2128
          else
2129
#endif
2130
0
          if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
2131
0
          }
2132
0
        }
2133
2134
      /* If Lmax == Lmin we are done. Continue with main loop. */
2135
2136
0
      if (Lmin == Lmax) continue;
2137
2138
      /* If minimizing, keep testing the rest of the expression and advancing
2139
      the pointer while it matches the class. */
2140
2141
0
      if (reptype == REPTYPE_MIN)
2142
0
        {
2143
0
#ifdef SUPPORT_UNICODE
2144
0
        if (utf)
2145
0
          {
2146
0
          for (;;)
2147
0
            {
2148
0
            RMATCH(Fecode, RM200);
2149
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2150
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
2151
0
            if (Feptr >= mb->end_subject)
2152
0
              {
2153
0
              SCHECK_PARTIAL();
2154
0
              RRETURN(MATCH_NOMATCH);
2155
0
              }
2156
0
            GETCHARINC(fc, Feptr);
2157
0
            if (fc > 255)
2158
0
              {
2159
0
              if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
2160
0
              }
2161
0
            else
2162
0
              if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
2163
0
            }
2164
0
          }
2165
0
        else
2166
0
#endif
2167
        /* Not UTF mode */
2168
0
          {
2169
0
          for (;;)
2170
0
            {
2171
0
            RMATCH(Fecode, RM23);
2172
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2173
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
2174
0
            if (Feptr >= mb->end_subject)
2175
0
              {
2176
0
              SCHECK_PARTIAL();
2177
0
              RRETURN(MATCH_NOMATCH);
2178
0
              }
2179
0
            fc = *Feptr++;
2180
#if PCRE2_CODE_UNIT_WIDTH != 8
2181
            if (fc > 255)
2182
              {
2183
              if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
2184
              }
2185
            else
2186
#endif
2187
0
            if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
2188
0
            }
2189
0
          }
2190
0
        PCRE2_UNREACHABLE(); /* Control never reaches here */
2191
0
        }
2192
2193
      /* If maximizing, find the longest possible run, then work backwards. */
2194
2195
0
      else
2196
0
        {
2197
0
        Lstart_eptr = Feptr;
2198
2199
0
#ifdef SUPPORT_UNICODE
2200
0
        if (utf)
2201
0
          {
2202
0
          for (i = Lmin; i < Lmax; i++)
2203
0
            {
2204
0
            int len = 1;
2205
0
            if (Feptr >= mb->end_subject)
2206
0
              {
2207
0
              SCHECK_PARTIAL();
2208
0
              break;
2209
0
              }
2210
0
            GETCHARLEN(fc, Feptr, len);
2211
0
            if (fc > 255)
2212
0
              {
2213
0
              if (Fop == OP_CLASS) break;
2214
0
              }
2215
0
            else
2216
0
              if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) break;
2217
0
            Feptr += len;
2218
0
            }
2219
2220
0
          if (reptype == REPTYPE_POS) continue;    /* No backtracking */
2221
2222
          /* After \C in UTF mode, Lstart_eptr might be in the middle of a
2223
          Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
2224
          go too far. */
2225
2226
0
          for (;;)
2227
0
            {
2228
0
            RMATCH(Fecode, RM201);
2229
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2230
0
            if (Feptr-- <= Lstart_eptr) break;  /* Tried at original position */
2231
0
            BACKCHAR(Feptr);
2232
0
            }
2233
0
          }
2234
0
        else
2235
0
#endif
2236
          /* Not UTF mode */
2237
0
          {
2238
0
          for (i = Lmin; i < Lmax; i++)
2239
0
            {
2240
0
            if (Feptr >= mb->end_subject)
2241
0
              {
2242
0
              SCHECK_PARTIAL();
2243
0
              break;
2244
0
              }
2245
0
            fc = *Feptr;
2246
#if PCRE2_CODE_UNIT_WIDTH != 8
2247
            if (fc > 255)
2248
              {
2249
              if (Fop == OP_CLASS) break;
2250
              }
2251
            else
2252
#endif
2253
0
            if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) break;
2254
0
            Feptr++;
2255
0
            }
2256
2257
0
          if (reptype == REPTYPE_POS) continue;    /* No backtracking */
2258
2259
0
          while (Feptr >= Lstart_eptr)
2260
0
            {
2261
0
            RMATCH(Fecode, RM24);
2262
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2263
0
            Feptr--;
2264
0
            }
2265
0
          }
2266
2267
0
        RRETURN(MATCH_NOMATCH);
2268
0
        }
2269
0
      }
2270
2271
0
    PCRE2_UNREACHABLE(); /* Control never reaches here */
2272
2273
0
#undef Lbyte_map_address
2274
0
#undef Lbyte_map
2275
0
#undef Lstart_eptr
2276
0
#undef Lmin
2277
0
#undef Lmax
2278
2279
2280
    /* ===================================================================== */
2281
    /* Match an extended character class. In the 8-bit library, this opcode is
2282
    encountered only when UTF-8 mode is supported. In the 16-bit and
2283
    32-bit libraries, codepoints greater than 255 may be encountered even when
2284
    UTF is not supported. */
2285
2286
0
#define Lstart_eptr  F->temp_sptr[0]
2287
0
#define Lxclass_data F->temp_sptr[1]
2288
0
#define Lmin         F->temp_32[0]
2289
0
#define Lmax         F->temp_32[1]
2290
2291
0
#ifdef SUPPORT_WIDE_CHARS
2292
0
    case OP_XCLASS:
2293
0
      {
2294
0
      Lxclass_data = Fecode + 1 + LINK_SIZE;  /* Save for matching */
2295
0
      Fecode += GET(Fecode, 1);               /* Advance past the item */
2296
2297
0
      switch (*Fecode)
2298
0
        {
2299
0
        case OP_CRSTAR:
2300
0
        case OP_CRMINSTAR:
2301
0
        case OP_CRPLUS:
2302
0
        case OP_CRMINPLUS:
2303
0
        case OP_CRQUERY:
2304
0
        case OP_CRMINQUERY:
2305
0
        case OP_CRPOSSTAR:
2306
0
        case OP_CRPOSPLUS:
2307
0
        case OP_CRPOSQUERY:
2308
0
        fc = *Fecode++ - OP_CRSTAR;
2309
0
        Lmin = rep_min[fc];
2310
0
        Lmax = rep_max[fc];
2311
0
        reptype = rep_typ[fc];
2312
0
        break;
2313
2314
0
        case OP_CRRANGE:
2315
0
        case OP_CRMINRANGE:
2316
0
        case OP_CRPOSRANGE:
2317
0
        Lmin = GET2(Fecode, 1);
2318
0
        Lmax = GET2(Fecode, 1 + IMM2_SIZE);
2319
0
        if (Lmax == 0) Lmax = UINT32_MAX;  /* Max 0 => infinity */
2320
0
        reptype = rep_typ[*Fecode - OP_CRSTAR];
2321
0
        Fecode += 1 + 2 * IMM2_SIZE;
2322
0
        break;
2323
2324
0
        default:               /* No repeat follows */
2325
0
        Lmin = Lmax = 1;
2326
0
        break;
2327
0
        }
2328
2329
      /* First, ensure the minimum number of matches are present. */
2330
2331
0
      for (i = 1; i <= Lmin; i++)
2332
0
        {
2333
0
        if (Feptr >= mb->end_subject)
2334
0
          {
2335
0
          SCHECK_PARTIAL();
2336
0
          RRETURN(MATCH_NOMATCH);
2337
0
          }
2338
0
        GETCHARINCTEST(fc, Feptr);
2339
0
        if (!PRIV(xclass)(fc, Lxclass_data,
2340
0
            (const uint8_t*)mb->start_code, utf))
2341
0
          RRETURN(MATCH_NOMATCH);
2342
0
        }
2343
2344
      /* If Lmax == Lmin we can just continue with the main loop. */
2345
2346
0
      if (Lmin == Lmax) continue;
2347
2348
      /* If minimizing, keep testing the rest of the expression and advancing
2349
      the pointer while it matches the class. */
2350
2351
0
      if (reptype == REPTYPE_MIN)
2352
0
        {
2353
0
        for (;;)
2354
0
          {
2355
0
          RMATCH(Fecode, RM100);
2356
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2357
0
          if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
2358
0
          if (Feptr >= mb->end_subject)
2359
0
            {
2360
0
            SCHECK_PARTIAL();
2361
0
            RRETURN(MATCH_NOMATCH);
2362
0
            }
2363
0
          GETCHARINCTEST(fc, Feptr);
2364
0
          if (!PRIV(xclass)(fc, Lxclass_data,
2365
0
              (const uint8_t*)mb->start_code, utf))
2366
0
            RRETURN(MATCH_NOMATCH);
2367
0
          }
2368
0
        PCRE2_UNREACHABLE(); /* Control never reaches here */
2369
0
        }
2370
2371
      /* If maximizing, find the longest possible run, then work backwards. */
2372
2373
0
      else
2374
0
        {
2375
0
        Lstart_eptr = Feptr;
2376
0
        for (i = Lmin; i < Lmax; i++)
2377
0
          {
2378
0
          int len = 1;
2379
0
          if (Feptr >= mb->end_subject)
2380
0
            {
2381
0
            SCHECK_PARTIAL();
2382
0
            break;
2383
0
            }
2384
0
#ifdef SUPPORT_UNICODE
2385
0
          GETCHARLENTEST(fc, Feptr, len);
2386
#else
2387
          fc = *Feptr;
2388
#endif
2389
0
          if (!PRIV(xclass)(fc, Lxclass_data,
2390
0
              (const uint8_t*)mb->start_code, utf)) break;
2391
0
          Feptr += len;
2392
0
          }
2393
2394
0
        if (reptype == REPTYPE_POS) continue;    /* No backtracking */
2395
2396
        /* After \C in UTF mode, Lstart_eptr might be in the middle of a
2397
        Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
2398
        go too far. */
2399
2400
0
        for(;;)
2401
0
          {
2402
0
          RMATCH(Fecode, RM101);
2403
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2404
0
          if (Feptr-- <= Lstart_eptr) break;  /* Tried at original position */
2405
0
#ifdef SUPPORT_UNICODE
2406
0
          if (utf) BACKCHAR(Feptr);
2407
0
#endif
2408
0
          }
2409
0
        RRETURN(MATCH_NOMATCH);
2410
0
        }
2411
2412
0
      PCRE2_UNREACHABLE(); /* Control never reaches here */
2413
0
      }
2414
0
#endif  /* SUPPORT_WIDE_CHARS: end of XCLASS */
2415
2416
0
#undef Lstart_eptr
2417
0
#undef Lxclass_data
2418
0
#undef Lmin
2419
0
#undef Lmax
2420
2421
2422
    /* ===================================================================== */
2423
    /* Match a complex, set-based character class. These opcodes are used when
2424
    there is complex nesting or logical operations within the character
2425
    class. */
2426
2427
0
#define Lstart_eptr  F->temp_sptr[0]
2428
0
#define Leclass_data F->temp_sptr[1]
2429
0
#define Leclass_len  F->temp_size
2430
0
#define Lmin         F->temp_32[0]
2431
0
#define Lmax         F->temp_32[1]
2432
2433
0
#ifdef SUPPORT_WIDE_CHARS
2434
0
    case OP_ECLASS:
2435
0
      {
2436
0
      Leclass_data = Fecode + 1 + LINK_SIZE;  /* Save for matching */
2437
0
      Fecode += GET(Fecode, 1);               /* Advance past the item */
2438
0
      Leclass_len = (PCRE2_SIZE)(Fecode - Leclass_data);
2439
2440
0
      switch (*Fecode)
2441
0
        {
2442
0
        case OP_CRSTAR:
2443
0
        case OP_CRMINSTAR:
2444
0
        case OP_CRPLUS:
2445
0
        case OP_CRMINPLUS:
2446
0
        case OP_CRQUERY:
2447
0
        case OP_CRMINQUERY:
2448
0
        case OP_CRPOSSTAR:
2449
0
        case OP_CRPOSPLUS:
2450
0
        case OP_CRPOSQUERY:
2451
0
        fc = *Fecode++ - OP_CRSTAR;
2452
0
        Lmin = rep_min[fc];
2453
0
        Lmax = rep_max[fc];
2454
0
        reptype = rep_typ[fc];
2455
0
        break;
2456
2457
0
        case OP_CRRANGE:
2458
0
        case OP_CRMINRANGE:
2459
0
        case OP_CRPOSRANGE:
2460
0
        Lmin = GET2(Fecode, 1);
2461
0
        Lmax = GET2(Fecode, 1 + IMM2_SIZE);
2462
0
        if (Lmax == 0) Lmax = UINT32_MAX;  /* Max 0 => infinity */
2463
0
        reptype = rep_typ[*Fecode - OP_CRSTAR];
2464
0
        Fecode += 1 + 2 * IMM2_SIZE;
2465
0
        break;
2466
2467
0
        default:               /* No repeat follows */
2468
0
        Lmin = Lmax = 1;
2469
0
        break;
2470
0
        }
2471
2472
      /* First, ensure the minimum number of matches are present. */
2473
2474
0
      for (i = 1; i <= Lmin; i++)
2475
0
        {
2476
0
        if (Feptr >= mb->end_subject)
2477
0
          {
2478
0
          SCHECK_PARTIAL();
2479
0
          RRETURN(MATCH_NOMATCH);
2480
0
          }
2481
0
        GETCHARINCTEST(fc, Feptr);
2482
0
        if (!PRIV(eclass)(fc, Leclass_data, Leclass_data + Leclass_len,
2483
0
                          (const uint8_t*)mb->start_code, utf))
2484
0
          RRETURN(MATCH_NOMATCH);
2485
0
        }
2486
2487
      /* If Lmax == Lmin we can just continue with the main loop. */
2488
2489
0
      if (Lmin == Lmax) continue;
2490
2491
      /* If minimizing, keep testing the rest of the expression and advancing
2492
      the pointer while it matches the class. */
2493
2494
0
      if (reptype == REPTYPE_MIN)
2495
0
        {
2496
0
        for (;;)
2497
0
          {
2498
0
          RMATCH(Fecode, RM102);
2499
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2500
0
          if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
2501
0
          if (Feptr >= mb->end_subject)
2502
0
            {
2503
0
            SCHECK_PARTIAL();
2504
0
            RRETURN(MATCH_NOMATCH);
2505
0
            }
2506
0
          GETCHARINCTEST(fc, Feptr);
2507
0
          if (!PRIV(eclass)(fc, Leclass_data, Leclass_data + Leclass_len,
2508
0
                            (const uint8_t*)mb->start_code, utf))
2509
0
            RRETURN(MATCH_NOMATCH);
2510
0
          }
2511
0
        PCRE2_UNREACHABLE(); /* Control never reaches here */
2512
0
        }
2513
2514
      /* If maximizing, find the longest possible run, then work backwards. */
2515
2516
0
      else
2517
0
        {
2518
0
        Lstart_eptr = Feptr;
2519
0
        for (i = Lmin; i < Lmax; i++)
2520
0
          {
2521
0
          int len = 1;
2522
0
          if (Feptr >= mb->end_subject)
2523
0
            {
2524
0
            SCHECK_PARTIAL();
2525
0
            break;
2526
0
            }
2527
0
#ifdef SUPPORT_UNICODE
2528
0
          GETCHARLENTEST(fc, Feptr, len);
2529
#else
2530
          fc = *Feptr;
2531
#endif
2532
0
          if (!PRIV(eclass)(fc, Leclass_data, Leclass_data + Leclass_len,
2533
0
                            (const uint8_t*)mb->start_code, utf))
2534
0
            break;
2535
0
          Feptr += len;
2536
0
          }
2537
2538
0
        if (reptype == REPTYPE_POS) continue;    /* No backtracking */
2539
2540
        /* After \C in UTF mode, Lstart_eptr might be in the middle of a
2541
        Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
2542
        go too far. */
2543
2544
0
        for(;;)
2545
0
          {
2546
0
          RMATCH(Fecode, RM103);
2547
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2548
0
          if (Feptr-- <= Lstart_eptr) break;  /* Tried at original position */
2549
0
#ifdef SUPPORT_UNICODE
2550
0
          if (utf) BACKCHAR(Feptr);
2551
0
#endif
2552
0
          }
2553
0
        RRETURN(MATCH_NOMATCH);
2554
0
        }
2555
2556
0
      PCRE2_UNREACHABLE(); /* Control never reaches here */
2557
0
      }
2558
0
#endif  /* SUPPORT_WIDE_CHARS: end of ECLASS */
2559
2560
0
#undef Lstart_eptr
2561
0
#undef Leclass_data
2562
0
#undef Leclass_len
2563
0
#undef Lmin
2564
0
#undef Lmax
2565
2566
2567
    /* ===================================================================== */
2568
    /* Match various character types when PCRE2_UCP is not set. These opcodes
2569
    are not generated when PCRE2_UCP is set - instead appropriate property
2570
    tests are compiled. */
2571
2572
0
    case OP_NOT_DIGIT:
2573
0
    if (Feptr >= mb->end_subject)
2574
0
      {
2575
0
      SCHECK_PARTIAL();
2576
0
      RRETURN(MATCH_NOMATCH);
2577
0
      }
2578
0
    GETCHARINCTEST(fc, Feptr);
2579
0
    if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_digit) != 0)
2580
0
      RRETURN(MATCH_NOMATCH);
2581
0
    Fecode++;
2582
0
    break;
2583
2584
0
    case OP_DIGIT:
2585
0
    if (Feptr >= mb->end_subject)
2586
0
      {
2587
0
      SCHECK_PARTIAL();
2588
0
      RRETURN(MATCH_NOMATCH);
2589
0
      }
2590
0
    GETCHARINCTEST(fc, Feptr);
2591
0
    if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_digit) == 0)
2592
0
      RRETURN(MATCH_NOMATCH);
2593
0
    Fecode++;
2594
0
    break;
2595
2596
0
    case OP_NOT_WHITESPACE:
2597
0
    if (Feptr >= mb->end_subject)
2598
0
      {
2599
0
      SCHECK_PARTIAL();
2600
0
      RRETURN(MATCH_NOMATCH);
2601
0
      }
2602
0
    GETCHARINCTEST(fc, Feptr);
2603
0
    if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_space) != 0)
2604
0
      RRETURN(MATCH_NOMATCH);
2605
0
    Fecode++;
2606
0
    break;
2607
2608
0
    case OP_WHITESPACE:
2609
0
    if (Feptr >= mb->end_subject)
2610
0
      {
2611
0
      SCHECK_PARTIAL();
2612
0
      RRETURN(MATCH_NOMATCH);
2613
0
      }
2614
0
    GETCHARINCTEST(fc, Feptr);
2615
0
    if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_space) == 0)
2616
0
      RRETURN(MATCH_NOMATCH);
2617
0
    Fecode++;
2618
0
    break;
2619
2620
0
    case OP_NOT_WORDCHAR:
2621
0
    if (Feptr >= mb->end_subject)
2622
0
      {
2623
0
      SCHECK_PARTIAL();
2624
0
      RRETURN(MATCH_NOMATCH);
2625
0
      }
2626
0
    GETCHARINCTEST(fc, Feptr);
2627
0
    if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0)
2628
0
      RRETURN(MATCH_NOMATCH);
2629
0
    Fecode++;
2630
0
    break;
2631
2632
0
    case OP_WORDCHAR:
2633
0
    if (Feptr >= mb->end_subject)
2634
0
      {
2635
0
      SCHECK_PARTIAL();
2636
0
      RRETURN(MATCH_NOMATCH);
2637
0
      }
2638
0
    GETCHARINCTEST(fc, Feptr);
2639
0
    if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_word) == 0)
2640
0
      RRETURN(MATCH_NOMATCH);
2641
0
    Fecode++;
2642
0
    break;
2643
2644
0
    case OP_ANYNL:
2645
0
    if (Feptr >= mb->end_subject)
2646
0
      {
2647
0
      SCHECK_PARTIAL();
2648
0
      RRETURN(MATCH_NOMATCH);
2649
0
      }
2650
0
    GETCHARINCTEST(fc, Feptr);
2651
0
    switch(fc)
2652
0
      {
2653
0
      default: RRETURN(MATCH_NOMATCH);
2654
2655
0
      case CHAR_CR:
2656
0
      if (Feptr >= mb->end_subject)
2657
0
        {
2658
0
        SCHECK_PARTIAL();
2659
0
        }
2660
0
      else if (UCHAR21TEST(Feptr) == CHAR_LF) Feptr++;
2661
0
      break;
2662
2663
0
      case CHAR_LF:
2664
0
      break;
2665
2666
0
      case CHAR_VT:
2667
0
      case CHAR_FF:
2668
0
      case CHAR_NEL:
2669
0
#ifndef EBCDIC
2670
0
      case 0x2028:
2671
0
      case 0x2029:
2672
0
#endif  /* Not EBCDIC */
2673
0
      if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
2674
0
      break;
2675
0
      }
2676
0
    Fecode++;
2677
0
    break;
2678
2679
0
    case OP_NOT_HSPACE:
2680
0
    if (Feptr >= mb->end_subject)
2681
0
      {
2682
0
      SCHECK_PARTIAL();
2683
0
      RRETURN(MATCH_NOMATCH);
2684
0
      }
2685
0
    GETCHARINCTEST(fc, Feptr);
2686
0
    switch(fc)
2687
0
      {
2688
0
      HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
2689
0
      default: break;
2690
0
      }
2691
0
    Fecode++;
2692
0
    break;
2693
2694
0
    case OP_HSPACE:
2695
0
    if (Feptr >= mb->end_subject)
2696
0
      {
2697
0
      SCHECK_PARTIAL();
2698
0
      RRETURN(MATCH_NOMATCH);
2699
0
      }
2700
0
    GETCHARINCTEST(fc, Feptr);
2701
0
    switch(fc)
2702
0
      {
2703
0
      HSPACE_CASES: break;  /* Byte and multibyte cases */
2704
0
      default: RRETURN(MATCH_NOMATCH);
2705
0
      }
2706
0
    Fecode++;
2707
0
    break;
2708
2709
0
    case OP_NOT_VSPACE:
2710
0
    if (Feptr >= mb->end_subject)
2711
0
      {
2712
0
      SCHECK_PARTIAL();
2713
0
      RRETURN(MATCH_NOMATCH);
2714
0
      }
2715
0
    GETCHARINCTEST(fc, Feptr);
2716
0
    switch(fc)
2717
0
      {
2718
0
      VSPACE_CASES: RRETURN(MATCH_NOMATCH);
2719
0
      default: break;
2720
0
      }
2721
0
    Fecode++;
2722
0
    break;
2723
2724
0
    case OP_VSPACE:
2725
0
    if (Feptr >= mb->end_subject)
2726
0
      {
2727
0
      SCHECK_PARTIAL();
2728
0
      RRETURN(MATCH_NOMATCH);
2729
0
      }
2730
0
    GETCHARINCTEST(fc, Feptr);
2731
0
    switch(fc)
2732
0
      {
2733
0
      VSPACE_CASES: break;
2734
0
      default: RRETURN(MATCH_NOMATCH);
2735
0
      }
2736
0
    Fecode++;
2737
0
    break;
2738
2739
2740
0
#ifdef SUPPORT_UNICODE
2741
2742
    /* ===================================================================== */
2743
    /* Check the next character by Unicode property. We will get here only
2744
    if the support is in the binary; otherwise a compile-time error occurs. */
2745
2746
0
    case OP_PROP:
2747
0
    case OP_NOTPROP:
2748
0
    if (Feptr >= mb->end_subject)
2749
0
      {
2750
0
      SCHECK_PARTIAL();
2751
0
      RRETURN(MATCH_NOMATCH);
2752
0
      }
2753
0
    GETCHARINCTEST(fc, Feptr);
2754
0
      {
2755
0
      const uint32_t *cp;
2756
0
      uint32_t chartype;
2757
0
      const ucd_record *prop = GET_UCD(fc);
2758
0
      BOOL notmatch = Fop == OP_NOTPROP;
2759
2760
0
      switch(Fecode[1])
2761
0
        {
2762
0
        case PT_LAMP:
2763
0
        chartype = prop->chartype;
2764
0
        if ((chartype == ucp_Lu ||
2765
0
             chartype == ucp_Ll ||
2766
0
             chartype == ucp_Lt) == notmatch)
2767
0
          RRETURN(MATCH_NOMATCH);
2768
0
        break;
2769
2770
0
        case PT_GC:
2771
0
        if ((Fecode[2] == PRIV(ucp_gentype)[prop->chartype]) == notmatch)
2772
0
          RRETURN(MATCH_NOMATCH);
2773
0
        break;
2774
2775
0
        case PT_PC:
2776
0
        if ((Fecode[2] == prop->chartype) == notmatch)
2777
0
          RRETURN(MATCH_NOMATCH);
2778
0
        break;
2779
2780
0
        case PT_SC:
2781
0
        if ((Fecode[2] == prop->script) == notmatch)
2782
0
          RRETURN(MATCH_NOMATCH);
2783
0
        break;
2784
2785
0
        case PT_SCX:
2786
0
          {
2787
0
          BOOL ok = (Fecode[2] == prop->script ||
2788
0
                     MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Fecode[2]) != 0);
2789
0
          if (ok == notmatch) RRETURN(MATCH_NOMATCH);
2790
0
          }
2791
0
        break;
2792
2793
        /* These are specials */
2794
2795
0
        case PT_ALNUM:
2796
0
        chartype = prop->chartype;
2797
0
        if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
2798
0
             PRIV(ucp_gentype)[chartype] == ucp_N) == notmatch)
2799
0
          RRETURN(MATCH_NOMATCH);
2800
0
        break;
2801
2802
        /* Perl space used to exclude VT, but from Perl 5.18 it is included,
2803
        which means that Perl space and POSIX space are now identical. PCRE
2804
        was changed at release 8.34. */
2805
2806
0
        case PT_SPACE:    /* Perl space */
2807
0
        case PT_PXSPACE:  /* POSIX space */
2808
0
        switch(fc)
2809
0
          {
2810
0
          HSPACE_CASES:
2811
0
          VSPACE_CASES:
2812
0
          if (notmatch) RRETURN(MATCH_NOMATCH);
2813
0
          break;
2814
2815
0
          default:
2816
0
          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == notmatch)
2817
0
            RRETURN(MATCH_NOMATCH);
2818
0
          break;
2819
0
          }
2820
0
        break;
2821
2822
0
        case PT_WORD:
2823
0
        chartype = prop->chartype;
2824
0
        if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
2825
0
             PRIV(ucp_gentype)[chartype] == ucp_N ||
2826
0
             chartype == ucp_Mn ||
2827
0
             chartype == ucp_Pc) == notmatch)
2828
0
          RRETURN(MATCH_NOMATCH);
2829
0
        break;
2830
2831
0
        case PT_CLIST:
2832
#if PCRE2_CODE_UNIT_WIDTH == 32
2833
            if (fc > MAX_UTF_CODE_POINT)
2834
              {
2835
              if (notmatch) break;;
2836
              RRETURN(MATCH_NOMATCH);
2837
              }
2838
#endif
2839
0
        cp = PRIV(ucd_caseless_sets) + Fecode[2];
2840
0
        for (;;)
2841
0
          {
2842
0
          if (fc < *cp)
2843
0
            { if (notmatch) break; else { RRETURN(MATCH_NOMATCH); } }
2844
0
          if (fc == *cp++)
2845
0
            { if (notmatch) { RRETURN(MATCH_NOMATCH); } else break; }
2846
0
          }
2847
0
        break;
2848
2849
0
        case PT_UCNC:
2850
0
        if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
2851
0
             fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
2852
0
             fc >= 0xe000) == notmatch)
2853
0
          RRETURN(MATCH_NOMATCH);
2854
0
        break;
2855
2856
0
        case PT_BIDICL:
2857
0
        if ((UCD_BIDICLASS_PROP(prop) == Fecode[2]) == notmatch)
2858
0
          RRETURN(MATCH_NOMATCH);
2859
0
        break;
2860
2861
0
        case PT_BOOL:
2862
0
          {
2863
0
          BOOL ok = MAPBIT(PRIV(ucd_boolprop_sets) +
2864
0
            UCD_BPROPS_PROP(prop), Fecode[2]) != 0;
2865
0
          if (ok == notmatch) RRETURN(MATCH_NOMATCH);
2866
0
          }
2867
0
        break;
2868
2869
        /* This should never occur */
2870
2871
        /* LCOV_EXCL_START */
2872
0
        default:
2873
0
        PCRE2_DEBUG_UNREACHABLE();
2874
0
        return PCRE2_ERROR_INTERNAL;
2875
        /* LCOV_EXCL_STOP */
2876
0
        }
2877
2878
0
      Fecode += 3;
2879
0
      }
2880
0
    break;
2881
2882
2883
    /* ===================================================================== */
2884
    /* Match an extended Unicode sequence. We will get here only if the support
2885
    is in the binary; otherwise a compile-time error occurs. */
2886
2887
0
    case OP_EXTUNI:
2888
0
    if (Feptr >= mb->end_subject)
2889
0
      {
2890
0
      SCHECK_PARTIAL();
2891
0
      RRETURN(MATCH_NOMATCH);
2892
0
      }
2893
0
    else
2894
0
      {
2895
0
      GETCHARINCTEST(fc, Feptr);
2896
0
      Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject, utf,
2897
0
        NULL);
2898
0
      }
2899
0
    CHECK_PARTIAL();
2900
0
    Fecode++;
2901
0
    break;
2902
2903
0
#endif  /* SUPPORT_UNICODE */
2904
2905
2906
    /* ===================================================================== */
2907
    /* Match a single character type repeatedly. Note that the property type
2908
    does not need to be in a stack frame as it is not used within an RMATCH()
2909
    loop. */
2910
2911
0
#define Lstart_eptr  F->temp_sptr[0]
2912
0
#define Lmin         F->temp_32[0]
2913
0
#define Lmax         F->temp_32[1]
2914
0
#define Lctype       F->temp_32[2]
2915
0
#define Lpropvalue   F->temp_32[3]
2916
2917
0
    case OP_TYPEEXACT:
2918
0
    Lmin = Lmax = GET2(Fecode, 1);
2919
0
    Fecode += 1 + IMM2_SIZE;
2920
0
    goto REPEATTYPE;
2921
2922
0
    case OP_TYPEUPTO:
2923
0
    case OP_TYPEMINUPTO:
2924
0
    Lmin = 0;
2925
0
    Lmax = GET2(Fecode, 1);
2926
0
    reptype = (*Fecode == OP_TYPEMINUPTO)? REPTYPE_MIN : REPTYPE_MAX;
2927
0
    Fecode += 1 + IMM2_SIZE;
2928
0
    goto REPEATTYPE;
2929
2930
0
    case OP_TYPEPOSSTAR:
2931
0
    reptype = REPTYPE_POS;
2932
0
    Lmin = 0;
2933
0
    Lmax = UINT32_MAX;
2934
0
    Fecode++;
2935
0
    goto REPEATTYPE;
2936
2937
0
    case OP_TYPEPOSPLUS:
2938
0
    reptype = REPTYPE_POS;
2939
0
    Lmin = 1;
2940
0
    Lmax = UINT32_MAX;
2941
0
    Fecode++;
2942
0
    goto REPEATTYPE;
2943
2944
0
    case OP_TYPEPOSQUERY:
2945
0
    reptype = REPTYPE_POS;
2946
0
    Lmin = 0;
2947
0
    Lmax = 1;
2948
0
    Fecode++;
2949
0
    goto REPEATTYPE;
2950
2951
0
    case OP_TYPEPOSUPTO:
2952
0
    reptype = REPTYPE_POS;
2953
0
    Lmin = 0;
2954
0
    Lmax = GET2(Fecode, 1);
2955
0
    Fecode += 1 + IMM2_SIZE;
2956
0
    goto REPEATTYPE;
2957
2958
0
    case OP_TYPESTAR:
2959
0
    case OP_TYPEMINSTAR:
2960
0
    case OP_TYPEPLUS:
2961
0
    case OP_TYPEMINPLUS:
2962
0
    case OP_TYPEQUERY:
2963
0
    case OP_TYPEMINQUERY:
2964
0
    fc = *Fecode++ - OP_TYPESTAR;
2965
0
    Lmin = rep_min[fc];
2966
0
    Lmax = rep_max[fc];
2967
0
    reptype = rep_typ[fc];
2968
2969
    /* Common code for all repeated character type matches. */
2970
2971
0
    REPEATTYPE:
2972
0
    Lctype = *Fecode++;      /* Code for the character type */
2973
2974
0
#ifdef SUPPORT_UNICODE
2975
0
    if (Lctype == OP_PROP || Lctype == OP_NOTPROP)
2976
0
      {
2977
0
      proptype = *Fecode++;
2978
0
      Lpropvalue = *Fecode++;
2979
0
      }
2980
0
    else proptype = -1;
2981
0
#endif
2982
2983
    /* First, ensure the minimum number of matches are present. Use inline
2984
    code for maximizing the speed, and do the type test once at the start
2985
    (i.e. keep it out of the loops). As there are no calls to RMATCH in the
2986
    loops, we can use an ordinary variable for "notmatch". The code for UTF
2987
    mode is separated out for tidiness, except for Unicode property tests. */
2988
2989
0
    if (Lmin > 0)
2990
0
      {
2991
0
#ifdef SUPPORT_UNICODE
2992
0
      if (proptype >= 0)  /* Property tests in all modes */
2993
0
        {
2994
0
        BOOL notmatch = Lctype == OP_NOTPROP;
2995
0
        switch(proptype)
2996
0
          {
2997
0
          case PT_LAMP:
2998
0
          for (i = 1; i <= Lmin; i++)
2999
0
            {
3000
0
            int chartype;
3001
0
            if (Feptr >= mb->end_subject)
3002
0
              {
3003
0
              SCHECK_PARTIAL();
3004
0
              RRETURN(MATCH_NOMATCH);
3005
0
              }
3006
0
            GETCHARINCTEST(fc, Feptr);
3007
0
            chartype = UCD_CHARTYPE(fc);
3008
0
            if ((chartype == ucp_Lu ||
3009
0
                 chartype == ucp_Ll ||
3010
0
                 chartype == ucp_Lt) == notmatch)
3011
0
              RRETURN(MATCH_NOMATCH);
3012
0
            }
3013
0
          break;
3014
3015
0
          case PT_GC:
3016
0
          for (i = 1; i <= Lmin; i++)
3017
0
            {
3018
0
            if (Feptr >= mb->end_subject)
3019
0
              {
3020
0
              SCHECK_PARTIAL();
3021
0
              RRETURN(MATCH_NOMATCH);
3022
0
              }
3023
0
            GETCHARINCTEST(fc, Feptr);
3024
0
            if ((UCD_CATEGORY(fc) == Lpropvalue) == notmatch)
3025
0
              RRETURN(MATCH_NOMATCH);
3026
0
            }
3027
0
          break;
3028
3029
0
          case PT_PC:
3030
0
          for (i = 1; i <= Lmin; i++)
3031
0
            {
3032
0
            if (Feptr >= mb->end_subject)
3033
0
              {
3034
0
              SCHECK_PARTIAL();
3035
0
              RRETURN(MATCH_NOMATCH);
3036
0
              }
3037
0
            GETCHARINCTEST(fc, Feptr);
3038
0
            if ((UCD_CHARTYPE(fc) == Lpropvalue) == notmatch)
3039
0
              RRETURN(MATCH_NOMATCH);
3040
0
            }
3041
0
          break;
3042
3043
0
          case PT_SC:
3044
0
          for (i = 1; i <= Lmin; i++)
3045
0
            {
3046
0
            if (Feptr >= mb->end_subject)
3047
0
              {
3048
0
              SCHECK_PARTIAL();
3049
0
              RRETURN(MATCH_NOMATCH);
3050
0
              }
3051
0
            GETCHARINCTEST(fc, Feptr);
3052
0
            if ((UCD_SCRIPT(fc) == Lpropvalue) == notmatch)
3053
0
              RRETURN(MATCH_NOMATCH);
3054
0
            }
3055
0
          break;
3056
3057
0
          case PT_SCX:
3058
0
          for (i = 1; i <= Lmin; i++)
3059
0
            {
3060
0
            BOOL ok;
3061
0
            const ucd_record *prop;
3062
0
            if (Feptr >= mb->end_subject)
3063
0
              {
3064
0
              SCHECK_PARTIAL();
3065
0
              RRETURN(MATCH_NOMATCH);
3066
0
              }
3067
0
            GETCHARINCTEST(fc, Feptr);
3068
0
            prop = GET_UCD(fc);
3069
0
            ok = (prop->script == Lpropvalue ||
3070
0
                  MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0);
3071
0
            if (ok == notmatch)
3072
0
              RRETURN(MATCH_NOMATCH);
3073
0
            }
3074
0
          break;
3075
3076
0
          case PT_ALNUM:
3077
0
          for (i = 1; i <= Lmin; i++)
3078
0
            {
3079
0
            int category;
3080
0
            if (Feptr >= mb->end_subject)
3081
0
              {
3082
0
              SCHECK_PARTIAL();
3083
0
              RRETURN(MATCH_NOMATCH);
3084
0
              }
3085
0
            GETCHARINCTEST(fc, Feptr);
3086
0
            category = UCD_CATEGORY(fc);
3087
0
            if ((category == ucp_L || category == ucp_N) == notmatch)
3088
0
              RRETURN(MATCH_NOMATCH);
3089
0
            }
3090
0
          break;
3091
3092
          /* Perl space used to exclude VT, but from Perl 5.18 it is included,
3093
          which means that Perl space and POSIX space are now identical. PCRE
3094
          was changed at release 8.34. */
3095
3096
0
          case PT_SPACE:    /* Perl space */
3097
0
          case PT_PXSPACE:  /* POSIX space */
3098
0
          for (i = 1; i <= Lmin; i++)
3099
0
            {
3100
0
            if (Feptr >= mb->end_subject)
3101
0
              {
3102
0
              SCHECK_PARTIAL();
3103
0
              RRETURN(MATCH_NOMATCH);
3104
0
              }
3105
0
            GETCHARINCTEST(fc, Feptr);
3106
0
            switch(fc)
3107
0
              {
3108
0
              HSPACE_CASES:
3109
0
              VSPACE_CASES:
3110
0
              if (notmatch) RRETURN(MATCH_NOMATCH);
3111
0
              break;
3112
3113
0
              default:
3114
0
              if ((UCD_CATEGORY(fc) == ucp_Z) == notmatch)
3115
0
                RRETURN(MATCH_NOMATCH);
3116
0
              break;
3117
0
              }
3118
0
            }
3119
0
          break;
3120
3121
0
          case PT_WORD:
3122
0
          for (i = 1; i <= Lmin; i++)
3123
0
            {
3124
0
            int chartype, category;
3125
0
            if (Feptr >= mb->end_subject)
3126
0
              {
3127
0
              SCHECK_PARTIAL();
3128
0
              RRETURN(MATCH_NOMATCH);
3129
0
              }
3130
0
            GETCHARINCTEST(fc, Feptr);
3131
0
            chartype = UCD_CHARTYPE(fc);
3132
0
            category = PRIV(ucp_gentype)[chartype];
3133
0
            if ((category == ucp_L || category == ucp_N ||
3134
0
                 chartype == ucp_Mn || chartype == ucp_Pc) == notmatch)
3135
0
              RRETURN(MATCH_NOMATCH);
3136
0
            }
3137
0
          break;
3138
3139
0
          case PT_CLIST:
3140
0
          for (i = 1; i <= Lmin; i++)
3141
0
            {
3142
0
            const uint32_t *cp;
3143
0
            if (Feptr >= mb->end_subject)
3144
0
              {
3145
0
              SCHECK_PARTIAL();
3146
0
              RRETURN(MATCH_NOMATCH);
3147
0
              }
3148
0
            GETCHARINCTEST(fc, Feptr);
3149
#if PCRE2_CODE_UNIT_WIDTH == 32
3150
            if (fc > MAX_UTF_CODE_POINT)
3151
              {
3152
              if (notmatch) continue;
3153
              RRETURN(MATCH_NOMATCH);
3154
              }
3155
#endif
3156
0
            cp = PRIV(ucd_caseless_sets) + Lpropvalue;
3157
0
            for (;;)
3158
0
              {
3159
0
              if (fc < *cp)
3160
0
                {
3161
0
                if (notmatch) break;
3162
0
                RRETURN(MATCH_NOMATCH);
3163
0
                }
3164
0
              if (fc == *cp++)
3165
0
                {
3166
0
                if (notmatch) RRETURN(MATCH_NOMATCH);
3167
0
                break;
3168
0
                }
3169
0
              }
3170
0
            }
3171
0
          break;
3172
3173
0
          case PT_UCNC:
3174
0
          for (i = 1; i <= Lmin; i++)
3175
0
            {
3176
0
            if (Feptr >= mb->end_subject)
3177
0
              {
3178
0
              SCHECK_PARTIAL();
3179
0
              RRETURN(MATCH_NOMATCH);
3180
0
              }
3181
0
            GETCHARINCTEST(fc, Feptr);
3182
0
            if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
3183
0
                 fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
3184
0
                 fc >= 0xe000) == notmatch)
3185
0
              RRETURN(MATCH_NOMATCH);
3186
0
            }
3187
0
          break;
3188
3189
0
          case PT_BIDICL:
3190
0
          for (i = 1; i <= Lmin; i++)
3191
0
            {
3192
0
            if (Feptr >= mb->end_subject)
3193
0
              {
3194
0
              SCHECK_PARTIAL();
3195
0
              RRETURN(MATCH_NOMATCH);
3196
0
              }
3197
0
            GETCHARINCTEST(fc, Feptr);
3198
0
            if ((UCD_BIDICLASS(fc) == Lpropvalue) == notmatch)
3199
0
              RRETURN(MATCH_NOMATCH);
3200
0
            }
3201
0
          break;
3202
3203
0
          case PT_BOOL:
3204
0
          for (i = 1; i <= Lmin; i++)
3205
0
            {
3206
0
            BOOL ok;
3207
0
            const ucd_record *prop;
3208
0
            if (Feptr >= mb->end_subject)
3209
0
              {
3210
0
              SCHECK_PARTIAL();
3211
0
              RRETURN(MATCH_NOMATCH);
3212
0
              }
3213
0
            GETCHARINCTEST(fc, Feptr);
3214
0
            prop = GET_UCD(fc);
3215
0
            ok = MAPBIT(PRIV(ucd_boolprop_sets) +
3216
0
              UCD_BPROPS_PROP(prop), Lpropvalue) != 0;
3217
0
            if (ok == notmatch)
3218
0
              RRETURN(MATCH_NOMATCH);
3219
0
            }
3220
0
          break;
3221
3222
          /* This should not occur */
3223
3224
          /* LCOV_EXCL_START */
3225
0
          default:
3226
0
          PCRE2_DEBUG_UNREACHABLE();
3227
0
          return PCRE2_ERROR_INTERNAL;
3228
          /* LCOV_EXCL_STOP */
3229
0
          }
3230
0
        }
3231
3232
      /* Match extended Unicode sequences. We will get here only if the
3233
      support is in the binary; otherwise a compile-time error occurs. */
3234
3235
0
      else if (Lctype == OP_EXTUNI)
3236
0
        {
3237
0
        for (i = 1; i <= Lmin; i++)
3238
0
          {
3239
0
          if (Feptr >= mb->end_subject)
3240
0
            {
3241
0
            SCHECK_PARTIAL();
3242
0
            RRETURN(MATCH_NOMATCH);
3243
0
            }
3244
0
          else
3245
0
            {
3246
0
            GETCHARINCTEST(fc, Feptr);
3247
0
            Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject,
3248
0
              mb->end_subject, utf, NULL);
3249
0
            }
3250
0
          CHECK_PARTIAL();
3251
0
          }
3252
0
        }
3253
0
      else
3254
0
#endif     /* SUPPORT_UNICODE */
3255
3256
/* Handle all other cases in UTF mode */
3257
3258
0
#ifdef SUPPORT_UNICODE
3259
0
      if (utf) switch(Lctype)
3260
0
        {
3261
0
        case OP_ANY:
3262
0
        for (i = 1; i <= Lmin; i++)
3263
0
          {
3264
0
          if (Feptr >= mb->end_subject)
3265
0
            {
3266
0
            SCHECK_PARTIAL();
3267
0
            RRETURN(MATCH_NOMATCH);
3268
0
            }
3269
0
          if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
3270
0
          if (mb->partial != 0 &&
3271
0
              Feptr + 1 >= mb->end_subject &&
3272
0
              NLBLOCK->nltype == NLTYPE_FIXED &&
3273
0
              NLBLOCK->nllen == 2 &&
3274
0
              UCHAR21(Feptr) == NLBLOCK->nl[0])
3275
0
            {
3276
0
            mb->hitend = TRUE;
3277
0
            if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
3278
0
            }
3279
0
          Feptr++;
3280
0
          ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
3281
0
          }
3282
0
        break;
3283
3284
0
        case OP_ALLANY:
3285
0
        for (i = 1; i <= Lmin; i++)
3286
0
          {
3287
0
          if (Feptr >= mb->end_subject)
3288
0
            {
3289
0
            SCHECK_PARTIAL();
3290
0
            RRETURN(MATCH_NOMATCH);
3291
0
            }
3292
0
          Feptr++;
3293
0
          ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
3294
0
          }
3295
0
        break;
3296
3297
0
        case OP_ANYBYTE:
3298
0
        if (Feptr > mb->end_subject - Lmin) RRETURN(MATCH_NOMATCH);
3299
0
        Feptr += Lmin;
3300
0
        break;
3301
3302
0
        case OP_ANYNL:
3303
0
        for (i = 1; i <= Lmin; i++)
3304
0
          {
3305
0
          if (Feptr >= mb->end_subject)
3306
0
            {
3307
0
            SCHECK_PARTIAL();
3308
0
            RRETURN(MATCH_NOMATCH);
3309
0
            }
3310
0
          GETCHARINC(fc, Feptr);
3311
0
          switch(fc)
3312
0
            {
3313
0
            default: RRETURN(MATCH_NOMATCH);
3314
3315
0
            case CHAR_CR:
3316
0
            if (Feptr < mb->end_subject && UCHAR21(Feptr) == CHAR_LF) Feptr++;
3317
0
            break;
3318
3319
0
            case CHAR_LF:
3320
0
            break;
3321
3322
0
            case CHAR_VT:
3323
0
            case CHAR_FF:
3324
0
            case CHAR_NEL:
3325
0
#ifndef EBCDIC
3326
0
            case 0x2028:
3327
0
            case 0x2029:
3328
0
#endif  /* Not EBCDIC */
3329
0
            if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
3330
0
            break;
3331
0
            }
3332
0
          }
3333
0
        break;
3334
3335
0
        case OP_NOT_HSPACE:
3336
0
        for (i = 1; i <= Lmin; i++)
3337
0
          {
3338
0
          if (Feptr >= mb->end_subject)
3339
0
            {
3340
0
            SCHECK_PARTIAL();
3341
0
            RRETURN(MATCH_NOMATCH);
3342
0
            }
3343
0
          GETCHARINC(fc, Feptr);
3344
0
          switch(fc)
3345
0
            {
3346
0
            HSPACE_CASES: RRETURN(MATCH_NOMATCH);
3347
0
            default: break;
3348
0
            }
3349
0
          }
3350
0
        break;
3351
3352
0
        case OP_HSPACE:
3353
0
        for (i = 1; i <= Lmin; i++)
3354
0
          {
3355
0
          if (Feptr >= mb->end_subject)
3356
0
            {
3357
0
            SCHECK_PARTIAL();
3358
0
            RRETURN(MATCH_NOMATCH);
3359
0
            }
3360
0
          GETCHARINC(fc, Feptr);
3361
0
          switch(fc)
3362
0
            {
3363
0
            HSPACE_CASES: break;
3364
0
            default: RRETURN(MATCH_NOMATCH);
3365
0
            }
3366
0
          }
3367
0
        break;
3368
3369
0
        case OP_NOT_VSPACE:
3370
0
        for (i = 1; i <= Lmin; i++)
3371
0
          {
3372
0
          if (Feptr >= mb->end_subject)
3373
0
            {
3374
0
            SCHECK_PARTIAL();
3375
0
            RRETURN(MATCH_NOMATCH);
3376
0
            }
3377
0
          GETCHARINC(fc, Feptr);
3378
0
          switch(fc)
3379
0
            {
3380
0
            VSPACE_CASES: RRETURN(MATCH_NOMATCH);
3381
0
            default: break;
3382
0
            }
3383
0
          }
3384
0
        break;
3385
3386
0
        case OP_VSPACE:
3387
0
        for (i = 1; i <= Lmin; i++)
3388
0
          {
3389
0
          if (Feptr >= mb->end_subject)
3390
0
            {
3391
0
            SCHECK_PARTIAL();
3392
0
            RRETURN(MATCH_NOMATCH);
3393
0
            }
3394
0
          GETCHARINC(fc, Feptr);
3395
0
          switch(fc)
3396
0
            {
3397
0
            VSPACE_CASES: break;
3398
0
            default: RRETURN(MATCH_NOMATCH);
3399
0
            }
3400
0
          }
3401
0
        break;
3402
3403
0
        case OP_NOT_DIGIT:
3404
0
        for (i = 1; i <= Lmin; i++)
3405
0
          {
3406
0
          if (Feptr >= mb->end_subject)
3407
0
            {
3408
0
            SCHECK_PARTIAL();
3409
0
            RRETURN(MATCH_NOMATCH);
3410
0
            }
3411
0
          GETCHARINC(fc, Feptr);
3412
0
          if (fc < 128 && (mb->ctypes[fc] & ctype_digit) != 0)
3413
0
            RRETURN(MATCH_NOMATCH);
3414
0
          }
3415
0
        break;
3416
3417
0
        case OP_DIGIT:
3418
0
        for (i = 1; i <= Lmin; i++)
3419
0
          {
3420
0
          uint32_t cc;
3421
0
          if (Feptr >= mb->end_subject)
3422
0
            {
3423
0
            SCHECK_PARTIAL();
3424
0
            RRETURN(MATCH_NOMATCH);
3425
0
            }
3426
0
          cc = UCHAR21(Feptr);
3427
0
          if (cc >= 128 || (mb->ctypes[cc] & ctype_digit) == 0)
3428
0
            RRETURN(MATCH_NOMATCH);
3429
0
          Feptr++;
3430
          /* No need to skip more code units - we know it has only one. */
3431
0
          }
3432
0
        break;
3433
3434
0
        case OP_NOT_WHITESPACE:
3435
0
        for (i = 1; i <= Lmin; i++)
3436
0
          {
3437
0
          uint32_t cc;
3438
0
          if (Feptr >= mb->end_subject)
3439
0
            {
3440
0
            SCHECK_PARTIAL();
3441
0
            RRETURN(MATCH_NOMATCH);
3442
0
            }
3443
0
          cc = UCHAR21(Feptr);
3444
0
          if (cc < 128 && (mb->ctypes[cc] & ctype_space) != 0)
3445
0
            RRETURN(MATCH_NOMATCH);
3446
0
          Feptr++;
3447
0
          ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
3448
0
          }
3449
0
        break;
3450
3451
0
        case OP_WHITESPACE:
3452
0
        for (i = 1; i <= Lmin; i++)
3453
0
          {
3454
0
          uint32_t cc;
3455
0
          if (Feptr >= mb->end_subject)
3456
0
            {
3457
0
            SCHECK_PARTIAL();
3458
0
            RRETURN(MATCH_NOMATCH);
3459
0
            }
3460
0
          cc = UCHAR21(Feptr);
3461
0
          if (cc >= 128 || (mb->ctypes[cc] & ctype_space) == 0)
3462
0
            RRETURN(MATCH_NOMATCH);
3463
0
          Feptr++;
3464
          /* No need to skip more code units - we know it has only one. */
3465
0
          }
3466
0
        break;
3467
3468
0
        case OP_NOT_WORDCHAR:
3469
0
        for (i = 1; i <= Lmin; i++)
3470
0
          {
3471
0
          uint32_t cc;
3472
0
          if (Feptr >= mb->end_subject)
3473
0
            {
3474
0
            SCHECK_PARTIAL();
3475
0
            RRETURN(MATCH_NOMATCH);
3476
0
            }
3477
0
          cc = UCHAR21(Feptr);
3478
0
          if (cc < 128 && (mb->ctypes[cc] & ctype_word) != 0)
3479
0
            RRETURN(MATCH_NOMATCH);
3480
0
          Feptr++;
3481
0
          ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
3482
0
          }
3483
0
        break;
3484
3485
0
        case OP_WORDCHAR:
3486
0
        for (i = 1; i <= Lmin; i++)
3487
0
          {
3488
0
          uint32_t cc;
3489
0
          if (Feptr >= mb->end_subject)
3490
0
            {
3491
0
            SCHECK_PARTIAL();
3492
0
            RRETURN(MATCH_NOMATCH);
3493
0
            }
3494
0
          cc = UCHAR21(Feptr);
3495
0
          if (cc >= 128 || (mb->ctypes[cc] & ctype_word) == 0)
3496
0
            RRETURN(MATCH_NOMATCH);
3497
0
          Feptr++;
3498
          /* No need to skip more code units - we know it has only one. */
3499
0
          }
3500
0
        break;
3501
3502
        /* LCOV_EXCL_START */
3503
0
        default:
3504
0
        PCRE2_DEBUG_UNREACHABLE();
3505
0
        return PCRE2_ERROR_INTERNAL;
3506
        /* LCOV_EXCL_STOP */
3507
0
        }  /* End switch(Lctype) */
3508
3509
0
      else
3510
0
#endif     /* SUPPORT_UNICODE */
3511
3512
      /* Code for the non-UTF case for minimum matching of operators other
3513
      than OP_PROP and OP_NOTPROP. */
3514
3515
0
      switch(Lctype)
3516
0
        {
3517
0
        case OP_ANY:
3518
0
        for (i = 1; i <= Lmin; i++)
3519
0
          {
3520
0
          if (Feptr >= mb->end_subject)
3521
0
            {
3522
0
            SCHECK_PARTIAL();
3523
0
            RRETURN(MATCH_NOMATCH);
3524
0
            }
3525
0
          if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
3526
0
          if (mb->partial != 0 &&
3527
0
              Feptr + 1 >= mb->end_subject &&
3528
0
              NLBLOCK->nltype == NLTYPE_FIXED &&
3529
0
              NLBLOCK->nllen == 2 &&
3530
0
              *Feptr == NLBLOCK->nl[0])
3531
0
            {
3532
0
            mb->hitend = TRUE;
3533
0
            if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
3534
0
            }
3535
0
          Feptr++;
3536
0
          }
3537
0
        break;
3538
3539
0
        case OP_ALLANY:
3540
0
        if (Feptr > mb->end_subject - Lmin)
3541
0
          {
3542
0
          SCHECK_PARTIAL();
3543
0
          RRETURN(MATCH_NOMATCH);
3544
0
          }
3545
0
        Feptr += Lmin;
3546
0
        break;
3547
3548
        /* This OP_ANYBYTE case will never be reached because \C gets turned
3549
        into OP_ALLANY in non-UTF mode. Cut out the code so that coverage
3550
        reports don't complain about its never being used. */
3551
3552
/*        case OP_ANYBYTE:
3553
*        if (Feptr > mb->end_subject - Lmin)
3554
*          {
3555
*          SCHECK_PARTIAL();
3556
*          RRETURN(MATCH_NOMATCH);
3557
*          }
3558
*        Feptr += Lmin;
3559
*        break;
3560
*/
3561
0
        case OP_ANYNL:
3562
0
        for (i = 1; i <= Lmin; i++)
3563
0
          {
3564
0
          if (Feptr >= mb->end_subject)
3565
0
            {
3566
0
            SCHECK_PARTIAL();
3567
0
            RRETURN(MATCH_NOMATCH);
3568
0
            }
3569
0
          switch(*Feptr++)
3570
0
            {
3571
0
            default: RRETURN(MATCH_NOMATCH);
3572
3573
0
            case CHAR_CR:
3574
0
            if (Feptr < mb->end_subject && *Feptr == CHAR_LF) Feptr++;
3575
0
            break;
3576
3577
0
            case CHAR_LF:
3578
0
            break;
3579
3580
0
            case CHAR_VT:
3581
0
            case CHAR_FF:
3582
0
            case CHAR_NEL:
3583
#if PCRE2_CODE_UNIT_WIDTH != 8
3584
            case 0x2028:
3585
            case 0x2029:
3586
#endif
3587
0
            if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
3588
0
            break;
3589
0
            }
3590
0
          }
3591
0
        break;
3592
3593
0
        case OP_NOT_HSPACE:
3594
0
        for (i = 1; i <= Lmin; i++)
3595
0
          {
3596
0
          if (Feptr >= mb->end_subject)
3597
0
            {
3598
0
            SCHECK_PARTIAL();
3599
0
            RRETURN(MATCH_NOMATCH);
3600
0
            }
3601
0
          switch(*Feptr++)
3602
0
            {
3603
0
            default: break;
3604
0
            HSPACE_BYTE_CASES:
3605
#if PCRE2_CODE_UNIT_WIDTH != 8
3606
            HSPACE_MULTIBYTE_CASES:
3607
#endif
3608
0
            RRETURN(MATCH_NOMATCH);
3609
0
            }
3610
0
          }
3611
0
        break;
3612
3613
0
        case OP_HSPACE:
3614
0
        for (i = 1; i <= Lmin; i++)
3615
0
          {
3616
0
          if (Feptr >= mb->end_subject)
3617
0
            {
3618
0
            SCHECK_PARTIAL();
3619
0
            RRETURN(MATCH_NOMATCH);
3620
0
            }
3621
0
          switch(*Feptr++)
3622
0
            {
3623
0
            default: RRETURN(MATCH_NOMATCH);
3624
0
            HSPACE_BYTE_CASES:
3625
#if PCRE2_CODE_UNIT_WIDTH != 8
3626
            HSPACE_MULTIBYTE_CASES:
3627
#endif
3628
0
            break;
3629
0
            }
3630
0
          }
3631
0
        break;
3632
3633
0
        case OP_NOT_VSPACE:
3634
0
        for (i = 1; i <= Lmin; i++)
3635
0
          {
3636
0
          if (Feptr >= mb->end_subject)
3637
0
            {
3638
0
            SCHECK_PARTIAL();
3639
0
            RRETURN(MATCH_NOMATCH);
3640
0
            }
3641
0
          switch(*Feptr++)
3642
0
            {
3643
0
            VSPACE_BYTE_CASES:
3644
#if PCRE2_CODE_UNIT_WIDTH != 8
3645
            VSPACE_MULTIBYTE_CASES:
3646
#endif
3647
0
            RRETURN(MATCH_NOMATCH);
3648
0
            default: break;
3649
0
            }
3650
0
          }
3651
0
        break;
3652
3653
0
        case OP_VSPACE:
3654
0
        for (i = 1; i <= Lmin; i++)
3655
0
          {
3656
0
          if (Feptr >= mb->end_subject)
3657
0
            {
3658
0
            SCHECK_PARTIAL();
3659
0
            RRETURN(MATCH_NOMATCH);
3660
0
            }
3661
0
          switch(*Feptr++)
3662
0
            {
3663
0
            default: RRETURN(MATCH_NOMATCH);
3664
0
            VSPACE_BYTE_CASES:
3665
#if PCRE2_CODE_UNIT_WIDTH != 8
3666
            VSPACE_MULTIBYTE_CASES:
3667
#endif
3668
0
            break;
3669
0
            }
3670
0
          }
3671
0
        break;
3672
3673
0
        case OP_NOT_DIGIT:
3674
0
        for (i = 1; i <= Lmin; i++)
3675
0
          {
3676
0
          if (Feptr >= mb->end_subject)
3677
0
            {
3678
0
            SCHECK_PARTIAL();
3679
0
            RRETURN(MATCH_NOMATCH);
3680
0
            }
3681
0
          if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_digit) != 0)
3682
0
            RRETURN(MATCH_NOMATCH);
3683
0
          Feptr++;
3684
0
          }
3685
0
        break;
3686
3687
0
        case OP_DIGIT:
3688
0
        for (i = 1; i <= Lmin; i++)
3689
0
          {
3690
0
          if (Feptr >= mb->end_subject)
3691
0
            {
3692
0
            SCHECK_PARTIAL();
3693
0
            RRETURN(MATCH_NOMATCH);
3694
0
            }
3695
0
          if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_digit) == 0)
3696
0
            RRETURN(MATCH_NOMATCH);
3697
0
          Feptr++;
3698
0
          }
3699
0
        break;
3700
3701
0
        case OP_NOT_WHITESPACE:
3702
0
        for (i = 1; i <= Lmin; i++)
3703
0
          {
3704
0
          if (Feptr >= mb->end_subject)
3705
0
            {
3706
0
            SCHECK_PARTIAL();
3707
0
            RRETURN(MATCH_NOMATCH);
3708
0
            }
3709
0
          if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_space) != 0)
3710
0
            RRETURN(MATCH_NOMATCH);
3711
0
          Feptr++;
3712
0
          }
3713
0
        break;
3714
3715
0
        case OP_WHITESPACE:
3716
0
        for (i = 1; i <= Lmin; i++)
3717
0
          {
3718
0
          if (Feptr >= mb->end_subject)
3719
0
            {
3720
0
            SCHECK_PARTIAL();
3721
0
            RRETURN(MATCH_NOMATCH);
3722
0
            }
3723
0
          if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_space) == 0)
3724
0
            RRETURN(MATCH_NOMATCH);
3725
0
          Feptr++;
3726
0
          }
3727
0
        break;
3728
3729
0
        case OP_NOT_WORDCHAR:
3730
0
        for (i = 1; i <= Lmin; i++)
3731
0
          {
3732
0
          if (Feptr >= mb->end_subject)
3733
0
            {
3734
0
            SCHECK_PARTIAL();
3735
0
            RRETURN(MATCH_NOMATCH);
3736
0
            }
3737
0
          if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_word) != 0)
3738
0
            RRETURN(MATCH_NOMATCH);
3739
0
          Feptr++;
3740
0
          }
3741
0
        break;
3742
3743
0
        case OP_WORDCHAR:
3744
0
        for (i = 1; i <= Lmin; i++)
3745
0
          {
3746
0
          if (Feptr >= mb->end_subject)
3747
0
            {
3748
0
            SCHECK_PARTIAL();
3749
0
            RRETURN(MATCH_NOMATCH);
3750
0
            }
3751
0
          if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_word) == 0)
3752
0
            RRETURN(MATCH_NOMATCH);
3753
0
          Feptr++;
3754
0
          }
3755
0
        break;
3756
3757
        /* LCOV_EXCL_START */
3758
0
        default:
3759
0
        PCRE2_DEBUG_UNREACHABLE();
3760
0
        return PCRE2_ERROR_INTERNAL;
3761
        /* LCOV_EXCL_STOP */
3762
0
        }
3763
0
      }
3764
3765
    /* If Lmin = Lmax we are done. Continue with the main loop. */
3766
3767
0
    if (Lmin == Lmax) continue;
3768
3769
    /* If minimizing, we have to test the rest of the pattern before each
3770
    subsequent match. This means we cannot use a local "notmatch" variable as
3771
    in the other cases. As all 4 temporary 32-bit values in the frame are
3772
    already in use, just test the type each time. */
3773
3774
0
    if (reptype == REPTYPE_MIN)
3775
0
      {
3776
0
#ifdef SUPPORT_UNICODE
3777
0
      if (proptype >= 0)
3778
0
        {
3779
0
        switch(proptype)
3780
0
          {
3781
0
          case PT_LAMP:
3782
0
          for (;;)
3783
0
            {
3784
0
            int chartype;
3785
0
            RMATCH(Fecode, RM208);
3786
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3787
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3788
0
            if (Feptr >= mb->end_subject)
3789
0
              {
3790
0
              SCHECK_PARTIAL();
3791
0
              RRETURN(MATCH_NOMATCH);
3792
0
              }
3793
0
            GETCHARINCTEST(fc, Feptr);
3794
0
            chartype = UCD_CHARTYPE(fc);
3795
0
            if ((chartype == ucp_Lu ||
3796
0
                 chartype == ucp_Ll ||
3797
0
                 chartype == ucp_Lt) == (Lctype == OP_NOTPROP))
3798
0
              RRETURN(MATCH_NOMATCH);
3799
0
            }
3800
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
3801
3802
0
          case PT_GC:
3803
0
          for (;;)
3804
0
            {
3805
0
            RMATCH(Fecode, RM209);
3806
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3807
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3808
0
            if (Feptr >= mb->end_subject)
3809
0
              {
3810
0
              SCHECK_PARTIAL();
3811
0
              RRETURN(MATCH_NOMATCH);
3812
0
              }
3813
0
            GETCHARINCTEST(fc, Feptr);
3814
0
            if ((UCD_CATEGORY(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3815
0
              RRETURN(MATCH_NOMATCH);
3816
0
            }
3817
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
3818
3819
0
          case PT_PC:
3820
0
          for (;;)
3821
0
            {
3822
0
            RMATCH(Fecode, RM210);
3823
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3824
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3825
0
            if (Feptr >= mb->end_subject)
3826
0
              {
3827
0
              SCHECK_PARTIAL();
3828
0
              RRETURN(MATCH_NOMATCH);
3829
0
              }
3830
0
            GETCHARINCTEST(fc, Feptr);
3831
0
            if ((UCD_CHARTYPE(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3832
0
              RRETURN(MATCH_NOMATCH);
3833
0
            }
3834
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
3835
3836
0
          case PT_SC:
3837
0
          for (;;)
3838
0
            {
3839
0
            RMATCH(Fecode, RM211);
3840
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3841
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3842
0
            if (Feptr >= mb->end_subject)
3843
0
              {
3844
0
              SCHECK_PARTIAL();
3845
0
              RRETURN(MATCH_NOMATCH);
3846
0
              }
3847
0
            GETCHARINCTEST(fc, Feptr);
3848
0
            if ((UCD_SCRIPT(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3849
0
              RRETURN(MATCH_NOMATCH);
3850
0
            }
3851
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
3852
3853
0
          case PT_SCX:
3854
0
          for (;;)
3855
0
            {
3856
0
            BOOL ok;
3857
0
            const ucd_record *prop;
3858
0
            RMATCH(Fecode, RM224);
3859
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3860
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3861
0
            if (Feptr >= mb->end_subject)
3862
0
              {
3863
0
              SCHECK_PARTIAL();
3864
0
              RRETURN(MATCH_NOMATCH);
3865
0
              }
3866
0
            GETCHARINCTEST(fc, Feptr);
3867
0
            prop = GET_UCD(fc);
3868
0
            ok = (prop->script == Lpropvalue
3869
0
                  || MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0);
3870
0
            if (ok == (Lctype == OP_NOTPROP))
3871
0
              RRETURN(MATCH_NOMATCH);
3872
0
            }
3873
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
3874
3875
0
          case PT_ALNUM:
3876
0
          for (;;)
3877
0
            {
3878
0
            int category;
3879
0
            RMATCH(Fecode, RM212);
3880
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3881
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3882
0
            if (Feptr >= mb->end_subject)
3883
0
              {
3884
0
              SCHECK_PARTIAL();
3885
0
              RRETURN(MATCH_NOMATCH);
3886
0
              }
3887
0
            GETCHARINCTEST(fc, Feptr);
3888
0
            category = UCD_CATEGORY(fc);
3889
0
            if ((category == ucp_L || category == ucp_N) == (Lctype == OP_NOTPROP))
3890
0
              RRETURN(MATCH_NOMATCH);
3891
0
            }
3892
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
3893
3894
          /* Perl space used to exclude VT, but from Perl 5.18 it is included,
3895
          which means that Perl space and POSIX space are now identical. PCRE
3896
          was changed at release 8.34. */
3897
3898
0
          case PT_SPACE:    /* Perl space */
3899
0
          case PT_PXSPACE:  /* POSIX space */
3900
0
          for (;;)
3901
0
            {
3902
0
            RMATCH(Fecode, RM213);
3903
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3904
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3905
0
            if (Feptr >= mb->end_subject)
3906
0
              {
3907
0
              SCHECK_PARTIAL();
3908
0
              RRETURN(MATCH_NOMATCH);
3909
0
              }
3910
0
            GETCHARINCTEST(fc, Feptr);
3911
0
            switch(fc)
3912
0
              {
3913
0
              HSPACE_CASES:
3914
0
              VSPACE_CASES:
3915
0
              if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
3916
0
              break;
3917
3918
0
              default:
3919
0
              if ((UCD_CATEGORY(fc) == ucp_Z) == (Lctype == OP_NOTPROP))
3920
0
                RRETURN(MATCH_NOMATCH);
3921
0
              break;
3922
0
              }
3923
0
            }
3924
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
3925
3926
0
          case PT_WORD:
3927
0
          for (;;)
3928
0
            {
3929
0
            int chartype, category;
3930
0
            RMATCH(Fecode, RM214);
3931
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3932
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3933
0
            if (Feptr >= mb->end_subject)
3934
0
              {
3935
0
              SCHECK_PARTIAL();
3936
0
              RRETURN(MATCH_NOMATCH);
3937
0
              }
3938
0
            GETCHARINCTEST(fc, Feptr);
3939
0
            chartype = UCD_CHARTYPE(fc);
3940
0
            category = PRIV(ucp_gentype)[chartype];
3941
0
            if ((category == ucp_L ||
3942
0
                 category == ucp_N ||
3943
0
                 chartype == ucp_Mn ||
3944
0
                 chartype == ucp_Pc) == (Lctype == OP_NOTPROP))
3945
0
              RRETURN(MATCH_NOMATCH);
3946
0
            }
3947
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
3948
3949
0
          case PT_CLIST:
3950
0
          for (;;)
3951
0
            {
3952
0
            const uint32_t *cp;
3953
0
            RMATCH(Fecode, RM215);
3954
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3955
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3956
0
            if (Feptr >= mb->end_subject)
3957
0
              {
3958
0
              SCHECK_PARTIAL();
3959
0
              RRETURN(MATCH_NOMATCH);
3960
0
              }
3961
0
            GETCHARINCTEST(fc, Feptr);
3962
#if PCRE2_CODE_UNIT_WIDTH == 32
3963
            if (fc > MAX_UTF_CODE_POINT)
3964
              {
3965
              if (Lctype == OP_NOTPROP) continue;
3966
              RRETURN(MATCH_NOMATCH);
3967
              }
3968
#endif
3969
0
            cp = PRIV(ucd_caseless_sets) + Lpropvalue;
3970
0
            for (;;)
3971
0
              {
3972
0
              if (fc < *cp)
3973
0
                {
3974
0
                if (Lctype == OP_NOTPROP) break;
3975
0
                RRETURN(MATCH_NOMATCH);
3976
0
                }
3977
0
              if (fc == *cp++)
3978
0
                {
3979
0
                if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
3980
0
                break;
3981
0
                }
3982
0
              }
3983
0
            }
3984
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
3985
3986
0
          case PT_UCNC:
3987
0
          for (;;)
3988
0
            {
3989
0
            RMATCH(Fecode, RM216);
3990
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3991
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3992
0
            if (Feptr >= mb->end_subject)
3993
0
              {
3994
0
              SCHECK_PARTIAL();
3995
0
              RRETURN(MATCH_NOMATCH);
3996
0
              }
3997
0
            GETCHARINCTEST(fc, Feptr);
3998
0
            if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
3999
0
                 fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
4000
0
                 fc >= 0xe000) == (Lctype == OP_NOTPROP))
4001
0
              RRETURN(MATCH_NOMATCH);
4002
0
            }
4003
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
4004
4005
0
          case PT_BIDICL:
4006
0
          for (;;)
4007
0
            {
4008
0
            RMATCH(Fecode, RM223);
4009
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4010
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
4011
0
            if (Feptr >= mb->end_subject)
4012
0
              {
4013
0
              SCHECK_PARTIAL();
4014
0
              RRETURN(MATCH_NOMATCH);
4015
0
              }
4016
0
            GETCHARINCTEST(fc, Feptr);
4017
0
            if ((UCD_BIDICLASS(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
4018
0
              RRETURN(MATCH_NOMATCH);
4019
0
            }
4020
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
4021
4022
0
          case PT_BOOL:
4023
0
          for (;;)
4024
0
            {
4025
0
            BOOL ok;
4026
0
            const ucd_record *prop;
4027
0
            RMATCH(Fecode, RM222);
4028
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4029
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
4030
0
            if (Feptr >= mb->end_subject)
4031
0
              {
4032
0
              SCHECK_PARTIAL();
4033
0
              RRETURN(MATCH_NOMATCH);
4034
0
              }
4035
0
            GETCHARINCTEST(fc, Feptr);
4036
0
            prop = GET_UCD(fc);
4037
0
            ok = MAPBIT(PRIV(ucd_boolprop_sets) +
4038
0
              UCD_BPROPS_PROP(prop), Lpropvalue) != 0;
4039
0
            if (ok == (Lctype == OP_NOTPROP))
4040
0
              RRETURN(MATCH_NOMATCH);
4041
0
            }
4042
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
4043
4044
          /* This should never occur */
4045
4046
          /* LCOV_EXCL_START */
4047
0
          default:
4048
0
          PCRE2_DEBUG_UNREACHABLE();
4049
0
          return PCRE2_ERROR_INTERNAL;
4050
          /* LCOV_EXCL_STOP */
4051
0
          }
4052
0
        }
4053
4054
      /* Match extended Unicode sequences. We will get here only if the
4055
      support is in the binary; otherwise a compile-time error occurs. */
4056
4057
0
      else if (Lctype == OP_EXTUNI)
4058
0
        {
4059
0
        for (;;)
4060
0
          {
4061
0
          RMATCH(Fecode, RM217);
4062
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4063
0
          if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
4064
0
          if (Feptr >= mb->end_subject)
4065
0
            {
4066
0
            SCHECK_PARTIAL();
4067
0
            RRETURN(MATCH_NOMATCH);
4068
0
            }
4069
0
          else
4070
0
            {
4071
0
            GETCHARINCTEST(fc, Feptr);
4072
0
            Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject,
4073
0
              utf, NULL);
4074
0
            }
4075
0
          CHECK_PARTIAL();
4076
0
          }
4077
0
        }
4078
0
      else
4079
0
#endif     /* SUPPORT_UNICODE */
4080
4081
      /* UTF mode for non-property testing character types. */
4082
4083
0
#ifdef SUPPORT_UNICODE
4084
0
      if (utf)
4085
0
        {
4086
0
        for (;;)
4087
0
          {
4088
0
          RMATCH(Fecode, RM218);
4089
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4090
0
          if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
4091
0
          if (Feptr >= mb->end_subject)
4092
0
            {
4093
0
            SCHECK_PARTIAL();
4094
0
            RRETURN(MATCH_NOMATCH);
4095
0
            }
4096
0
          if (Lctype == OP_ANY && IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
4097
0
          GETCHARINC(fc, Feptr);
4098
0
          switch(Lctype)
4099
0
            {
4100
0
            case OP_ANY:               /* This is the non-NL case */
4101
0
            if (mb->partial != 0 &&    /* Take care with CRLF partial */
4102
0
                Feptr >= mb->end_subject &&
4103
0
                NLBLOCK->nltype == NLTYPE_FIXED &&
4104
0
                NLBLOCK->nllen == 2 &&
4105
0
                fc == NLBLOCK->nl[0])
4106
0
              {
4107
0
              mb->hitend = TRUE;
4108
0
              if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
4109
0
              }
4110
0
            break;
4111
4112
0
            case OP_ALLANY:
4113
0
            case OP_ANYBYTE:
4114
0
            break;
4115
4116
0
            case OP_ANYNL:
4117
0
            switch(fc)
4118
0
              {
4119
0
              default: RRETURN(MATCH_NOMATCH);
4120
4121
0
              case CHAR_CR:
4122
0
              if (Feptr < mb->end_subject && UCHAR21(Feptr) == CHAR_LF) Feptr++;
4123
0
              break;
4124
4125
0
              case CHAR_LF:
4126
0
              break;
4127
4128
0
              case CHAR_VT:
4129
0
              case CHAR_FF:
4130
0
              case CHAR_NEL:
4131
0
#ifndef EBCDIC
4132
0
              case 0x2028:
4133
0
              case 0x2029:
4134
0
#endif  /* Not EBCDIC */
4135
0
              if (mb->bsr_convention == PCRE2_BSR_ANYCRLF)
4136
0
                RRETURN(MATCH_NOMATCH);
4137
0
              break;
4138
0
              }
4139
0
            break;
4140
4141
0
            case OP_NOT_HSPACE:
4142
0
            switch(fc)
4143
0
              {
4144
0
              HSPACE_CASES: RRETURN(MATCH_NOMATCH);
4145
0
              default: break;
4146
0
              }
4147
0
            break;
4148
4149
0
            case OP_HSPACE:
4150
0
            switch(fc)
4151
0
              {
4152
0
              HSPACE_CASES: break;
4153
0
              default: RRETURN(MATCH_NOMATCH);
4154
0
              }
4155
0
            break;
4156
4157
0
            case OP_NOT_VSPACE:
4158
0
            switch(fc)
4159
0
              {
4160
0
              VSPACE_CASES: RRETURN(MATCH_NOMATCH);
4161
0
              default: break;
4162
0
              }
4163
0
            break;
4164
4165
0
            case OP_VSPACE:
4166
0
            switch(fc)
4167
0
              {
4168
0
              VSPACE_CASES: break;
4169
0
              default: RRETURN(MATCH_NOMATCH);
4170
0
              }
4171
0
            break;
4172
4173
0
            case OP_NOT_DIGIT:
4174
0
            if (fc < 256 && (mb->ctypes[fc] & ctype_digit) != 0)
4175
0
              RRETURN(MATCH_NOMATCH);
4176
0
            break;
4177
4178
0
            case OP_DIGIT:
4179
0
            if (fc >= 256 || (mb->ctypes[fc] & ctype_digit) == 0)
4180
0
              RRETURN(MATCH_NOMATCH);
4181
0
            break;
4182
4183
0
            case OP_NOT_WHITESPACE:
4184
0
            if (fc < 256 && (mb->ctypes[fc] & ctype_space) != 0)
4185
0
              RRETURN(MATCH_NOMATCH);
4186
0
            break;
4187
4188
0
            case OP_WHITESPACE:
4189
0
            if (fc >= 256 || (mb->ctypes[fc] & ctype_space) == 0)
4190
0
              RRETURN(MATCH_NOMATCH);
4191
0
            break;
4192
4193
0
            case OP_NOT_WORDCHAR:
4194
0
            if (fc < 256 && (mb->ctypes[fc] & ctype_word) != 0)
4195
0
              RRETURN(MATCH_NOMATCH);
4196
0
            break;
4197
4198
0
            case OP_WORDCHAR:
4199
0
            if (fc >= 256 || (mb->ctypes[fc] & ctype_word) == 0)
4200
0
              RRETURN(MATCH_NOMATCH);
4201
0
            break;
4202
4203
            /* LCOV_EXCL_START */
4204
0
            default:
4205
0
            PCRE2_DEBUG_UNREACHABLE();
4206
0
            return PCRE2_ERROR_INTERNAL;
4207
            /* LCOV_EXCL_STOP */
4208
0
            }
4209
0
          }
4210
0
        }
4211
0
      else
4212
0
#endif  /* SUPPORT_UNICODE */
4213
4214
      /* Not UTF mode */
4215
0
        {
4216
0
        for (;;)
4217
0
          {
4218
0
          RMATCH(Fecode, RM33);
4219
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4220
0
          if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
4221
0
          if (Feptr >= mb->end_subject)
4222
0
            {
4223
0
            SCHECK_PARTIAL();
4224
0
            RRETURN(MATCH_NOMATCH);
4225
0
            }
4226
0
          if (Lctype == OP_ANY && IS_NEWLINE(Feptr))
4227
0
            RRETURN(MATCH_NOMATCH);
4228
0
          fc = *Feptr++;
4229
0
          switch(Lctype)
4230
0
            {
4231
0
            case OP_ANY:               /* This is the non-NL case */
4232
0
            if (mb->partial != 0 &&    /* Take care with CRLF partial */
4233
0
                Feptr >= mb->end_subject &&
4234
0
                NLBLOCK->nltype == NLTYPE_FIXED &&
4235
0
                NLBLOCK->nllen == 2 &&
4236
0
                fc == NLBLOCK->nl[0])
4237
0
              {
4238
0
              mb->hitend = TRUE;
4239
0
              if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
4240
0
              }
4241
0
            break;
4242
4243
0
            case OP_ALLANY:
4244
0
            case OP_ANYBYTE:
4245
0
            break;
4246
4247
0
            case OP_ANYNL:
4248
0
            switch(fc)
4249
0
              {
4250
0
              default: RRETURN(MATCH_NOMATCH);
4251
4252
0
              case CHAR_CR:
4253
0
              if (Feptr < mb->end_subject && *Feptr == CHAR_LF) Feptr++;
4254
0
              break;
4255
4256
0
              case CHAR_LF:
4257
0
              break;
4258
4259
0
              case CHAR_VT:
4260
0
              case CHAR_FF:
4261
0
              case CHAR_NEL:
4262
#if PCRE2_CODE_UNIT_WIDTH != 8
4263
              case 0x2028:
4264
              case 0x2029:
4265
#endif
4266
0
              if (mb->bsr_convention == PCRE2_BSR_ANYCRLF)
4267
0
                RRETURN(MATCH_NOMATCH);
4268
0
              break;
4269
0
              }
4270
0
            break;
4271
4272
0
            case OP_NOT_HSPACE:
4273
0
            switch(fc)
4274
0
              {
4275
0
              default: break;
4276
0
              HSPACE_BYTE_CASES:
4277
#if PCRE2_CODE_UNIT_WIDTH != 8
4278
              HSPACE_MULTIBYTE_CASES:
4279
#endif
4280
0
              RRETURN(MATCH_NOMATCH);
4281
0
              }
4282
0
            break;
4283
4284
0
            case OP_HSPACE:
4285
0
            switch(fc)
4286
0
              {
4287
0
              default: RRETURN(MATCH_NOMATCH);
4288
0
              HSPACE_BYTE_CASES:
4289
#if PCRE2_CODE_UNIT_WIDTH != 8
4290
              HSPACE_MULTIBYTE_CASES:
4291
#endif
4292
0
              break;
4293
0
              }
4294
0
            break;
4295
4296
0
            case OP_NOT_VSPACE:
4297
0
            switch(fc)
4298
0
              {
4299
0
              default: break;
4300
0
              VSPACE_BYTE_CASES:
4301
#if PCRE2_CODE_UNIT_WIDTH != 8
4302
              VSPACE_MULTIBYTE_CASES:
4303
#endif
4304
0
              RRETURN(MATCH_NOMATCH);
4305
0
              }
4306
0
            break;
4307
4308
0
            case OP_VSPACE:
4309
0
            switch(fc)
4310
0
              {
4311
0
              default: RRETURN(MATCH_NOMATCH);
4312
0
              VSPACE_BYTE_CASES:
4313
#if PCRE2_CODE_UNIT_WIDTH != 8
4314
              VSPACE_MULTIBYTE_CASES:
4315
#endif
4316
0
              break;
4317
0
              }
4318
0
            break;
4319
4320
0
            case OP_NOT_DIGIT:
4321
0
            if (MAX_255(fc) && (mb->ctypes[fc] & ctype_digit) != 0)
4322
0
              RRETURN(MATCH_NOMATCH);
4323
0
            break;
4324
4325
0
            case OP_DIGIT:
4326
0
            if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_digit) == 0)
4327
0
              RRETURN(MATCH_NOMATCH);
4328
0
            break;
4329
4330
0
            case OP_NOT_WHITESPACE:
4331
0
            if (MAX_255(fc) && (mb->ctypes[fc] & ctype_space) != 0)
4332
0
              RRETURN(MATCH_NOMATCH);
4333
0
            break;
4334
4335
0
            case OP_WHITESPACE:
4336
0
            if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_space) == 0)
4337
0
              RRETURN(MATCH_NOMATCH);
4338
0
            break;
4339
4340
0
            case OP_NOT_WORDCHAR:
4341
0
            if (MAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0)
4342
0
              RRETURN(MATCH_NOMATCH);
4343
0
            break;
4344
4345
0
            case OP_WORDCHAR:
4346
0
            if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_word) == 0)
4347
0
              RRETURN(MATCH_NOMATCH);
4348
0
            break;
4349
4350
            /* LCOV_EXCL_START */
4351
0
            default:
4352
0
            PCRE2_DEBUG_UNREACHABLE();
4353
0
            return PCRE2_ERROR_INTERNAL;
4354
            /* LCOV_EXCL_STOP */
4355
0
            }
4356
0
          }
4357
0
        }
4358
4359
0
      PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */
4360
0
      }
4361
4362
    /* If maximizing, it is worth using inline code for speed, doing the type
4363
    test once at the start (i.e. keep it out of the loops). Once again,
4364
    "notmatch" can be an ordinary local variable because the loops do not call
4365
    RMATCH. */
4366
4367
0
    else
4368
0
      {
4369
0
      Lstart_eptr = Feptr;  /* Remember where we started */
4370
4371
0
#ifdef SUPPORT_UNICODE
4372
0
      if (proptype >= 0)
4373
0
        {
4374
0
        BOOL notmatch = Lctype == OP_NOTPROP;
4375
0
        switch(proptype)
4376
0
          {
4377
0
          case PT_LAMP:
4378
0
          for (i = Lmin; i < Lmax; i++)
4379
0
            {
4380
0
            int chartype;
4381
0
            int len = 1;
4382
0
            if (Feptr >= mb->end_subject)
4383
0
              {
4384
0
              SCHECK_PARTIAL();
4385
0
              break;
4386
0
              }
4387
0
            GETCHARLENTEST(fc, Feptr, len);
4388
0
            chartype = UCD_CHARTYPE(fc);
4389
0
            if ((chartype == ucp_Lu ||
4390
0
                 chartype == ucp_Ll ||
4391
0
                 chartype == ucp_Lt) == notmatch)
4392
0
              break;
4393
0
            Feptr+= len;
4394
0
            }
4395
0
          break;
4396
4397
0
          case PT_GC:
4398
0
          for (i = Lmin; i < Lmax; i++)
4399
0
            {
4400
0
            int len = 1;
4401
0
            if (Feptr >= mb->end_subject)
4402
0
              {
4403
0
              SCHECK_PARTIAL();
4404
0
              break;
4405
0
              }
4406
0
            GETCHARLENTEST(fc, Feptr, len);
4407
0
            if ((UCD_CATEGORY(fc) == Lpropvalue) == notmatch) break;
4408
0
            Feptr+= len;
4409
0
            }
4410
0
          break;
4411
4412
0
          case PT_PC:
4413
0
          for (i = Lmin; i < Lmax; i++)
4414
0
            {
4415
0
            int len = 1;
4416
0
            if (Feptr >= mb->end_subject)
4417
0
              {
4418
0
              SCHECK_PARTIAL();
4419
0
              break;
4420
0
              }
4421
0
            GETCHARLENTEST(fc, Feptr, len);
4422
0
            if ((UCD_CHARTYPE(fc) == Lpropvalue) == notmatch) break;
4423
0
            Feptr+= len;
4424
0
            }
4425
0
          break;
4426
4427
0
          case PT_SC:
4428
0
          for (i = Lmin; i < Lmax; i++)
4429
0
            {
4430
0
            int len = 1;
4431
0
            if (Feptr >= mb->end_subject)
4432
0
              {
4433
0
              SCHECK_PARTIAL();
4434
0
              break;
4435
0
              }
4436
0
            GETCHARLENTEST(fc, Feptr, len);
4437
0
            if ((UCD_SCRIPT(fc) == Lpropvalue) == notmatch) break;
4438
0
            Feptr+= len;
4439
0
            }
4440
0
          break;
4441
4442
0
          case PT_SCX:
4443
0
          for (i = Lmin; i < Lmax; i++)
4444
0
            {
4445
0
            BOOL ok;
4446
0
            const ucd_record *prop;
4447
0
            int len = 1;
4448
0
            if (Feptr >= mb->end_subject)
4449
0
              {
4450
0
              SCHECK_PARTIAL();
4451
0
              break;
4452
0
              }
4453
0
            GETCHARLENTEST(fc, Feptr, len);
4454
0
            prop = GET_UCD(fc);
4455
0
            ok = (prop->script == Lpropvalue ||
4456
0
                  MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0);
4457
0
            if (ok == notmatch) break;
4458
0
            Feptr+= len;
4459
0
            }
4460
0
          break;
4461
4462
0
          case PT_ALNUM:
4463
0
          for (i = Lmin; i < Lmax; i++)
4464
0
            {
4465
0
            int category;
4466
0
            int len = 1;
4467
0
            if (Feptr >= mb->end_subject)
4468
0
              {
4469
0
              SCHECK_PARTIAL();
4470
0
              break;
4471
0
              }
4472
0
            GETCHARLENTEST(fc, Feptr, len);
4473
0
            category = UCD_CATEGORY(fc);
4474
0
            if ((category == ucp_L || category == ucp_N) == notmatch)
4475
0
              break;
4476
0
            Feptr+= len;
4477
0
            }
4478
0
          break;
4479
4480
          /* Perl space used to exclude VT, but from Perl 5.18 it is included,
4481
          which means that Perl space and POSIX space are now identical. PCRE
4482
          was changed at release 8.34. */
4483
4484
0
          case PT_SPACE:    /* Perl space */
4485
0
          case PT_PXSPACE:  /* POSIX space */
4486
0
          for (i = Lmin; i < Lmax; i++)
4487
0
            {
4488
0
            int len = 1;
4489
0
            if (Feptr >= mb->end_subject)
4490
0
              {
4491
0
              SCHECK_PARTIAL();
4492
0
              break;
4493
0
              }
4494
0
            GETCHARLENTEST(fc, Feptr, len);
4495
0
            switch(fc)
4496
0
              {
4497
0
              HSPACE_CASES:
4498
0
              VSPACE_CASES:
4499
0
              if (notmatch) goto ENDLOOP99;  /* Break the loop */
4500
0
              break;
4501
4502
0
              default:
4503
0
              if ((UCD_CATEGORY(fc) == ucp_Z) == notmatch)
4504
0
                goto ENDLOOP99;   /* Break the loop */
4505
0
              break;
4506
0
              }
4507
0
            Feptr+= len;
4508
0
            }
4509
0
          ENDLOOP99:
4510
0
          break;
4511
4512
0
          case PT_WORD:
4513
0
          for (i = Lmin; i < Lmax; i++)
4514
0
            {
4515
0
            int chartype, category;
4516
0
            int len = 1;
4517
0
            if (Feptr >= mb->end_subject)
4518
0
              {
4519
0
              SCHECK_PARTIAL();
4520
0
              break;
4521
0
              }
4522
0
            GETCHARLENTEST(fc, Feptr, len);
4523
0
            chartype = UCD_CHARTYPE(fc);
4524
0
            category = PRIV(ucp_gentype)[chartype];
4525
0
            if ((category == ucp_L ||
4526
0
                 category == ucp_N ||
4527
0
                 chartype == ucp_Mn ||
4528
0
                 chartype == ucp_Pc) == notmatch)
4529
0
              break;
4530
0
            Feptr+= len;
4531
0
            }
4532
0
          break;
4533
4534
0
          case PT_CLIST:
4535
0
          for (i = Lmin; i < Lmax; i++)
4536
0
            {
4537
0
            const uint32_t *cp;
4538
0
            int len = 1;
4539
0
            if (Feptr >= mb->end_subject)
4540
0
              {
4541
0
              SCHECK_PARTIAL();
4542
0
              break;
4543
0
              }
4544
0
            GETCHARLENTEST(fc, Feptr, len);
4545
#if PCRE2_CODE_UNIT_WIDTH == 32
4546
            if (fc > MAX_UTF_CODE_POINT)
4547
              {
4548
              if (!notmatch) goto GOT_MAX;
4549
              }
4550
            else
4551
#endif
4552
0
              {
4553
0
              cp = PRIV(ucd_caseless_sets) + Lpropvalue;
4554
0
              for (;;)
4555
0
                {
4556
0
                if (fc < *cp)
4557
0
                  { if (notmatch) break; else goto GOT_MAX; }
4558
0
                if (fc == *cp++)
4559
0
                  { if (notmatch) goto GOT_MAX; else break; }
4560
0
                }
4561
0
              }
4562
4563
0
            Feptr += len;
4564
0
            }
4565
0
          GOT_MAX:
4566
0
          break;
4567
4568
0
          case PT_UCNC:
4569
0
          for (i = Lmin; i < Lmax; i++)
4570
0
            {
4571
0
            int len = 1;
4572
0
            if (Feptr >= mb->end_subject)
4573
0
              {
4574
0
              SCHECK_PARTIAL();
4575
0
              break;
4576
0
              }
4577
0
            GETCHARLENTEST(fc, Feptr, len);
4578
0
            if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
4579
0
                 fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
4580
0
                 fc >= 0xe000) == notmatch)
4581
0
              break;
4582
0
            Feptr += len;
4583
0
            }
4584
0
          break;
4585
4586
0
          case PT_BIDICL:
4587
0
          for (i = Lmin; i < Lmax; i++)
4588
0
            {
4589
0
            int len = 1;
4590
0
            if (Feptr >= mb->end_subject)
4591
0
              {
4592
0
              SCHECK_PARTIAL();
4593
0
              break;
4594
0
              }
4595
0
            GETCHARLENTEST(fc, Feptr, len);
4596
0
            if ((UCD_BIDICLASS(fc) == Lpropvalue) == notmatch) break;
4597
0
            Feptr+= len;
4598
0
            }
4599
0
          break;
4600
4601
0
          case PT_BOOL:
4602
0
          for (i = Lmin; i < Lmax; i++)
4603
0
            {
4604
0
            BOOL ok;
4605
0
            const ucd_record *prop;
4606
0
            int len = 1;
4607
0
            if (Feptr >= mb->end_subject)
4608
0
              {
4609
0
              SCHECK_PARTIAL();
4610
0
              break;
4611
0
              }
4612
0
            GETCHARLENTEST(fc, Feptr, len);
4613
0
            prop = GET_UCD(fc);
4614
0
            ok = MAPBIT(PRIV(ucd_boolprop_sets) +
4615
0
              UCD_BPROPS_PROP(prop), Lpropvalue) != 0;
4616
0
            if (ok == notmatch) break;
4617
0
            Feptr+= len;
4618
0
            }
4619
0
          break;
4620
4621
          /* LCOV_EXCL_START */
4622
0
          default:
4623
0
          PCRE2_DEBUG_UNREACHABLE();
4624
0
          return PCRE2_ERROR_INTERNAL;
4625
          /* LCOV_EXCL_STOP */
4626
0
          }
4627
4628
        /* Feptr is now past the end of the maximum run */
4629
4630
0
        if (reptype == REPTYPE_POS) continue;    /* No backtracking */
4631
4632
        /* After \C in UTF mode, Lstart_eptr might be in the middle of a
4633
        Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
4634
        go too far. */
4635
4636
0
        for(;;)
4637
0
          {
4638
0
          if (Feptr <= Lstart_eptr) break;
4639
0
          RMATCH(Fecode, RM221);
4640
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4641
0
          Feptr--;
4642
0
          if (utf) BACKCHAR(Feptr);
4643
0
          }
4644
0
        }
4645
4646
      /* Match extended Unicode grapheme clusters. We will get here only if the
4647
      support is in the binary; otherwise a compile-time error occurs. */
4648
4649
0
      else if (Lctype == OP_EXTUNI)
4650
0
        {
4651
0
        for (i = Lmin; i < Lmax; i++)
4652
0
          {
4653
0
          if (Feptr >= mb->end_subject)
4654
0
            {
4655
0
            SCHECK_PARTIAL();
4656
0
            break;
4657
0
            }
4658
0
          else
4659
0
            {
4660
0
            GETCHARINCTEST(fc, Feptr);
4661
0
            Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject,
4662
0
              utf, NULL);
4663
0
            }
4664
0
          CHECK_PARTIAL();
4665
0
          }
4666
4667
        /* Feptr is now past the end of the maximum run */
4668
4669
0
        if (reptype == REPTYPE_POS) continue;    /* No backtracking */
4670
4671
        /* We use <= Lstart_eptr rather than == Lstart_eptr to detect the start
4672
        of the run while backtracking because the use of \C in UTF mode can
4673
        cause BACKCHAR to move back past Lstart_eptr. This is just palliative;
4674
        the use of \C in UTF mode is fraught with danger. */
4675
4676
0
        for(;;)
4677
0
          {
4678
0
          int lgb, rgb;
4679
0
          PCRE2_SPTR fptr;
4680
4681
0
          if (Feptr <= Lstart_eptr) break;   /* At start of char run */
4682
0
          RMATCH(Fecode, RM219);
4683
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4684
4685
          /* Backtracking over an extended grapheme cluster involves inspecting
4686
          the previous two characters (if present) to see if a break is
4687
          permitted between them. */
4688
4689
0
          Feptr--;
4690
0
          if (!utf) fc = *Feptr; else
4691
0
            {
4692
0
            BACKCHAR(Feptr);
4693
0
            GETCHAR(fc, Feptr);
4694
0
            }
4695
0
          rgb = UCD_GRAPHBREAK(fc);
4696
4697
0
          for (;;)
4698
0
            {
4699
0
            if (Feptr <= Lstart_eptr) break;   /* At start of char run */
4700
0
            fptr = Feptr - 1;
4701
0
            if (!utf) fc = *fptr; else
4702
0
              {
4703
0
              BACKCHAR(fptr);
4704
0
              GETCHAR(fc, fptr);
4705
0
              }
4706
0
            lgb = UCD_GRAPHBREAK(fc);
4707
0
            if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
4708
0
            Feptr = fptr;
4709
0
            rgb = lgb;
4710
0
            }
4711
0
          }
4712
0
        }
4713
4714
0
      else
4715
0
#endif   /* SUPPORT_UNICODE */
4716
4717
0
#ifdef SUPPORT_UNICODE
4718
0
      if (utf)
4719
0
        {
4720
0
        switch(Lctype)
4721
0
          {
4722
0
          case OP_ANY:
4723
0
          for (i = Lmin; i < Lmax; i++)
4724
0
            {
4725
0
            if (Feptr >= mb->end_subject)
4726
0
              {
4727
0
              SCHECK_PARTIAL();
4728
0
              break;
4729
0
              }
4730
0
            if (IS_NEWLINE(Feptr)) break;
4731
0
            if (mb->partial != 0 &&    /* Take care with CRLF partial */
4732
0
                Feptr + 1 >= mb->end_subject &&
4733
0
                NLBLOCK->nltype == NLTYPE_FIXED &&
4734
0
                NLBLOCK->nllen == 2 &&
4735
0
                UCHAR21(Feptr) == NLBLOCK->nl[0])
4736
0
              {
4737
0
              mb->hitend = TRUE;
4738
0
              if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
4739
0
              }
4740
0
            Feptr++;
4741
0
            ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
4742
0
            }
4743
0
          break;
4744
4745
0
          case OP_ALLANY:
4746
0
          if (Lmax < UINT32_MAX)
4747
0
            {
4748
0
            for (i = Lmin; i < Lmax; i++)
4749
0
              {
4750
0
              if (Feptr >= mb->end_subject)
4751
0
                {
4752
0
                SCHECK_PARTIAL();
4753
0
                break;
4754
0
                }
4755
0
              Feptr++;
4756
0
              ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
4757
0
              }
4758
0
            }
4759
0
          else
4760
0
            {
4761
0
            Feptr = mb->end_subject;   /* Unlimited UTF-8 repeat */
4762
0
            SCHECK_PARTIAL();
4763
0
            }
4764
0
          break;
4765
4766
          /* The "byte" (i.e. "code unit") case is the same as non-UTF */
4767
4768
0
          case OP_ANYBYTE:
4769
0
          fc = Lmax - Lmin;
4770
0
          if (fc > (uint32_t)(mb->end_subject - Feptr))
4771
0
            {
4772
0
            Feptr = mb->end_subject;
4773
0
            SCHECK_PARTIAL();
4774
0
            }
4775
0
          else Feptr += fc;
4776
0
          break;
4777
4778
0
          case OP_ANYNL:
4779
0
          for (i = Lmin; i < Lmax; i++)
4780
0
            {
4781
0
            int len = 1;
4782
0
            if (Feptr >= mb->end_subject)
4783
0
              {
4784
0
              SCHECK_PARTIAL();
4785
0
              break;
4786
0
              }
4787
0
            GETCHARLEN(fc, Feptr, len);
4788
0
            if (fc == CHAR_CR)
4789
0
              {
4790
0
              if (++Feptr >= mb->end_subject) break;
4791
0
              if (UCHAR21(Feptr) == CHAR_LF) Feptr++;
4792
0
              }
4793
0
            else
4794
0
              {
4795
0
              if (fc != CHAR_LF &&
4796
0
                  (mb->bsr_convention == PCRE2_BSR_ANYCRLF ||
4797
0
                   (fc != CHAR_VT && fc != CHAR_FF && fc != CHAR_NEL
4798
0
#ifndef EBCDIC
4799
0
                    && fc != 0x2028 && fc != 0x2029
4800
0
#endif  /* Not EBCDIC */
4801
0
                    )))
4802
0
                break;
4803
0
              Feptr += len;
4804
0
              }
4805
0
            }
4806
0
          break;
4807
4808
0
          case OP_NOT_HSPACE:
4809
0
          case OP_HSPACE:
4810
0
          for (i = Lmin; i < Lmax; i++)
4811
0
            {
4812
0
            BOOL gotspace;
4813
0
            int len = 1;
4814
0
            if (Feptr >= mb->end_subject)
4815
0
              {
4816
0
              SCHECK_PARTIAL();
4817
0
              break;
4818
0
              }
4819
0
            GETCHARLEN(fc, Feptr, len);
4820
0
            switch(fc)
4821
0
              {
4822
0
              HSPACE_CASES: gotspace = TRUE; break;
4823
0
              default: gotspace = FALSE; break;
4824
0
              }
4825
0
            if (gotspace == (Lctype == OP_NOT_HSPACE)) break;
4826
0
            Feptr += len;
4827
0
            }
4828
0
          break;
4829
4830
0
          case OP_NOT_VSPACE:
4831
0
          case OP_VSPACE:
4832
0
          for (i = Lmin; i < Lmax; i++)
4833
0
            {
4834
0
            BOOL gotspace;
4835
0
            int len = 1;
4836
0
            if (Feptr >= mb->end_subject)
4837
0
              {
4838
0
              SCHECK_PARTIAL();
4839
0
              break;
4840
0
              }
4841
0
            GETCHARLEN(fc, Feptr, len);
4842
0
            switch(fc)
4843
0
              {
4844
0
              VSPACE_CASES: gotspace = TRUE; break;
4845
0
              default: gotspace = FALSE; break;
4846
0
              }
4847
0
            if (gotspace == (Lctype == OP_NOT_VSPACE)) break;
4848
0
            Feptr += len;
4849
0
            }
4850
0
          break;
4851
4852
0
          case OP_NOT_DIGIT:
4853
0
          for (i = Lmin; i < Lmax; i++)
4854
0
            {
4855
0
            int len = 1;
4856
0
            if (Feptr >= mb->end_subject)
4857
0
              {
4858
0
              SCHECK_PARTIAL();
4859
0
              break;
4860
0
              }
4861
0
            GETCHARLEN(fc, Feptr, len);
4862
0
            if (fc < 256 && (mb->ctypes[fc] & ctype_digit) != 0) break;
4863
0
            Feptr+= len;
4864
0
            }
4865
0
          break;
4866
4867
0
          case OP_DIGIT:
4868
0
          for (i = Lmin; i < Lmax; i++)
4869
0
            {
4870
0
            int len = 1;
4871
0
            if (Feptr >= mb->end_subject)
4872
0
              {
4873
0
              SCHECK_PARTIAL();
4874
0
              break;
4875
0
              }
4876
0
            GETCHARLEN(fc, Feptr, len);
4877
0
            if (fc >= 256 ||(mb->ctypes[fc] & ctype_digit) == 0) break;
4878
0
            Feptr+= len;
4879
0
            }
4880
0
          break;
4881
4882
0
          case OP_NOT_WHITESPACE:
4883
0
          for (i = Lmin; i < Lmax; i++)
4884
0
            {
4885
0
            int len = 1;
4886
0
            if (Feptr >= mb->end_subject)
4887
0
              {
4888
0
              SCHECK_PARTIAL();
4889
0
              break;
4890
0
              }
4891
0
            GETCHARLEN(fc, Feptr, len);
4892
0
            if (fc < 256 && (mb->ctypes[fc] & ctype_space) != 0) break;
4893
0
            Feptr+= len;
4894
0
            }
4895
0
          break;
4896
4897
0
          case OP_WHITESPACE:
4898
0
          for (i = Lmin; i < Lmax; i++)
4899
0
            {
4900
0
            int len = 1;
4901
0
            if (Feptr >= mb->end_subject)
4902
0
              {
4903
0
              SCHECK_PARTIAL();
4904
0
              break;
4905
0
              }
4906
0
            GETCHARLEN(fc, Feptr, len);
4907
0
            if (fc >= 256 ||(mb->ctypes[fc] & ctype_space) == 0) break;
4908
0
            Feptr+= len;
4909
0
            }
4910
0
          break;
4911
4912
0
          case OP_NOT_WORDCHAR:
4913
0
          for (i = Lmin; i < Lmax; i++)
4914
0
            {
4915
0
            int len = 1;
4916
0
            if (Feptr >= mb->end_subject)
4917
0
              {
4918
0
              SCHECK_PARTIAL();
4919
0
              break;
4920
0
              }
4921
0
            GETCHARLEN(fc, Feptr, len);
4922
0
            if (fc < 256 && (mb->ctypes[fc] & ctype_word) != 0) break;
4923
0
            Feptr+= len;
4924
0
            }
4925
0
          break;
4926
4927
0
          case OP_WORDCHAR:
4928
0
          for (i = Lmin; i < Lmax; i++)
4929
0
            {
4930
0
            int len = 1;
4931
0
            if (Feptr >= mb->end_subject)
4932
0
              {
4933
0
              SCHECK_PARTIAL();
4934
0
              break;
4935
0
              }
4936
0
            GETCHARLEN(fc, Feptr, len);
4937
0
            if (fc >= 256 || (mb->ctypes[fc] & ctype_word) == 0) break;
4938
0
            Feptr+= len;
4939
0
            }
4940
0
          break;
4941
4942
          /* LCOV_EXCL_START */
4943
0
          default:
4944
0
          PCRE2_DEBUG_UNREACHABLE();
4945
0
          return PCRE2_ERROR_INTERNAL;
4946
          /* LCOV_EXCL_STOP */
4947
0
          }
4948
4949
0
        if (reptype == REPTYPE_POS) continue;    /* No backtracking */
4950
4951
        /* After \C in UTF mode, Lstart_eptr might be in the middle of a
4952
        Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't go
4953
        too far. */
4954
4955
0
        for(;;)
4956
0
          {
4957
0
          if (Feptr <= Lstart_eptr) break;
4958
0
          RMATCH(Fecode, RM220);
4959
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4960
0
          Feptr--;
4961
0
          BACKCHAR(Feptr);
4962
0
          if (Lctype == OP_ANYNL && Feptr > Lstart_eptr &&
4963
0
              UCHAR21(Feptr) == CHAR_NL && UCHAR21(Feptr - 1) == CHAR_CR)
4964
0
            Feptr--;
4965
0
          }
4966
0
        }
4967
0
      else
4968
0
#endif  /* SUPPORT_UNICODE */
4969
4970
      /* Not UTF mode */
4971
0
        {
4972
0
        switch(Lctype)
4973
0
          {
4974
0
          case OP_ANY:
4975
0
          for (i = Lmin; i < Lmax; i++)
4976
0
            {
4977
0
            if (Feptr >= mb->end_subject)
4978
0
              {
4979
0
              SCHECK_PARTIAL();
4980
0
              break;
4981
0
              }
4982
0
            if (IS_NEWLINE(Feptr)) break;
4983
0
            if (mb->partial != 0 &&    /* Take care with CRLF partial */
4984
0
                Feptr + 1 >= mb->end_subject &&
4985
0
                NLBLOCK->nltype == NLTYPE_FIXED &&
4986
0
                NLBLOCK->nllen == 2 &&
4987
0
                *Feptr == NLBLOCK->nl[0])
4988
0
              {
4989
0
              mb->hitend = TRUE;
4990
0
              if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
4991
0
              }
4992
0
            Feptr++;
4993
0
            }
4994
0
          break;
4995
4996
0
          case OP_ALLANY:
4997
0
          case OP_ANYBYTE:
4998
0
          fc = Lmax - Lmin;
4999
0
          if (fc > (uint32_t)(mb->end_subject - Feptr))
5000
0
            {
5001
0
            Feptr = mb->end_subject;
5002
0
            SCHECK_PARTIAL();
5003
0
            }
5004
0
          else Feptr += fc;
5005
0
          break;
5006
5007
0
          case OP_ANYNL:
5008
0
          for (i = Lmin; i < Lmax; i++)
5009
0
            {
5010
0
            if (Feptr >= mb->end_subject)
5011
0
              {
5012
0
              SCHECK_PARTIAL();
5013
0
              break;
5014
0
              }
5015
0
            fc = *Feptr;
5016
0
            if (fc == CHAR_CR)
5017
0
              {
5018
0
              if (++Feptr >= mb->end_subject) break;
5019
0
              if (*Feptr == CHAR_LF) Feptr++;
5020
0
              }
5021
0
            else
5022
0
              {
5023
0
              if (fc != CHAR_LF && (mb->bsr_convention == PCRE2_BSR_ANYCRLF ||
5024
0
                 (fc != CHAR_VT && fc != CHAR_FF && fc != CHAR_NEL
5025
#if PCRE2_CODE_UNIT_WIDTH != 8
5026
                 && fc != 0x2028 && fc != 0x2029
5027
#endif
5028
0
                 ))) break;
5029
0
              Feptr++;
5030
0
              }
5031
0
            }
5032
0
          break;
5033
5034
0
          case OP_NOT_HSPACE:
5035
0
          for (i = Lmin; i < Lmax; i++)
5036
0
            {
5037
0
            if (Feptr >= mb->end_subject)
5038
0
              {
5039
0
              SCHECK_PARTIAL();
5040
0
              break;
5041
0
              }
5042
0
            switch(*Feptr)
5043
0
              {
5044
0
              default: Feptr++; break;
5045
0
              HSPACE_BYTE_CASES:
5046
#if PCRE2_CODE_UNIT_WIDTH != 8
5047
              HSPACE_MULTIBYTE_CASES:
5048
#endif
5049
0
              goto ENDLOOP00;
5050
0
              }
5051
0
            }
5052
0
          ENDLOOP00:
5053
0
          break;
5054
5055
0
          case OP_HSPACE:
5056
0
          for (i = Lmin; i < Lmax; i++)
5057
0
            {
5058
0
            if (Feptr >= mb->end_subject)
5059
0
              {
5060
0
              SCHECK_PARTIAL();
5061
0
              break;
5062
0
              }
5063
0
            switch(*Feptr)
5064
0
              {
5065
0
              default: goto ENDLOOP01;
5066
0
              HSPACE_BYTE_CASES:
5067
#if PCRE2_CODE_UNIT_WIDTH != 8
5068
              HSPACE_MULTIBYTE_CASES:
5069
#endif
5070
0
              Feptr++; break;
5071
0
              }
5072
0
            }
5073
0
          ENDLOOP01:
5074
0
          break;
5075
5076
0
          case OP_NOT_VSPACE:
5077
0
          for (i = Lmin; i < Lmax; i++)
5078
0
            {
5079
0
            if (Feptr >= mb->end_subject)
5080
0
              {
5081
0
              SCHECK_PARTIAL();
5082
0
              break;
5083
0
              }
5084
0
            switch(*Feptr)
5085
0
              {
5086
0
              default: Feptr++; break;
5087
0
              VSPACE_BYTE_CASES:
5088
#if PCRE2_CODE_UNIT_WIDTH != 8
5089
              VSPACE_MULTIBYTE_CASES:
5090
#endif
5091
0
              goto ENDLOOP02;
5092
0
              }
5093
0
            }
5094
0
          ENDLOOP02:
5095
0
          break;
5096
5097
0
          case OP_VSPACE:
5098
0
          for (i = Lmin; i < Lmax; i++)
5099
0
            {
5100
0
            if (Feptr >= mb->end_subject)
5101
0
              {
5102
0
              SCHECK_PARTIAL();
5103
0
              break;
5104
0
              }
5105
0
            switch(*Feptr)
5106
0
              {
5107
0
              default: goto ENDLOOP03;
5108
0
              VSPACE_BYTE_CASES:
5109
#if PCRE2_CODE_UNIT_WIDTH != 8
5110
              VSPACE_MULTIBYTE_CASES:
5111
#endif
5112
0
              Feptr++; break;
5113
0
              }
5114
0
            }
5115
0
          ENDLOOP03:
5116
0
          break;
5117
5118
0
          case OP_NOT_DIGIT:
5119
0
          for (i = Lmin; i < Lmax; i++)
5120
0
            {
5121
0
            if (Feptr >= mb->end_subject)
5122
0
              {
5123
0
              SCHECK_PARTIAL();
5124
0
              break;
5125
0
              }
5126
0
            if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_digit) != 0)
5127
0
              break;
5128
0
            Feptr++;
5129
0
            }
5130
0
          break;
5131
5132
0
          case OP_DIGIT:
5133
0
          for (i = Lmin; i < Lmax; i++)
5134
0
            {
5135
0
            if (Feptr >= mb->end_subject)
5136
0
              {
5137
0
              SCHECK_PARTIAL();
5138
0
              break;
5139
0
              }
5140
0
            if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_digit) == 0)
5141
0
              break;
5142
0
            Feptr++;
5143
0
            }
5144
0
          break;
5145
5146
0
          case OP_NOT_WHITESPACE:
5147
0
          for (i = Lmin; i < Lmax; i++)
5148
0
            {
5149
0
            if (Feptr >= mb->end_subject)
5150
0
              {
5151
0
              SCHECK_PARTIAL();
5152
0
              break;
5153
0
              }
5154
0
            if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_space) != 0)
5155
0
              break;
5156
0
            Feptr++;
5157
0
            }
5158
0
          break;
5159
5160
0
          case OP_WHITESPACE:
5161
0
          for (i = Lmin; i < Lmax; i++)
5162
0
            {
5163
0
            if (Feptr >= mb->end_subject)
5164
0
              {
5165
0
              SCHECK_PARTIAL();
5166
0
              break;
5167
0
              }
5168
0
            if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_space) == 0)
5169
0
              break;
5170
0
            Feptr++;
5171
0
            }
5172
0
          break;
5173
5174
0
          case OP_NOT_WORDCHAR:
5175
0
          for (i = Lmin; i < Lmax; i++)
5176
0
            {
5177
0
            if (Feptr >= mb->end_subject)
5178
0
              {
5179
0
              SCHECK_PARTIAL();
5180
0
              break;
5181
0
              }
5182
0
            if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_word) != 0)
5183
0
              break;
5184
0
            Feptr++;
5185
0
            }
5186
0
          break;
5187
5188
0
          case OP_WORDCHAR:
5189
0
          for (i = Lmin; i < Lmax; i++)
5190
0
            {
5191
0
            if (Feptr >= mb->end_subject)
5192
0
              {
5193
0
              SCHECK_PARTIAL();
5194
0
              break;
5195
0
              }
5196
0
            if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_word) == 0)
5197
0
              break;
5198
0
            Feptr++;
5199
0
            }
5200
0
          break;
5201
5202
          /* LCOV_EXCL_START */
5203
0
          default:
5204
0
          PCRE2_DEBUG_UNREACHABLE();
5205
0
          return PCRE2_ERROR_INTERNAL;
5206
          /* LCOV_EXCL_STOP */
5207
0
          }
5208
5209
0
        if (reptype == REPTYPE_POS) continue;    /* No backtracking */
5210
5211
0
        for (;;)
5212
0
          {
5213
0
          if (Feptr == Lstart_eptr) break;
5214
0
          RMATCH(Fecode, RM34);
5215
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5216
0
          Feptr--;
5217
0
          if (Lctype == OP_ANYNL && Feptr > Lstart_eptr && *Feptr == CHAR_LF &&
5218
0
              Feptr[-1] == CHAR_CR) Feptr--;
5219
0
          }
5220
0
        }
5221
0
      }
5222
0
    break;  /* End of repeat character type processing */
5223
5224
0
#undef Lstart_eptr
5225
0
#undef Lmin
5226
0
#undef Lmax
5227
0
#undef Lctype
5228
0
#undef Lpropvalue
5229
5230
5231
    /* ===================================================================== */
5232
    /* Match a back reference, possibly repeatedly. Look past the end of the
5233
    item to see if there is repeat information following. The OP_REF and
5234
    OP_REFI opcodes are used for a reference to a numbered group or to a
5235
    non-duplicated named group. For a duplicated named group, OP_DNREF and
5236
    OP_DNREFI are used. In this case we must scan the list of groups to which
5237
    the name refers, and use the first one that is set. */
5238
5239
0
#define Lmin      F->temp_32[0]
5240
0
#define Lmax      F->temp_32[1]
5241
0
#define Lcaseless F->temp_32[2]
5242
0
#define Lcaseopts F->temp_32[3]
5243
0
#define Lstart    F->temp_sptr[0]
5244
0
#define Loffset   F->temp_size
5245
5246
0
    case OP_DNREF:
5247
0
    case OP_DNREFI:
5248
0
    Lcaseless = (Fop == OP_DNREFI);
5249
0
    Lcaseopts = (Fop == OP_DNREFI)? Fecode[1 + 2*IMM2_SIZE] : 0;
5250
0
      {
5251
0
      int count = GET2(Fecode, 1+IMM2_SIZE);
5252
0
      PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size;
5253
0
      Fecode += 1 + 2*IMM2_SIZE + (Fop == OP_DNREFI? 1 : 0);
5254
5255
0
      while (count-- > 0)
5256
0
        {
5257
0
        Loffset = (GET2(slot, 0) << 1) - 2;
5258
0
        if (Loffset < Foffset_top && Fovector[Loffset] != PCRE2_UNSET) break;
5259
0
        slot += mb->name_entry_size;
5260
0
        }
5261
0
      }
5262
0
    goto REF_REPEAT;
5263
5264
0
    case OP_REF:
5265
0
    case OP_REFI:
5266
0
    Lcaseless = (Fop == OP_REFI);
5267
0
    Lcaseopts = (Fop == OP_REFI)? Fecode[1 + IMM2_SIZE] : 0;
5268
0
    Loffset = (GET2(Fecode, 1) << 1) - 2;
5269
0
    Fecode += 1 + IMM2_SIZE + (Fop == OP_REFI? 1 : 0);
5270
5271
    /* Set up for repetition, or handle the non-repeated case. The maximum and
5272
    minimum must be in the heap frame, but as they are short-term values, we
5273
    use temporary fields. */
5274
5275
0
    REF_REPEAT:
5276
0
    switch (*Fecode)
5277
0
      {
5278
0
      case OP_CRSTAR:
5279
0
      case OP_CRMINSTAR:
5280
0
      case OP_CRPLUS:
5281
0
      case OP_CRMINPLUS:
5282
0
      case OP_CRQUERY:
5283
0
      case OP_CRMINQUERY:
5284
0
      fc = *Fecode++ - OP_CRSTAR;
5285
0
      Lmin = rep_min[fc];
5286
0
      Lmax = rep_max[fc];
5287
0
      reptype = rep_typ[fc];
5288
0
      break;
5289
5290
0
      case OP_CRRANGE:
5291
0
      case OP_CRMINRANGE:
5292
0
      Lmin = GET2(Fecode, 1);
5293
0
      Lmax = GET2(Fecode, 1 + IMM2_SIZE);
5294
0
      reptype = rep_typ[*Fecode - OP_CRSTAR];
5295
0
      if (Lmax == 0) Lmax = UINT32_MAX;  /* Max 0 => infinity */
5296
0
      Fecode += 1 + 2 * IMM2_SIZE;
5297
0
      break;
5298
5299
0
      default:                  /* No repeat follows */
5300
0
        {
5301
0
        rrc = match_ref(Loffset, Lcaseless, Lcaseopts, F, mb, &length);
5302
0
        if (rrc != 0)
5303
0
          {
5304
0
          if (rrc > 0) Feptr = mb->end_subject;   /* Partial match */
5305
0
          CHECK_PARTIAL();
5306
0
          RRETURN(MATCH_NOMATCH);
5307
0
          }
5308
0
        }
5309
0
      Feptr += length;
5310
0
      continue;              /* With the main loop */
5311
0
      }
5312
5313
    /* Handle repeated back references. If a set group has length zero, just
5314
    continue with the main loop, because it matches however many times. For an
5315
    unset reference, if the minimum is zero, we can also just continue. We can
5316
    also continue if PCRE2_MATCH_UNSET_BACKREF is set, because this makes an unset
5317
    group behave as a zero-length group. For any other unset case, carrying
5318
    on will result in NOMATCH. */
5319
5320
0
    if (Loffset < Foffset_top && Fovector[Loffset] != PCRE2_UNSET)
5321
0
      {
5322
0
      if (Fovector[Loffset] == Fovector[Loffset + 1]) continue;
5323
0
      }
5324
0
    else  /* Group is not set */
5325
0
      {
5326
0
      if (Lmin == 0 || (mb->poptions & PCRE2_MATCH_UNSET_BACKREF) != 0)
5327
0
        continue;
5328
0
      }
5329
5330
    /* First, ensure the minimum number of matches are present. */
5331
5332
0
    for (i = 1; i <= Lmin; i++)
5333
0
      {
5334
0
      PCRE2_SIZE slength;
5335
0
      rrc = match_ref(Loffset, Lcaseless, Lcaseopts, F, mb, &slength);
5336
0
      if (rrc != 0)
5337
0
        {
5338
0
        if (rrc > 0) Feptr = mb->end_subject;   /* Partial match */
5339
0
        CHECK_PARTIAL();
5340
0
        RRETURN(MATCH_NOMATCH);
5341
0
        }
5342
0
      Feptr += slength;
5343
0
      }
5344
5345
    /* If min = max, we are done. They are not both allowed to be zero. */
5346
5347
0
    if (Lmin == Lmax) continue;
5348
5349
    /* If minimizing, keep trying and advancing the pointer. */
5350
5351
0
    if (reptype == REPTYPE_MIN)
5352
0
      {
5353
0
      for (;;)
5354
0
        {
5355
0
        PCRE2_SIZE slength;
5356
0
        RMATCH(Fecode, RM20);
5357
0
        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5358
0
        if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
5359
0
        rrc = match_ref(Loffset, Lcaseless, Lcaseopts, F, mb, &slength);
5360
0
        if (rrc != 0)
5361
0
          {
5362
0
          if (rrc > 0) Feptr = mb->end_subject;   /* Partial match */
5363
0
          CHECK_PARTIAL();
5364
0
          RRETURN(MATCH_NOMATCH);
5365
0
          }
5366
0
        Feptr += slength;
5367
0
        }
5368
5369
0
      PCRE2_UNREACHABLE(); /* Control never reaches here */
5370
0
      }
5371
5372
    /* If maximizing, find the longest string and work backwards, as long as
5373
    the matched lengths for each iteration are the same. */
5374
5375
0
    else
5376
0
      {
5377
0
      BOOL samelengths = TRUE;
5378
0
      Lstart = Feptr;     /* Starting position */
5379
0
      Flength = Fovector[Loffset+1] - Fovector[Loffset];
5380
5381
0
      for (i = Lmin; i < Lmax; i++)
5382
0
        {
5383
0
        PCRE2_SIZE slength;
5384
0
        rrc = match_ref(Loffset, Lcaseless, Lcaseopts, F, mb, &slength);
5385
0
        if (rrc != 0)
5386
0
          {
5387
          /* Can't use CHECK_PARTIAL because we don't want to update Feptr in
5388
          the soft partial matching case. */
5389
5390
0
          if (rrc > 0 && mb->partial != 0 &&
5391
0
              mb->end_subject > mb->start_used_ptr)
5392
0
            {
5393
0
            mb->hitend = TRUE;
5394
0
            if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
5395
0
            }
5396
0
          break;
5397
0
          }
5398
5399
0
        if (slength != Flength) samelengths = FALSE;
5400
0
        Feptr += slength;
5401
0
        }
5402
5403
      /* If the length matched for each repetition is the same as the length of
5404
      the captured group, we can easily work backwards. This is the normal
5405
      case. However, in caseless UTF-8 mode there are pairs of case-equivalent
5406
      characters whose lengths (in terms of code units) differ. Because this
5407
      is very rare, we handle it by re-matching fewer and fewer times. */
5408
5409
0
      if (samelengths)
5410
0
        {
5411
0
        while (Feptr >= Lstart)
5412
0
          {
5413
0
          RMATCH(Fecode, RM21);
5414
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5415
0
          Feptr -= Flength;
5416
0
          }
5417
0
        }
5418
5419
      /* The rare case of non-matching lengths. Re-scan the repetition for each
5420
      iteration. We know that match_ref() will succeed every time. */
5421
5422
0
      else
5423
0
        {
5424
0
        Lmax = i;
5425
0
        for (;;)
5426
0
          {
5427
0
          RMATCH(Fecode, RM22);
5428
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5429
0
          if (Feptr == Lstart) break; /* Failed after minimal repetition */
5430
0
          Feptr = Lstart;
5431
0
          Lmax--;
5432
0
          for (i = Lmin; i < Lmax; i++)
5433
0
            {
5434
0
            PCRE2_SIZE slength;
5435
0
            (void)match_ref(Loffset, Lcaseless, Lcaseopts, F, mb, &slength);
5436
0
            Feptr += slength;
5437
0
            }
5438
0
          }
5439
0
        }
5440
5441
0
      RRETURN(MATCH_NOMATCH);
5442
0
      }
5443
5444
0
    PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */
5445
5446
0
#undef Lcaseless
5447
0
#undef Lmin
5448
0
#undef Lmax
5449
0
#undef Lstart
5450
0
#undef Loffset
5451
5452
5453
5454
/* ========================================================================= */
5455
/*           Opcodes for the start of various parenthesized items            */
5456
/* ========================================================================= */
5457
5458
    /* In all cases, if the result of RMATCH() is MATCH_THEN, check whether the
5459
    (*THEN) is within the current branch by comparing the address of OP_THEN
5460
    that is passed back with the end of the branch. If (*THEN) is within the
5461
    current branch, and the branch is one of two or more alternatives (it
5462
    either starts or ends with OP_ALT), we have reached the limit of THEN's
5463
    action, so convert the return code to NOMATCH, which will cause normal
5464
    backtracking to happen from now on. Otherwise, THEN is passed back to an
5465
    outer alternative. This implements Perl's treatment of parenthesized
5466
    groups, where a group not containing | does not affect the current
5467
    alternative, that is, (X) is NOT the same as (X|(*F)). */
5468
5469
5470
    /* ===================================================================== */
5471
    /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a non-possessive
5472
    bracket group, indicating that it may occur zero times. It may repeat
5473
    infinitely, or not at all - i.e. it could be ()* or ()? or even (){0} in
5474
    the pattern. Brackets with fixed upper repeat limits are compiled as a
5475
    number of copies, with the optional ones preceded by BRAZERO or BRAMINZERO.
5476
    Possessive groups with possible zero repeats are preceded by BRAPOSZERO. */
5477
5478
0
#define Lnext_ecode F->temp_sptr[0]
5479
5480
0
    case OP_BRAZERO:
5481
0
    Lnext_ecode = Fecode + 1;
5482
0
    RMATCH(Lnext_ecode, RM9);
5483
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5484
0
    do Lnext_ecode += GET(Lnext_ecode, 1); while (*Lnext_ecode == OP_ALT);
5485
0
    Fecode = Lnext_ecode + 1 + LINK_SIZE;
5486
0
    break;
5487
5488
0
    case OP_BRAMINZERO:
5489
0
    Lnext_ecode = Fecode + 1;
5490
0
    do Lnext_ecode += GET(Lnext_ecode, 1); while (*Lnext_ecode == OP_ALT);
5491
0
    RMATCH(Lnext_ecode + 1 + LINK_SIZE, RM10);
5492
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5493
0
    Fecode++;
5494
0
    break;
5495
5496
0
#undef Lnext_ecode
5497
5498
0
    case OP_SKIPZERO:
5499
0
    Fecode++;
5500
0
    do Fecode += GET(Fecode,1); while (*Fecode == OP_ALT);
5501
0
    Fecode += 1 + LINK_SIZE;
5502
0
    break;
5503
5504
5505
    /* ===================================================================== */
5506
    /* Handle possessive brackets with an unlimited repeat. The end of these
5507
    brackets will always be OP_KETRPOS, which returns MATCH_KETRPOS without
5508
    going further in the pattern. */
5509
5510
0
#define Lframe_type    F->temp_32[0]
5511
0
#define Lmatched_once  F->temp_32[1]
5512
0
#define Lzero_allowed  F->temp_32[2]
5513
0
#define Lstart_eptr    F->temp_sptr[0]
5514
0
#define Lstart_group   F->temp_sptr[1]
5515
5516
0
    case OP_BRAPOSZERO:
5517
0
    Lzero_allowed = TRUE;                /* Zero repeat is allowed */
5518
0
    Fecode += 1;
5519
0
    if (*Fecode == OP_CBRAPOS || *Fecode == OP_SCBRAPOS)
5520
0
      goto POSSESSIVE_CAPTURE;
5521
0
    goto POSSESSIVE_NON_CAPTURE;
5522
5523
0
    case OP_BRAPOS:
5524
0
    case OP_SBRAPOS:
5525
0
    Lzero_allowed = FALSE;               /* Zero repeat not allowed */
5526
5527
0
    POSSESSIVE_NON_CAPTURE:
5528
0
    Lframe_type = GF_NOCAPTURE;          /* Remembered frame type */
5529
0
    goto POSSESSIVE_GROUP;
5530
5531
0
    case OP_CBRAPOS:
5532
0
    case OP_SCBRAPOS:
5533
0
    Lzero_allowed = FALSE;               /* Zero repeat not allowed */
5534
5535
0
    POSSESSIVE_CAPTURE:
5536
0
    number = GET2(Fecode, 1+LINK_SIZE);
5537
0
    Lframe_type = GF_CAPTURE | number;   /* Remembered frame type */
5538
5539
0
    POSSESSIVE_GROUP:
5540
0
    Lmatched_once = FALSE;               /* Never matched */
5541
0
    Lstart_group = Fecode;               /* Start of this group */
5542
5543
0
    for (;;)
5544
0
      {
5545
0
      Lstart_eptr = Feptr;               /* Position at group start */
5546
0
      group_frame_type = Lframe_type;
5547
0
      RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM8);
5548
0
      if (rrc == MATCH_KETRPOS)
5549
0
        {
5550
0
        Lmatched_once = TRUE;            /* Matched at least once */
5551
0
        if (Feptr == Lstart_eptr)        /* Empty match; skip to end */
5552
0
          {
5553
0
          do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
5554
0
          break;
5555
0
          }
5556
5557
0
        Fecode = Lstart_group;
5558
0
        continue;
5559
0
        }
5560
5561
      /* See comment above about handling THEN. */
5562
5563
0
      if (rrc == MATCH_THEN)
5564
0
        {
5565
0
        PCRE2_SPTR next_ecode = Fecode + GET(Fecode,1);
5566
0
        if (mb->verb_ecode_ptr < next_ecode &&
5567
0
            (*Fecode == OP_ALT || *next_ecode == OP_ALT))
5568
0
          rrc = MATCH_NOMATCH;
5569
0
        }
5570
5571
0
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5572
0
      Fecode += GET(Fecode, 1);
5573
0
      if (*Fecode != OP_ALT) break;
5574
0
      }
5575
5576
    /* Success if matched something or zero repeat allowed */
5577
5578
0
    if (Lmatched_once || Lzero_allowed)
5579
0
      {
5580
0
      Fecode += 1 + LINK_SIZE;
5581
0
      break;
5582
0
      }
5583
5584
0
    RRETURN(MATCH_NOMATCH);
5585
5586
0
#undef Lmatched_once
5587
0
#undef Lzero_allowed
5588
0
#undef Lframe_type
5589
0
#undef Lstart_eptr
5590
0
#undef Lstart_group
5591
5592
5593
    /* ===================================================================== */
5594
    /* Handle non-capturing brackets that cannot match an empty string. When we
5595
    get to the final alternative within the brackets, as long as there are no
5596
    THEN's in the pattern, we can optimize by not recording a new backtracking
5597
    point. (Ideally we should test for a THEN within this group, but we don't
5598
    have that information.) Don't do this if we are at the very top level,
5599
    however, because that would make handling assertions and once-only brackets
5600
    messier when there is nothing to go back to. */
5601
5602
0
#define Lframe_type F->temp_32[0]     /* Set for all that use GROUPLOOP */
5603
0
#define Lnext_branch F->temp_sptr[0]  /* Used only in OP_BRA handling */
5604
5605
0
    case OP_BRA:
5606
0
    if (mb->hasthen || Frdepth == 0)
5607
0
      {
5608
0
      Lframe_type = 0;
5609
0
      goto GROUPLOOP;
5610
0
      }
5611
5612
0
    for (;;)
5613
0
      {
5614
0
      Lnext_branch = Fecode + GET(Fecode, 1);
5615
0
      if (*Lnext_branch != OP_ALT) break;
5616
5617
      /* This is never the final branch. We do not need to test for MATCH_THEN
5618
      here because this code is not used when there is a THEN in the pattern. */
5619
5620
0
      RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM1);
5621
0
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5622
0
      Fecode = Lnext_branch;
5623
0
      }
5624
5625
    /* Hit the start of the final branch. Continue at this level. */
5626
5627
0
    Fecode += PRIV(OP_lengths)[*Fecode];
5628
0
    break;
5629
5630
0
#undef Lnext_branch
5631
5632
5633
    /* ===================================================================== */
5634
    /* Handle a capturing bracket, other than those that are possessive with an
5635
    unlimited repeat. */
5636
5637
0
    case OP_CBRA:
5638
0
    case OP_SCBRA:
5639
0
    Lframe_type = GF_CAPTURE | GET2(Fecode, 1+LINK_SIZE);
5640
0
    goto GROUPLOOP;
5641
5642
5643
    /* ===================================================================== */
5644
    /* Atomic groups and non-capturing brackets that can match an empty string
5645
    must record a backtracking point and also set up a chained frame. */
5646
5647
0
    case OP_ONCE:
5648
0
    case OP_SCRIPT_RUN:
5649
0
    case OP_SBRA:
5650
0
    Lframe_type = GF_NOCAPTURE | Fop;
5651
5652
0
    GROUPLOOP:
5653
0
    for (;;)
5654
0
      {
5655
0
      group_frame_type = Lframe_type;
5656
0
      RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM2);
5657
0
      if (rrc == MATCH_THEN)
5658
0
        {
5659
0
        PCRE2_SPTR next_ecode = Fecode + GET(Fecode,1);
5660
0
        if (mb->verb_ecode_ptr < next_ecode &&
5661
0
            (*Fecode == OP_ALT || *next_ecode == OP_ALT))
5662
0
          rrc = MATCH_NOMATCH;
5663
0
        }
5664
0
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5665
0
      Fecode += GET(Fecode, 1);
5666
0
      if (*Fecode != OP_ALT) RRETURN(MATCH_NOMATCH);
5667
0
      }
5668
0
    PCRE2_UNREACHABLE(); /* Control never reaches here */
5669
5670
0
#undef Lframe_type
5671
5672
5673
    /* ===================================================================== */
5674
    /* Pattern recursion either matches the current regex, or some
5675
    subexpression. The offset data is the offset to the starting bracket from
5676
    the start of the whole pattern. This is so that it works from duplicated
5677
    subpatterns. For a whole-pattern recursion, we have to infer the number
5678
    zero. */
5679
5680
0
#define Lframe_type F->temp_32[0]
5681
0
#define Lstart_branch F->temp_sptr[0]
5682
5683
0
    case OP_RECURSE:
5684
0
    bracode = mb->start_code + GET(Fecode, 1);
5685
0
    number = (bracode == mb->start_code)? 0 : GET2(bracode, 1 + LINK_SIZE);
5686
5687
    /* If we are already in a pattern recursion, check for repeating the same
5688
    one without changing the subject pointer or the last referenced character
5689
    in the subject. This should catch convoluted mutual recursions; some
5690
    simple cases are caught at compile time. However, there are rare cases when
5691
    this check needs to be turned off. In this case, actual recursion loops
5692
    will be caught by the match or heap limits. */
5693
5694
0
    if (Fcurrent_recurse != RECURSE_UNSET)
5695
0
      {
5696
0
      offset = Flast_group_offset;
5697
0
      while (offset != PCRE2_UNSET)
5698
0
        {
5699
0
        N = (heapframe *)((char *)match_data->heapframes + offset);
5700
0
        P = (heapframe *)((char *)N - frame_size);
5701
0
        if (N->group_frame_type == (GF_RECURSE | number))
5702
0
          {
5703
0
          if (Feptr == P->eptr && mb->last_used_ptr == P->recurse_last_used &&
5704
0
               (mb->moptions & PCRE2_DISABLE_RECURSELOOP_CHECK) == 0)
5705
0
            return PCRE2_ERROR_RECURSELOOP;
5706
0
          break;
5707
0
          }
5708
0
        offset = P->last_group_offset;
5709
0
        }
5710
0
      }
5711
5712
    /* Remember the current last referenced character and then run the
5713
    recursion branch by branch. */
5714
5715
0
    F->recurse_last_used = mb->last_used_ptr;
5716
0
    Lstart_branch = bracode;
5717
0
    Lframe_type = GF_RECURSE | number;
5718
5719
0
    for (;;)
5720
0
      {
5721
0
      PCRE2_SPTR next_ecode;
5722
5723
0
      group_frame_type = Lframe_type;
5724
0
      RMATCH(Lstart_branch + PRIV(OP_lengths)[*Lstart_branch], RM11);
5725
0
      next_ecode = Lstart_branch + GET(Lstart_branch,1);
5726
5727
      /* Handle backtracking verbs, which are defined in a range that can
5728
      easily be tested for. PCRE does not allow THEN, SKIP, PRUNE or COMMIT to
5729
      escape beyond a recursion; they cause a NOMATCH for the entire recursion.
5730
5731
      When one of these verbs triggers, the current recursion group number is
5732
      recorded. If it matches the recursion we are processing, the verb
5733
      happened within the recursion and we must deal with it. Otherwise it must
5734
      have happened after the recursion completed, and so has to be passed
5735
      back. See comment above about handling THEN. */
5736
5737
0
      if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX &&
5738
0
          mb->verb_current_recurse == (Lframe_type ^ GF_RECURSE))
5739
0
        {
5740
0
        if (rrc == MATCH_THEN && mb->verb_ecode_ptr < next_ecode &&
5741
0
            (*Lstart_branch == OP_ALT || *next_ecode == OP_ALT))
5742
0
          rrc = MATCH_NOMATCH;
5743
0
        else RRETURN(MATCH_NOMATCH);
5744
0
        }
5745
5746
      /* Note that carrying on after (*ACCEPT) in a recursion is handled in the
5747
      OP_ACCEPT code. Nothing needs to be done here. */
5748
5749
0
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5750
0
      Lstart_branch = next_ecode;
5751
0
      if (*Lstart_branch != OP_ALT) RRETURN(MATCH_NOMATCH);
5752
0
      }
5753
0
    PCRE2_UNREACHABLE(); /* Control never reaches here */
5754
5755
0
#undef Lframe_type
5756
0
#undef Lstart_branch
5757
5758
5759
    /* ===================================================================== */
5760
    /* Positive assertions are like other groups except that PCRE doesn't allow
5761
    the effect of (*THEN) to escape beyond an assertion; it is therefore
5762
    treated as NOMATCH. (*ACCEPT) is treated as a successful assertion, with its
5763
    captures and mark retained. Any other return is an error. */
5764
5765
0
#define Lframe_type  F->temp_32[0]
5766
5767
0
    case OP_ASSERT:
5768
0
    case OP_ASSERTBACK:
5769
0
    case OP_ASSERT_NA:
5770
0
    case OP_ASSERTBACK_NA:
5771
0
    Lframe_type = GF_NOCAPTURE | Fop;
5772
0
    for (;;)
5773
0
      {
5774
0
      group_frame_type = Lframe_type;
5775
0
      RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM3);
5776
0
      if (rrc == MATCH_ACCEPT)
5777
0
        {
5778
0
        memcpy(Fovector,
5779
0
              (char *)assert_accept_frame + offsetof(heapframe, ovector),
5780
0
              assert_accept_frame->offset_top * sizeof(PCRE2_SIZE));
5781
0
        Foffset_top = assert_accept_frame->offset_top;
5782
0
        Fmark = assert_accept_frame->mark;
5783
0
        break;
5784
0
        }
5785
0
      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
5786
0
      Fecode += GET(Fecode, 1);
5787
0
      if (*Fecode != OP_ALT) RRETURN(MATCH_NOMATCH);
5788
0
      }
5789
5790
0
    do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
5791
0
    Fecode += 1 + LINK_SIZE;
5792
0
    break;
5793
5794
0
#undef Lframe_type
5795
5796
5797
    /* ===================================================================== */
5798
    /* Handle negative assertions. Loop for each non-matching branch as for
5799
    positive assertions. */
5800
5801
0
#define Lframe_type  F->temp_32[0]
5802
5803
0
    case OP_ASSERT_NOT:
5804
0
    case OP_ASSERTBACK_NOT:
5805
0
    Lframe_type  = GF_NOCAPTURE | Fop;
5806
5807
0
    for (;;)
5808
0
      {
5809
0
      group_frame_type = Lframe_type;
5810
0
      RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM4);
5811
0
      switch(rrc)
5812
0
        {
5813
0
        case MATCH_ACCEPT:   /* Assertion matched, therefore it fails. */
5814
0
        case MATCH_MATCH:
5815
0
        RRETURN (MATCH_NOMATCH);
5816
5817
0
        case MATCH_NOMATCH:  /* Branch failed, try next if present. */
5818
0
        case MATCH_THEN:
5819
0
        Fecode += GET(Fecode, 1);
5820
0
        if (*Fecode != OP_ALT) goto ASSERT_NOT_FAILED;
5821
0
        break;
5822
5823
0
        case MATCH_COMMIT:   /* Assertion forced to fail, therefore continue. */
5824
0
        case MATCH_SKIP:
5825
0
        case MATCH_PRUNE:
5826
0
        do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
5827
0
        goto ASSERT_NOT_FAILED;
5828
5829
0
        default:             /* Pass back any other return */
5830
0
        RRETURN(rrc);
5831
0
        }
5832
0
      }
5833
5834
    /* None of the branches have matched or there was a backtrack to (*COMMIT),
5835
    (*SKIP), (*PRUNE), or (*THEN) in the last branch. This is success for a
5836
    negative assertion, so carry on. */
5837
5838
0
    ASSERT_NOT_FAILED:
5839
0
    Fecode += 1 + LINK_SIZE;
5840
0
    break;
5841
5842
0
#undef Lframe_type
5843
5844
    /* ===================================================================== */
5845
    /* Handle scan substring operation. */
5846
5847
0
#define Lframe_type          F->temp_32[0]
5848
0
#define Lextra_size          F->temp_32[1]
5849
0
#define Lsaved_moptions      F->temp_32[2]
5850
0
#define Lsaved_end_subject   F->temp_sptr[0]
5851
0
#define Lsaved_eptr          F->temp_sptr[1]
5852
0
#define Ltrue_end_extra      F->temp_size
5853
5854
0
    case OP_ASSERT_SCS:
5855
0
      {
5856
0
      PCRE2_SPTR ecode = Fecode + 1 + LINK_SIZE;
5857
0
      uint32_t extra_size = 0;
5858
0
      int count;
5859
0
      PCRE2_SPTR slot;
5860
5861
      /* Disable compiler warning. */
5862
0
      offset = 0;
5863
0
      (void)offset;
5864
5865
0
      for (;;)
5866
0
        {
5867
0
        if (*ecode == OP_CREF)
5868
0
          {
5869
0
          extra_size += 1+IMM2_SIZE;
5870
0
          offset = (GET2(ecode, 1) << 1) - 2;
5871
0
          ecode += 1+IMM2_SIZE;
5872
0
          if (offset < Foffset_top && Fovector[offset] != PCRE2_UNSET)
5873
0
            goto SCS_OFFSET_FOUND;
5874
0
          continue;
5875
0
          }
5876
5877
0
        if (*ecode != OP_DNCREF) RRETURN(MATCH_NOMATCH);
5878
5879
0
        count = GET2(ecode, 1 + IMM2_SIZE);
5880
0
        slot = mb->name_table + GET2(ecode, 1) * mb->name_entry_size;
5881
0
        extra_size += 1+2*IMM2_SIZE;
5882
0
        ecode += 1+2*IMM2_SIZE;
5883
5884
0
        while (count > 0)
5885
0
          {
5886
0
          offset = (GET2(slot, 0) << 1) - 2;
5887
0
          if (offset < Foffset_top && Fovector[offset] != PCRE2_UNSET)
5888
0
            goto SCS_OFFSET_FOUND;
5889
0
          slot += mb->name_entry_size;
5890
0
          count--;
5891
0
          }
5892
0
        }
5893
5894
0
      SCS_OFFSET_FOUND:
5895
5896
      /* Skip remaining options. */
5897
0
      for (;;)
5898
0
        {
5899
0
        if (*ecode == OP_CREF)
5900
0
          {
5901
0
          extra_size += 1+IMM2_SIZE;
5902
0
          ecode += 1+IMM2_SIZE;
5903
0
          }
5904
0
        else if (*ecode == OP_DNCREF)
5905
0
          {
5906
0
          extra_size += 1+2*IMM2_SIZE;
5907
0
          ecode += 1+2*IMM2_SIZE;
5908
0
          }
5909
0
        else break;
5910
0
        }
5911
5912
0
      Lextra_size = extra_size;
5913
0
      }
5914
5915
0
    Lsaved_end_subject = mb->end_subject;
5916
0
    Ltrue_end_extra = mb->true_end_subject - mb->end_subject;
5917
0
    Lsaved_eptr = Feptr;
5918
0
    Lsaved_moptions = mb->moptions;
5919
5920
0
    Feptr = mb->start_subject + Fovector[offset];
5921
0
    mb->true_end_subject = mb->end_subject =
5922
0
      mb->start_subject + Fovector[offset + 1];
5923
0
    mb->moptions &= ~PCRE2_NOTEOL;
5924
5925
0
    Lframe_type = GF_NOCAPTURE | Fop;
5926
0
    for (;;)
5927
0
      {
5928
0
      group_frame_type = Lframe_type;
5929
0
      RMATCH(Fecode + 1 + LINK_SIZE + Lextra_size, RM38);
5930
0
      if (rrc == MATCH_ACCEPT)
5931
0
        {
5932
0
        memcpy(Fovector,
5933
0
              (char *)assert_accept_frame + offsetof(heapframe, ovector),
5934
0
              assert_accept_frame->offset_top * sizeof(PCRE2_SIZE));
5935
0
        Foffset_top = assert_accept_frame->offset_top;
5936
0
        Fmark = assert_accept_frame->mark;
5937
0
        mb->end_subject = Lsaved_end_subject;
5938
0
        mb->true_end_subject = mb->end_subject + Ltrue_end_extra;
5939
0
        mb->moptions = Lsaved_moptions;
5940
0
        break;
5941
0
        }
5942
5943
0
      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
5944
0
        {
5945
0
        mb->end_subject = Lsaved_end_subject;
5946
0
        mb->true_end_subject = mb->end_subject + Ltrue_end_extra;
5947
0
        mb->moptions = Lsaved_moptions;
5948
0
        RRETURN(rrc);
5949
0
        }
5950
5951
0
      Fecode += GET(Fecode, 1);
5952
0
      if (*Fecode != OP_ALT)
5953
0
        {
5954
0
        mb->end_subject = Lsaved_end_subject;
5955
0
        mb->true_end_subject = mb->end_subject + Ltrue_end_extra;
5956
0
        mb->moptions = Lsaved_moptions;
5957
0
        RRETURN(MATCH_NOMATCH);
5958
0
        }
5959
0
      Lextra_size = 0;
5960
0
      }
5961
5962
0
    do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
5963
0
    Fecode += 1 + LINK_SIZE;
5964
0
    Feptr = Lsaved_eptr;
5965
0
    break;
5966
5967
0
#undef Lframe_type
5968
0
#undef Lextra_size
5969
0
#undef Lsaved_end_subject
5970
0
#undef Lsaved_eptr
5971
0
#undef Ltrue_end_extra
5972
0
#undef Lsaved_moptions  /* Release the saved-match-options alias used by OP_ASSERT_SCS */
5973
5974
    /* ===================================================================== */
5975
    /* The callout item calls an external function, if one is provided, passing
5976
    details of the match so far. This is mainly for debugging, though the
5977
    function is able to force a failure. */
5978
5979
0
    case OP_CALLOUT:
5980
0
    case OP_CALLOUT_STR:
5981
0
    rrc = do_callout(F, mb, &length);
5982
0
    if (rrc > 0) RRETURN(MATCH_NOMATCH);
5983
0
    if (rrc < 0) RRETURN(rrc);
5984
0
    Fecode += length;
5985
0
    break;
5986
5987
5988
    /* ===================================================================== */
5989
    /* Conditional group: compilation checked that there are no more than two
5990
    branches. If the condition is false, skipping the first branch takes us
5991
    past the end of the item if there is only one branch, but that's exactly
5992
    what we want. */
5993
5994
0
    case OP_COND:
5995
0
    case OP_SCOND:
5996
5997
    /* The variable Flength will be added to Fecode when the condition is
5998
    false, to get to the second branch. Setting it to the offset to the ALT or
5999
    KET, then incrementing Fecode achieves this effect. However, if the second
6000
    branch is non-existent, we must point to the KET so that the end of the
6001
    group is correctly processed. We now have Fecode pointing to the condition
6002
    or callout. */
6003
6004
0
    Flength = GET(Fecode, 1);    /* Offset to the second branch */
6005
0
    if (Fecode[Flength] != OP_ALT) Flength -= 1 + LINK_SIZE;
6006
0
    Fecode += 1 + LINK_SIZE;     /* From this opcode */
6007
6008
    /* Because of the way auto-callout works during compile, a callout item is
6009
    inserted between OP_COND and an assertion condition. Such a callout can
6010
    also be inserted manually. */
6011
6012
0
    if (*Fecode == OP_CALLOUT || *Fecode == OP_CALLOUT_STR)
6013
0
      {
6014
0
      rrc = do_callout(F, mb, &length);
6015
0
      if (rrc > 0) RRETURN(MATCH_NOMATCH);
6016
0
      if (rrc < 0) RRETURN(rrc);
6017
6018
      /* Advance Fecode past the callout, so it now points to the condition. We
6019
      must adjust Flength so that the value of Fecode+Flength is unchanged. */
6020
6021
0
      Fecode += length;
6022
0
      Flength -= length;
6023
0
      }
6024
6025
    /* Test the various possible conditions */
6026
6027
0
    condition = FALSE;
6028
0
    switch(*Fecode)
6029
0
      {
6030
0
      case OP_RREF:                  /* Group recursion test */
6031
0
      if (Fcurrent_recurse != RECURSE_UNSET)
6032
0
        {
6033
0
        number = GET2(Fecode, 1);
6034
0
        condition = (number == RREF_ANY || number == Fcurrent_recurse);
6035
0
        }
6036
0
      break;
6037
6038
0
      case OP_DNRREF:       /* Duplicate named group recursion test */
6039
0
      if (Fcurrent_recurse != RECURSE_UNSET)
6040
0
        {
6041
0
        int count = GET2(Fecode, 1 + IMM2_SIZE);
6042
0
        PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size;
6043
0
        while (count-- > 0)
6044
0
          {
6045
0
          number = GET2(slot, 0);
6046
0
          condition = number == Fcurrent_recurse;
6047
0
          if (condition) break;
6048
0
          slot += mb->name_entry_size;
6049
0
          }
6050
0
        }
6051
0
      break;
6052
6053
0
      case OP_CREF:                         /* Numbered group used test */
6054
0
      offset = (GET2(Fecode, 1) << 1) - 2;  /* Doubled ref number */
6055
0
      condition = offset < Foffset_top && Fovector[offset] != PCRE2_UNSET;
6056
0
      break;
6057
6058
0
      case OP_DNCREF:      /* Duplicate named group used test */
6059
0
        {
6060
0
        int count = GET2(Fecode, 1 + IMM2_SIZE);
6061
0
        PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size;
6062
0
        while (count-- > 0)
6063
0
          {
6064
0
          offset = (GET2(slot, 0) << 1) - 2;
6065
0
          condition = offset < Foffset_top && Fovector[offset] != PCRE2_UNSET;
6066
0
          if (condition) break;
6067
0
          slot += mb->name_entry_size;
6068
0
          }
6069
0
        }
6070
0
      break;
6071
6072
0
      case OP_FALSE:
6073
0
      case OP_FAIL:   /* The assertion (?!) becomes OP_FAIL */
6074
0
      break;
6075
6076
0
      case OP_TRUE:
6077
0
      condition = TRUE;
6078
0
      break;
6079
6080
      /* The condition is an assertion. Run code similar to the assertion code
6081
      above. */
6082
6083
0
#define Lpositive      F->temp_32[0]
6084
0
#define Lstart_branch  F->temp_sptr[0]
6085
6086
0
      default:
6087
0
      Lpositive = (*Fecode == OP_ASSERT || *Fecode == OP_ASSERTBACK);
6088
0
      Lstart_branch = Fecode;
6089
6090
0
      for (;;)
6091
0
        {
6092
0
        group_frame_type = GF_CONDASSERT | *Fecode;
6093
0
        RMATCH(Lstart_branch + PRIV(OP_lengths)[*Lstart_branch], RM5);
6094
6095
0
        switch(rrc)
6096
0
          {
6097
0
          case MATCH_ACCEPT:  /* Save captures */
6098
0
          memcpy(Fovector,
6099
0
                (char *)assert_accept_frame + offsetof(heapframe, ovector),
6100
0
                assert_accept_frame->offset_top * sizeof(PCRE2_SIZE));
6101
0
          Foffset_top = assert_accept_frame->offset_top;
6102
6103
0
          PCRE2_FALLTHROUGH /* Fall through */
6104
          /* In the case of a match, the captures have already been put into
6105
          the current frame. */
6106
6107
0
          case MATCH_MATCH:
6108
0
          condition = Lpositive;   /* TRUE for positive assertion */
6109
0
          break;
6110
6111
          /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
6112
          assertion; it is therefore always treated as NOMATCH. */
6113
6114
0
          case MATCH_NOMATCH:
6115
0
          case MATCH_THEN:
6116
0
          Lstart_branch += GET(Lstart_branch, 1);
6117
0
          if (*Lstart_branch == OP_ALT) continue;  /* Try next branch */
6118
0
          condition = !Lpositive;  /* TRUE for negative assertion */
6119
0
          break;
6120
6121
          /* These force no match without checking other branches. */
6122
6123
0
          case MATCH_COMMIT:
6124
0
          case MATCH_SKIP:
6125
0
          case MATCH_PRUNE:
6126
0
          condition = !Lpositive;
6127
0
          break;
6128
6129
0
          default:
6130
0
          RRETURN(rrc);
6131
0
          }
6132
0
        break;  /* Out of the branch loop */
6133
0
        }
6134
6135
      /* If the condition is true, find the end of the assertion so that
6136
      advancing past it gets us to the start of the first branch. */
6137
6138
0
      if (condition)
6139
0
        {
6140
0
        do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
6141
0
        }
6142
0
      break;  /* End of assertion condition */
6143
0
      }
6144
6145
0
#undef Lpositive
6146
0
#undef Lstart_branch
6147
6148
    /* Choose branch according to the condition. */
6149
6150
0
    Fecode += condition? PRIV(OP_lengths)[*Fecode] : Flength;
6151
6152
    /* If the opcode is OP_SCOND it means we are at a repeated conditional
6153
    group that might match an empty string. We must therefore descend a level
6154
    so that the start is remembered for checking. For OP_COND we can just
6155
    continue at this level. */
6156
6157
0
    if (Fop == OP_SCOND)
6158
0
      {
6159
0
      group_frame_type  = GF_NOCAPTURE | Fop;
6160
0
      RMATCH(Fecode, RM35);
6161
0
      RRETURN(rrc);
6162
0
      }
6163
0
    break;
6164
6165
6166
6167
/* ========================================================================= */
6168
/*                  End of start of parenthesis opcodes                      */
6169
/* ========================================================================= */
6170
6171
6172
    /* ===================================================================== */
6173
    /* Move the subject pointer back by one fixed amount. This occurs at the
6174
    start of each branch that has a fixed length in a lookbehind assertion. If
6175
    we are too close to the start to move back, fail. When working with UTF-8
6176
    we move back a number of characters, not bytes. */
6177
6178
0
    case OP_REVERSE:
6179
0
    number = GET2(Fecode, 1);
6180
0
#ifdef SUPPORT_UNICODE
6181
0
    if (utf)
6182
0
      {
6183
      /* We used to do a simpler `while (number-- > 0)` but that triggers
6184
      clang's unsigned integer overflow sanitizer. */
6185
0
      while (number > 0)
6186
0
        {
6187
0
        --number;
6188
0
        if (Feptr <= mb->check_subject) RRETURN(MATCH_NOMATCH);
6189
0
        Feptr--;
6190
0
        BACKCHAR(Feptr);
6191
0
        }
6192
0
      }
6193
0
    else
6194
0
#endif
6195
6196
    /* No UTF support, or not in UTF mode: count is code unit count */
6197
6198
0
      {
6199
0
      if ((ptrdiff_t)number > Feptr - mb->start_subject) RRETURN(MATCH_NOMATCH);
6200
0
      Feptr -= number;
6201
0
      }
6202
6203
    /* Save the earliest consulted character, then skip to next opcode */
6204
6205
0
    if (Feptr < mb->start_used_ptr) mb->start_used_ptr = Feptr;
6206
0
    Fecode += 1 + IMM2_SIZE;
6207
0
    break;
6208
6209
6210
    /* ===================================================================== */
6211
    /* Move the subject pointer back by a variable amount. This occurs at the
6212
    start of each branch of a lookbehind assertion when the branch has a
6213
    variable, but limited, length. A loop is needed to try matching the branch
6214
    after moving back different numbers of characters. If we are too close to
6215
    the start to move back even the minimum amount, fail. When working with
6216
    UTF-8 we move back a number of characters, not bytes. */
6217
6218
0
#define Lmin F->temp_32[0]
6219
0
#define Lmax F->temp_32[1]
6220
0
#define Leptr F->temp_sptr[0]
6221
6222
0
    case OP_VREVERSE:
6223
0
    Lmin = GET2(Fecode, 1);
6224
0
    Lmax = GET2(Fecode, 1 + IMM2_SIZE);
6225
0
    Leptr = Feptr;
6226
6227
    /* Move back by the maximum branch length and then work forwards. This
6228
    ensures that items such as \d{3,5} get the maximum length, which is
6229
    relevant for captures, and makes for Perl compatibility. */
6230
6231
0
#ifdef SUPPORT_UNICODE
6232
0
    if (utf)
6233
0
      {
6234
0
      for (i = 0; i < Lmax; i++)
6235
0
        {
6236
0
        if (Feptr == mb->start_subject)
6237
0
          {
6238
0
          if (i < Lmin) RRETURN(MATCH_NOMATCH);
6239
0
          Lmax = i;
6240
0
          break;
6241
0
          }
6242
0
        Feptr--;
6243
0
        BACKCHAR(Feptr);
6244
0
        }
6245
0
      }
6246
0
    else
6247
0
#endif
6248
6249
    /* No UTF support or not in UTF mode */
6250
6251
0
      {
6252
0
      ptrdiff_t diff = Feptr - mb->start_subject;
6253
0
      uint32_t available = (diff > 65535)? 65535 : ((diff > 0)? (int)diff : 0);
6254
0
      if (Lmin > available) RRETURN(MATCH_NOMATCH);
6255
0
      if (Lmax > available) Lmax = available;
6256
0
      Feptr -= Lmax;
6257
0
      }
6258
6259
    /* Now try matching, moving forward one character on failure, until we
6260
    reach the minimum back length. */
6261
6262
0
    for (;;)
6263
0
      {
6264
0
      RMATCH(Fecode + 1 + 2 * IMM2_SIZE, RM37);
6265
0
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6266
0
      if (Lmax-- <= Lmin) RRETURN(MATCH_NOMATCH);
6267
0
      Feptr++;
6268
0
#ifdef SUPPORT_UNICODE
6269
0
      if (utf) { FORWARDCHARTEST(Feptr, mb->end_subject); }
6270
0
#endif
6271
0
      }
6272
0
    PCRE2_UNREACHABLE(); /* Control never reaches here */
6273
6274
0
#undef Lmin
6275
0
#undef Lmax
6276
0
#undef Leptr
6277
6278
    /* ===================================================================== */
6279
    /* An alternation is the end of a branch; scan along to find the end of the
6280
    bracketed group. */
6281
6282
0
    case OP_ALT:
6283
0
    branch_end = Fecode;
6284
0
    do Fecode += GET(Fecode,1); while (*Fecode == OP_ALT);
6285
0
    break;
6286
6287
6288
    /* ===================================================================== */
6289
    /* The end of a parenthesized group. For all but OP_BRA and OP_COND, the
6290
    starting frame was added to the chained frames in order to remember the
6291
    starting subject position for the group. (Not true for OP_BRA when it's a
6292
    whole pattern recursion, but that is handled separately below.) */
6293
6294
0
    case OP_KET:
6295
0
    case OP_KETRMIN:
6296
0
    case OP_KETRMAX:
6297
0
    case OP_KETRPOS:
6298
6299
0
    bracode = Fecode - GET(Fecode, 1);
6300
6301
0
    if (branch_end == NULL) branch_end = Fecode;
6302
0
    branch_start = bracode;
6303
0
    while (branch_start + GET(branch_start, 1) != branch_end)
6304
0
      branch_start += GET(branch_start, 1);
6305
0
    branch_end = NULL;
6306
6307
    /* Point N to the frame at the start of the most recent group, and P to its
6308
    predecessor. Remember the subject pointer at the start of the group. */
6309
6310
0
    if (*bracode != OP_BRA && *bracode != OP_COND)
6311
0
      {
6312
0
      N = (heapframe *)((char *)match_data->heapframes + Flast_group_offset);
6313
0
      P = (heapframe *)((char *)N - frame_size);
6314
0
      Flast_group_offset = P->last_group_offset;
6315
6316
#ifdef DEBUG_SHOW_RMATCH
6317
      fprintf(stderr, "++ KET for frame=%d type=%x prev char offset=%lu\n",
6318
        N->rdepth, N->group_frame_type,
6319
        (char *)P->eptr - (char *)mb->start_subject);
6320
#endif
6321
6322
      /* If we are at the end of an assertion that is a condition, first check
6323
      to see if we are at the end of a variable-length branch in a lookbehind.
6324
      If this is the case and we have not landed on the current character,
6325
      return no match. Compare code below for non-condition lookbehinds. In
6326
      other cases, return a match, discarding any intermediate backtracking
6327
      points. Copy back the mark setting and the captures into the frame before
6328
      N so that they are set on return. Doing this for all assertions, both
6329
      positive and negative, seems to match what Perl does. */
6330
6331
0
      if (GF_IDMASK(N->group_frame_type) == GF_CONDASSERT)
6332
0
        {
6333
0
        if ((*bracode == OP_ASSERTBACK || *bracode == OP_ASSERTBACK_NOT) &&
6334
0
            branch_start[1 + LINK_SIZE] == OP_VREVERSE && Feptr != P->eptr)
6335
0
          RRETURN(MATCH_NOMATCH);
6336
0
        memcpy((char *)P + offsetof(heapframe, ovector), Fovector,
6337
0
          Foffset_top * sizeof(PCRE2_SIZE));
6338
0
        P->offset_top = Foffset_top;
6339
0
        P->mark = Fmark;
6340
0
        Fback_frame = (char *)F - (char *)P;
6341
0
        RRETURN(MATCH_MATCH);
6342
0
        }
6343
0
      }
6344
0
    else P = NULL;   /* Indicates starting frame not recorded */
6345
6346
    /* The group was not a conditional assertion. */
6347
6348
0
    switch (*bracode)
6349
0
      {
6350
      /* Whole pattern recursion is handled as a recursion into group 0, but
6351
      the entire pattern is wrapped in OP_BRA/OP_KET rather than a capturing
6352
      group - a design mistake: it should perhaps have been capture group 0.
6353
      Anyway, that means the end of such recursion must be handled here. It is
6354
      detected by checking for an immediately following OP_END when we are
6355
      recursing in group 0. If this is not the end of a whole-pattern
6356
      recursion, there is nothing to be done. */
6357
6358
0
      case OP_BRA:
6359
0
      if (Fcurrent_recurse != 0 || Fecode[1+LINK_SIZE] != OP_END) break;
6360
6361
      /* It is the end of whole-pattern recursion. */
6362
6363
0
      offset = Flast_group_offset;
6364
6365
      /* Corrupted heapframes? Trigger an assert and return an error. */
6366
0
      PCRE2_ASSERT(offset != PCRE2_UNSET);
6367
0
      if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL;
6368
6369
0
      N = (heapframe *)((char *)match_data->heapframes + offset);
6370
0
      P = (heapframe *)((char *)N - frame_size);
6371
0
      Flast_group_offset = P->last_group_offset;
6372
6373
      /* Reinstate the previous set of captures and then carry on after the
6374
      recursion call. */
6375
6376
0
      Fecode = P->ecode + 1 + LINK_SIZE;
6377
6378
0
      if (*Fecode != OP_CREF)
6379
0
        {
6380
0
        memcpy(F->ovector, P->ovector, Foffset_top * sizeof(PCRE2_SIZE));
6381
0
        Foffset_top = P->offset_top;
6382
0
        }
6383
0
      else
6384
0
        recurse_update_offsets(F, P);
6385
6386
0
      Fcapture_last = P->capture_last;
6387
0
      Fcurrent_recurse = P->current_recurse;
6388
0
      continue;  /* With next opcode */
6389
6390
0
      case OP_COND:     /* No need to do anything for these */
6391
0
      case OP_SCOND:
6392
0
      break;
6393
6394
      /* Non-atomic positive assertions are like OP_BRA, except that the
6395
      subject pointer must be put back to where it was at the start of the
6396
      assertion. For a variable lookbehind, check its end point. */
6397
6398
0
      case OP_ASSERTBACK_NA:
6399
0
      if (branch_start[1 + LINK_SIZE] == OP_VREVERSE && Feptr != P->eptr)
6400
0
        RRETURN(MATCH_NOMATCH);
6401
0
      PCRE2_FALLTHROUGH /* Fall through */
6402
0
6403
0
      case OP_ASSERT_NA:
6404
0
      if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
6405
0
      Feptr = P->eptr;
6406
0
      break;
6407
6408
      /* Atomic positive assertions are like OP_ONCE, except that in addition
6409
      the subject pointer must be put back to where it was at the start of the
6410
      assertion. For a variable lookbehind, check its end point. */
6411
6412
0
      case OP_ASSERTBACK:
6413
0
      if (branch_start[1 + LINK_SIZE] == OP_VREVERSE && Feptr != P->eptr)
6414
0
        RRETURN(MATCH_NOMATCH);
6415
0
      PCRE2_FALLTHROUGH /* Fall through */
6416
0
6417
0
      case OP_ASSERT:
6418
0
      if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
6419
0
      Feptr = P->eptr;
6420
0
      PCRE2_FALLTHROUGH /* Fall through */
6421
6422
      /* For an atomic group, discard internal backtracking points. We must
6423
      also ensure that any remaining branches within the top-level of the group
6424
      are not tried. Do this by adjusting the code pointer within the backtrack
6425
      frame so that it points to the final branch. */
6426
6427
0
      case OP_ONCE:
6428
0
      Fback_frame = ((char *)F - (char *)P);
6429
0
      for (;;)
6430
0
        {
6431
0
        uint32_t y = GET(P->ecode,1);
6432
0
        if ((P->ecode)[y] != OP_ALT) break;
6433
0
        P->ecode += y;
6434
0
        }
6435
0
      break;
6436
6437
      /* A matching negative assertion returns MATCH, which is turned into
6438
      NOMATCH at the assertion level. For a variable lookbehind, check its end
6439
      point. */
6440
6441
0
      case OP_ASSERTBACK_NOT:
6442
0
      if (branch_start[1 + LINK_SIZE] == OP_VREVERSE && Feptr != P->eptr)
6443
0
        RRETURN(MATCH_NOMATCH);
6444
0
      PCRE2_FALLTHROUGH /* Fall through */
6445
0
6446
0
      case OP_ASSERT_NOT:
6447
0
      RRETURN(MATCH_MATCH);
6448
6449
      /* A scan substring group must preserve the current end_subject,
6450
      and restore it before the backtracking is performed into its sub
6451
      pattern. */
6452
6453
0
      case OP_ASSERT_SCS:
6454
0
      F->temp_sptr[0] = mb->end_subject;
6455
0
      mb->end_subject = P->temp_sptr[0];
6456
0
      mb->true_end_subject = mb->end_subject + P->temp_size;
6457
0
      Feptr = P->temp_sptr[1];
6458
6459
0
      RMATCH(Fecode + 1 + LINK_SIZE, RM39);
6460
6461
0
      mb->end_subject = F->temp_sptr[0];
6462
0
      mb->true_end_subject = mb->end_subject;
6463
0
      RRETURN(rrc);
6464
0
      break;
6465
6466
      /* At the end of a script run, apply the script-checking rules. This code
6467
      will never be exercised if Unicode support is not compiled, because in
6468
      that environment script runs cause an error at compile time. */
6469
6470
0
      case OP_SCRIPT_RUN:
6471
0
      if (!PRIV(script_run)(P->eptr, Feptr, utf)) RRETURN(MATCH_NOMATCH);
6472
0
      break;
6473
6474
      /* Whole-pattern recursion is coded as a recurse into group 0, and is
6475
      handled with OP_BRA above. Other recursion is handled here. */
6476
6477
0
      case OP_CBRA:
6478
0
      case OP_CBRAPOS:
6479
0
      case OP_SCBRA:
6480
0
      case OP_SCBRAPOS:
6481
0
      number = GET2(bracode, 1+LINK_SIZE);
6482
6483
      /* Handle a recursively called group. We reinstate the previous set of
6484
      captures and then carry on after the recursion call. */
6485
6486
0
      if (Fcurrent_recurse == number)
6487
0
        {
6488
0
        P = (heapframe *)((char *)N - frame_size);
6489
0
        Fecode = P->ecode + 1 + LINK_SIZE;
6490
6491
0
        if (*Fecode != OP_CREF)
6492
0
          {
6493
0
          memcpy(F->ovector, P->ovector, Foffset_top * sizeof(PCRE2_SIZE));
6494
0
          Foffset_top = P->offset_top;
6495
0
          }
6496
0
        else
6497
0
          recurse_update_offsets(F, P);
6498
6499
0
        Fcapture_last = P->capture_last;
6500
0
        Fcurrent_recurse = P->current_recurse;
6501
0
        continue;  /* With next opcode */
6502
0
        }
6503
6504
      /* Deal with actual capturing. */
6505
6506
0
      offset = (number << 1) - 2;
6507
0
      Fcapture_last = number;
6508
0
      Fovector[offset] = P->eptr - mb->start_subject;
6509
0
      Fovector[offset+1] = Feptr - mb->start_subject;
6510
0
      if (offset >= Foffset_top) Foffset_top = offset + 2;
6511
0
      break;
6512
0
      }  /* End actions relating to the starting opcode */
6513
6514
    /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
6515
    and return the MATCH_KETRPOS. This makes it possible to do the repeats one
6516
    at a time from the outer level. This must precede the empty string test -
6517
    in this case that test is done at the outer level. */
6518
6519
0
    if (*Fecode == OP_KETRPOS)
6520
0
      {
6521
0
      memcpy((char *)P + offsetof(heapframe, eptr),
6522
0
             (char *)F + offsetof(heapframe, eptr),
6523
0
             frame_copy_size);
6524
0
      RRETURN(MATCH_KETRPOS);
6525
0
      }
6526
6527
    /* Handle the different kinds of closing brackets. A non-repeating ket
6528
    needs no special action, just continuing at this level. This also happens
6529
    for the repeating kets if the group matched no characters, in order to
6530
    forcibly break infinite loops. Otherwise, the repeating kets try the rest
6531
    of the pattern or restart from the preceding bracket, in the appropriate
6532
    order. */
6533
6534
0
    if (Fop != OP_KET && (P == NULL || Feptr != P->eptr))
6535
0
      {
6536
0
      if (Fop == OP_KETRMIN)
6537
0
        {
6538
0
        RMATCH(Fecode + 1 + LINK_SIZE, RM6);
6539
0
        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6540
0
        Fecode -= GET(Fecode, 1);
6541
0
        break;   /* End of ket processing */
6542
0
        }
6543
6544
      /* Repeat the maximum number of times (KETRMAX) */
6545
6546
0
      RMATCH(bracode, RM7);
6547
0
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6548
0
      }
6549
6550
    /* Carry on at this level for a non-repeating ket, or after matching an
6551
    empty string, or after repeating for a maximum number of times. */
6552
6553
0
    Fecode += 1 + LINK_SIZE;
6554
0
    break;
6555
6556
6557
    /* ===================================================================== */
6558
    /* Start and end of line assertions, not multiline mode. */
6559
6560
0
    case OP_CIRC:   /* Start of line, unless PCRE2_NOTBOL is set. */
6561
0
    if (Feptr != mb->start_subject || (mb->moptions & PCRE2_NOTBOL) != 0)
6562
0
      RRETURN(MATCH_NOMATCH);
6563
0
    Fecode++;
6564
0
    break;
6565
6566
0
    case OP_SOD:    /* Unconditional start of subject */
6567
0
    if (Feptr != mb->start_subject) RRETURN(MATCH_NOMATCH);
6568
0
    Fecode++;
6569
0
    break;
6570
6571
    /* When PCRE2_NOTEOL is unset, assert before the subject end, or a
6572
    terminating newline unless PCRE2_DOLLAR_ENDONLY is set. */
6573
6574
0
    case OP_DOLL:
6575
0
    if ((mb->moptions & PCRE2_NOTEOL) != 0) RRETURN(MATCH_NOMATCH);
6576
0
    if ((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0) goto ASSERT_NL_OR_EOS;
6577
6578
0
    PCRE2_FALLTHROUGH /* Fall through */
6579
0
    /* Unconditional end of subject assertion (\z). */
6580
0
6581
0
    case OP_EOD:
6582
0
    if (Feptr < mb->true_end_subject) RRETURN(MATCH_NOMATCH);
6583
0
    if (mb->partial != 0)
6584
0
      {
6585
0
      mb->hitend = TRUE;
6586
0
      if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
6587
0
      }
6588
0
    Fecode++;
6589
0
    break;
6590
6591
    /* End of subject or ending \n assertion (\Z) */
6592
6593
0
    case OP_EODN:
6594
0
    ASSERT_NL_OR_EOS:
6595
0
    if (Feptr < mb->true_end_subject &&
6596
0
        (!IS_NEWLINE(Feptr) || Feptr != mb->true_end_subject - mb->nllen))
6597
0
      {
6598
0
      if (mb->partial != 0 &&
6599
0
          Feptr + 1 >= mb->end_subject &&
6600
0
          NLBLOCK->nltype == NLTYPE_FIXED &&
6601
0
          NLBLOCK->nllen == 2 &&
6602
0
          UCHAR21TEST(Feptr) == NLBLOCK->nl[0])
6603
0
        {
6604
0
        mb->hitend = TRUE;
6605
0
        if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
6606
0
        }
6607
0
      RRETURN(MATCH_NOMATCH);
6608
0
      }
6609
6610
    /* Either at end of string or \n before end. */
6611
6612
0
    if (mb->partial != 0)
6613
0
      {
6614
0
      mb->hitend = TRUE;
6615
0
      if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
6616
0
      }
6617
0
    Fecode++;
6618
0
    break;
6619
6620
6621
    /* ===================================================================== */
6622
    /* Start and end of line assertions, multiline mode. */
6623
6624
    /* Start of subject unless notbol, or after any newline except for one at
6625
    the very end, unless PCRE2_ALT_CIRCUMFLEX is set. */
6626
6627
0
    case OP_CIRCM:
6628
0
    if ((mb->moptions & PCRE2_NOTBOL) != 0 && Feptr == mb->start_subject)
6629
0
      RRETURN(MATCH_NOMATCH);
6630
0
    if (Feptr != mb->start_subject &&
6631
0
        ((Feptr == mb->end_subject &&
6632
0
           (mb->poptions & PCRE2_ALT_CIRCUMFLEX) == 0) ||
6633
0
         !WAS_NEWLINE(Feptr)))
6634
0
      RRETURN(MATCH_NOMATCH);
6635
0
    Fecode++;
6636
0
    break;
6637
6638
    /* Assert before any newline, or before end of subject unless noteol is
6639
    set. */
6640
6641
0
    case OP_DOLLM:
6642
0
    if (Feptr < mb->end_subject)
6643
0
      {
6644
0
      if (!IS_NEWLINE(Feptr))
6645
0
        {
6646
0
        if (mb->partial != 0 &&
6647
0
            Feptr + 1 >= mb->end_subject &&
6648
0
            NLBLOCK->nltype == NLTYPE_FIXED &&
6649
0
            NLBLOCK->nllen == 2 &&
6650
0
            UCHAR21TEST(Feptr) == NLBLOCK->nl[0])
6651
0
          {
6652
0
          mb->hitend = TRUE;
6653
0
          if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
6654
0
          }
6655
0
        RRETURN(MATCH_NOMATCH);
6656
0
        }
6657
0
      }
6658
0
    else
6659
0
      {
6660
0
      if ((mb->moptions & PCRE2_NOTEOL) != 0) RRETURN(MATCH_NOMATCH);
6661
0
      SCHECK_PARTIAL();
6662
0
      }
6663
0
    Fecode++;
6664
0
    break;
6665
6666
6667
    /* ===================================================================== */
6668
    /* Start of match assertion */
6669
6670
0
    case OP_SOM:
6671
0
    if (Feptr != mb->start_subject + mb->start_offset) RRETURN(MATCH_NOMATCH);
6672
0
    Fecode++;
6673
0
    break;
6674
6675
6676
    /* ===================================================================== */
6677
    /* Reset the start of match point */
6678
6679
0
    case OP_SET_SOM:
6680
0
    Fstart_match = Feptr;
6681
0
    Fecode++;
6682
0
    break;
6683
6684
6685
    /* ===================================================================== */
6686
    /* Word boundary assertions. Find out if the previous and current
6687
    characters are "word" characters. It takes a bit more work in UTF mode.
6688
    Characters > 255 are assumed to be "non-word" characters when PCRE2_UCP is
6689
    not set. When it is set, use Unicode properties if available, even when not
6690
    in UTF mode. Remember the earliest and latest consulted characters. */
6691
6692
0
    case OP_NOT_WORD_BOUNDARY:
6693
0
    case OP_WORD_BOUNDARY:
6694
0
    case OP_NOT_UCP_WORD_BOUNDARY:
6695
0
    case OP_UCP_WORD_BOUNDARY:
6696
0
    if (Feptr == mb->check_subject) prev_is_word = FALSE; else
6697
0
      {
6698
0
      PCRE2_SPTR lastptr = Feptr - 1;
6699
0
#ifdef SUPPORT_UNICODE
6700
0
      if (utf)
6701
0
        {
6702
0
        BACKCHAR(lastptr);
6703
0
        GETCHAR(fc, lastptr);
6704
0
        }
6705
0
      else
6706
0
#endif  /* SUPPORT_UNICODE */
6707
0
      fc = *lastptr;
6708
0
      if (lastptr < mb->start_used_ptr) mb->start_used_ptr = lastptr;
6709
0
#ifdef SUPPORT_UNICODE
6710
0
      if (Fop == OP_UCP_WORD_BOUNDARY || Fop == OP_NOT_UCP_WORD_BOUNDARY)
6711
0
        {
6712
0
        int chartype = UCD_CHARTYPE(fc);
6713
0
        int category = PRIV(ucp_gentype)[chartype];
6714
0
        prev_is_word = (category == ucp_L || category == ucp_N ||
6715
0
          chartype == ucp_Mn || chartype == ucp_Pc);
6716
0
        }
6717
0
      else
6718
0
#endif  /* SUPPORT_UNICODE */
6719
0
      prev_is_word = CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0;
6720
0
      }
6721
6722
    /* Get status of next character */
6723
6724
0
    if (Feptr >= mb->end_subject)
6725
0
      {
6726
0
      SCHECK_PARTIAL();
6727
0
      cur_is_word = FALSE;
6728
0
      }
6729
0
    else
6730
0
      {
6731
0
      PCRE2_SPTR nextptr = Feptr + 1;
6732
0
#ifdef SUPPORT_UNICODE
6733
0
      if (utf)
6734
0
        {
6735
0
        FORWARDCHARTEST(nextptr, mb->end_subject);
6736
0
        GETCHAR(fc, Feptr);
6737
0
        }
6738
0
      else
6739
0
#endif  /* SUPPORT_UNICODE */
6740
0
      fc = *Feptr;
6741
0
      if (nextptr > mb->last_used_ptr) mb->last_used_ptr = nextptr;
6742
0
#ifdef SUPPORT_UNICODE
6743
0
      if (Fop == OP_UCP_WORD_BOUNDARY || Fop == OP_NOT_UCP_WORD_BOUNDARY)
6744
0
        {
6745
0
        int chartype = UCD_CHARTYPE(fc);
6746
0
        int category = PRIV(ucp_gentype)[chartype];
6747
0
        cur_is_word = (category == ucp_L || category == ucp_N ||
6748
0
          chartype == ucp_Mn || chartype == ucp_Pc);
6749
0
        }
6750
0
      else
6751
0
#endif  /* SUPPORT_UNICODE */
6752
0
      cur_is_word = CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0;
6753
0
      }
6754
6755
    /* Now see if the situation is what we want */
6756
6757
0
    if ((*Fecode++ == OP_WORD_BOUNDARY || Fop == OP_UCP_WORD_BOUNDARY)?
6758
0
         cur_is_word == prev_is_word : cur_is_word != prev_is_word)
6759
0
      RRETURN(MATCH_NOMATCH);
6760
0
    break;
6761
6762
6763
    /* ===================================================================== */
6764
    /* Backtracking (*VERB)s, with and without arguments. Note that if the
6765
    pattern is successfully matched, we do not come back from RMATCH. */
6766
6767
0
    case OP_MARK:
6768
0
    Fmark = mb->nomatch_mark = Fecode + 2;
6769
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM12);
6770
6771
    /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
6772
    argument, and we must check whether that argument matches this MARK's
6773
    argument. It is passed back in mb->verb_skip_ptr. If it does match, we
6774
    return MATCH_SKIP with mb->verb_skip_ptr now pointing to the subject
6775
    position that corresponds to this mark. Otherwise, pass back the return
6776
    code unaltered. */
6777
6778
0
    if (rrc == MATCH_SKIP_ARG &&
6779
0
             PRIV(strcmp)(Fecode + 2, mb->verb_skip_ptr) == 0)
6780
0
      {
6781
0
      mb->verb_skip_ptr = Feptr;   /* Pass back current position */
6782
0
      RRETURN(MATCH_SKIP);
6783
0
      }
6784
0
    RRETURN(rrc);
6785
6786
0
    case OP_FAIL:
6787
0
    RRETURN(MATCH_NOMATCH);
6788
6789
    /* Record the current recursing group number in mb->verb_current_recurse
6790
    when a backtracking return such as MATCH_COMMIT is given. This enables the
6791
    recurse processing to catch verbs from within the recursion. */
6792
6793
0
    case OP_COMMIT:
6794
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM13);
6795
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6796
0
    mb->verb_current_recurse = Fcurrent_recurse;
6797
0
    RRETURN(MATCH_COMMIT);
6798
6799
0
    case OP_COMMIT_ARG:
6800
0
    Fmark = mb->nomatch_mark = Fecode + 2;
6801
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM36);
6802
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6803
0
    mb->verb_current_recurse = Fcurrent_recurse;
6804
0
    RRETURN(MATCH_COMMIT);
6805
6806
0
    case OP_PRUNE:
6807
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM14);
6808
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6809
0
    mb->verb_current_recurse = Fcurrent_recurse;
6810
0
    RRETURN(MATCH_PRUNE);
6811
6812
0
    case OP_PRUNE_ARG:
6813
0
    Fmark = mb->nomatch_mark = Fecode + 2;
6814
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM15);
6815
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6816
0
    mb->verb_current_recurse = Fcurrent_recurse;
6817
0
    RRETURN(MATCH_PRUNE);
6818
6819
0
    case OP_SKIP:
6820
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM16);
6821
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6822
0
    mb->verb_skip_ptr = Feptr;   /* Pass back current position */
6823
0
    mb->verb_current_recurse = Fcurrent_recurse;
6824
0
    RRETURN(MATCH_SKIP);
6825
6826
    /* Note that, for Perl compatibility, SKIP with an argument does NOT set
6827
    nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was
6828
    not a matching mark, we have to re-run the match, ignoring the SKIP_ARG
6829
    that failed and any that precede it (either they also failed, or were not
6830
    triggered). To do this, we maintain a count of executed SKIP_ARGs. If a
6831
    SKIP_ARG gets to top level, the match is re-run with mb->ignore_skip_arg
6832
    set to the count of the one that failed. */
6833
6834
0
    case OP_SKIP_ARG:
6835
0
    mb->skip_arg_count++;
6836
0
    if (mb->skip_arg_count <= mb->ignore_skip_arg)
6837
0
      {
6838
0
      Fecode += PRIV(OP_lengths)[*Fecode] + Fecode[1];
6839
0
      break;
6840
0
      }
6841
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM17);
6842
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6843
6844
    /* Pass back the current skip name and return the special MATCH_SKIP_ARG
6845
    return code. This will either be caught by a matching MARK, or get to the
6846
    top, where it causes a rematch with mb->ignore_skip_arg set to the value of
6847
    mb->skip_arg_count. */
6848
6849
0
    mb->verb_skip_ptr = Fecode + 2;
6850
0
    mb->verb_current_recurse = Fcurrent_recurse;
6851
0
    RRETURN(MATCH_SKIP_ARG);
6852
6853
    /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
6854
    the branch in which it occurs can be determined. */
6855
6856
0
    case OP_THEN:
6857
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM18);
6858
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6859
0
    mb->verb_ecode_ptr = Fecode;
6860
0
    mb->verb_current_recurse = Fcurrent_recurse;
6861
0
    RRETURN(MATCH_THEN);
6862
6863
0
    case OP_THEN_ARG:
6864
0
    Fmark = mb->nomatch_mark = Fecode + 2;
6865
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM19);
6866
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6867
0
    mb->verb_ecode_ptr = Fecode;
6868
0
    mb->verb_current_recurse = Fcurrent_recurse;
6869
0
    RRETURN(MATCH_THEN);
6870
6871
6872
    /* ===================================================================== */
6873
    /* There's been some horrible disaster. Arrival here can only mean there is
6874
    something seriously wrong in the code above or the OP_xxx definitions. */
6875
6876
    /* LCOV_EXCL_START */
6877
0
    default:
6878
0
    PCRE2_DEBUG_UNREACHABLE();
6879
0
    return PCRE2_ERROR_INTERNAL;
6880
    /* LCOV_EXCL_STOP */
6881
0
    }
6882
6883
  /* Do not insert any code in here without much thought; it is assumed
6884
  that "continue" in the code above comes out to here to repeat the main
6885
  loop. */
6886
6887
0
  }  /* End of main loop */
6888
6889
0
PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */
6890
6891
/* ========================================================================= */
6892
/* The RRETURN() macro jumps here. The number that is saved in Freturn_id
6893
indicates which label we actually want to return to. The value in Frdepth is
6894
the index number of the frame in the vector. The return value has been placed
6895
in rrc. */
6896
6897
0
#define LBL(val) case val: goto L_RM##val;
6898
6899
0
RETURN_SWITCH:
6900
0
if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
6901
0
if (Frdepth == 0) return rrc;                     /* Exit from the top level */
6902
0
F = (heapframe *)((char *)F - Fback_frame);       /* Backtrack */
6903
0
mb->cb->callout_flags |= PCRE2_CALLOUT_BACKTRACK; /* Note for callouts */
6904
6905
#ifdef DEBUG_SHOW_RMATCH
6906
fprintf(stderr, "++ RETURN %d to RM%d\n", rrc, Freturn_id);
6907
#endif
6908
6909
0
switch (Freturn_id)
6910
0
  {
6911
0
  LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
6912
0
  LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)
6913
0
  LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)
6914
0
  LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)
6915
0
  LBL(33) LBL(34) LBL(35) LBL(36) LBL(37) LBL(38) LBL(39)
6916
6917
0
#ifdef SUPPORT_WIDE_CHARS
6918
0
  LBL(100) LBL(101) LBL(102) LBL(103)
6919
0
#endif
6920
6921
0
#ifdef SUPPORT_UNICODE
6922
0
  LBL(200) LBL(201) LBL(202) LBL(203) LBL(204) LBL(205) LBL(206)
6923
0
  LBL(207) LBL(208) LBL(209) LBL(210) LBL(211) LBL(212) LBL(213)
6924
0
  LBL(214) LBL(215) LBL(216) LBL(217) LBL(218) LBL(219) LBL(220)
6925
0
  LBL(221) LBL(222) LBL(223) LBL(224)
6926
0
#endif
6927
6928
  /* LCOV_EXCL_START */
6929
0
  default:
6930
0
  PCRE2_DEBUG_UNREACHABLE();
6931
0
  return PCRE2_ERROR_INTERNAL;
6932
  /* LCOV_EXCL_STOP */
6933
0
  }
6934
0
#undef LBL
6935
0
}
6936
6937
6938
/*************************************************
6939
*           Match a Regular Expression           *
6940
*************************************************/
6941
6942
/* This function applies a compiled pattern to a subject string and picks out
6943
portions of the string if it matches. Two elements in the vector are set for
6944
each substring: the offsets to the start and end of the substring.
6945
6946
Arguments:
6947
  code            points to the compiled expression
6948
  subject         points to the subject string
6949
  length          length of subject string (may contain binary zeros)
6950
  start_offset    where to start in the subject string
6951
  options         option bits
6952
  match_data      points to a match_data block
6953
  mcontext        points to a PCRE2 context
6954
6955
Returns:          > 0 => success; value is the number of ovector pairs filled
6956
                  = 0 => success, but ovector is not big enough
6957
                  = -1 => failed to match (PCRE2_ERROR_NOMATCH)
6958
                  = -2 => partial match (PCRE2_ERROR_PARTIAL)
6959
                  < -2 => some kind of unexpected problem
6960
*/
6961
6962
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
6963
pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
6964
  PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
6965
  pcre2_match_context *mcontext)
6966
0
{
6967
0
int rc;
6968
0
const uint8_t *start_bits = NULL;
6969
0
const pcre2_real_code *re = (const pcre2_real_code *)code;
6970
0
uint32_t original_options = options;
6971
6972
0
BOOL anchored;
6973
0
BOOL firstline;
6974
0
BOOL has_first_cu = FALSE;
6975
0
BOOL has_req_cu = FALSE;
6976
0
BOOL startline;
6977
6978
0
#if PCRE2_CODE_UNIT_WIDTH == 8
6979
0
PCRE2_SPTR memchr_found_first_cu;
6980
0
PCRE2_SPTR memchr_found_first_cu2;
6981
0
#endif
6982
6983
0
PCRE2_UCHAR first_cu = 0;
6984
0
PCRE2_UCHAR first_cu2 = 0;
6985
0
PCRE2_UCHAR req_cu = 0;
6986
0
PCRE2_UCHAR req_cu2 = 0;
6987
6988
0
PCRE2_UCHAR null_str[1] = { 0xcd };
6989
0
PCRE2_SPTR original_subject = subject;
6990
0
PCRE2_SPTR bumpalong_limit;
6991
0
PCRE2_SPTR end_subject;
6992
0
PCRE2_SPTR true_end_subject;
6993
0
PCRE2_SPTR start_match;
6994
0
PCRE2_SPTR req_cu_ptr;
6995
0
PCRE2_SPTR start_partial;
6996
0
PCRE2_SPTR match_partial;
6997
6998
#ifdef SUPPORT_JIT
6999
BOOL use_jit;
7000
#endif
7001
7002
/* This flag is needed even when Unicode is not supported for convenience
7003
(it is used by the IS_NEWLINE macro). */
7004
7005
0
BOOL utf = FALSE;
7006
7007
0
#ifdef SUPPORT_UNICODE
7008
0
BOOL ucp = FALSE;
7009
0
BOOL allow_invalid;
7010
0
uint32_t fragment_options = 0;
7011
#ifdef SUPPORT_JIT
7012
BOOL jit_checked_utf = FALSE;
7013
#endif
7014
0
#endif  /* SUPPORT_UNICODE */
7015
7016
0
PCRE2_SIZE frame_size;
7017
0
PCRE2_SIZE heapframes_size;
7018
7019
/* We need to have mb as a pointer to a match block, because the IS_NEWLINE
7020
macro is used below, and it expects NLBLOCK to be defined as a pointer. */
7021
7022
0
pcre2_callout_block cb;
7023
0
match_block actual_match_block;
7024
0
match_block *mb = &actual_match_block;
7025
7026
/* Recognize NULL, length 0 as an empty string. */
7027
7028
0
if (subject == NULL && length == 0) subject = null_str;
7029
7030
/* Plausibility checks */
7031
7032
0
if (match_data == NULL) return PCRE2_ERROR_NULL;
7033
0
if (code == NULL || subject == NULL)
7034
0
  return match_data->rc = PCRE2_ERROR_NULL;
7035
0
if ((options & ~PUBLIC_MATCH_OPTIONS) != 0)
7036
0
  return match_data->rc = PCRE2_ERROR_BADOPTION;
7037
7038
0
start_match = subject + start_offset;
7039
0
req_cu_ptr = start_match - 1;
7040
0
if (length == PCRE2_ZERO_TERMINATED)
7041
0
  {
7042
0
  length = PRIV(strlen)(subject);
7043
0
  }
7044
0
true_end_subject = end_subject = subject + length;
7045
7046
0
if (start_offset > length) return match_data->rc = PCRE2_ERROR_BADOFFSET;
7047
7048
/* Check that the first field in the block is the magic number. */
7049
7050
0
if (re->magic_number != MAGIC_NUMBER)
7051
0
  return match_data->rc = PCRE2_ERROR_BADMAGIC;
7052
7053
/* Check the code unit width. */
7054
7055
0
if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8)
7056
0
  return match_data->rc = PCRE2_ERROR_BADMODE;
7057
7058
/* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the
7059
options variable for this function. Users of PCRE2 who are not calling the
7060
function directly would like to have a way of setting these flags, in the same
7061
way that they can set pcre2_compile() flags like PCRE2_NO_AUTO_POSSESS with
7062
constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and
7063
(*NOTEMPTY_ATSTART) set bits in the pattern's "flags" field, which we now
7064
transfer to the options for this function. The bits are guaranteed to be
7065
adjacent, but do not have the same values. This bit of Boolean trickery assumes
7066
that the match-time bits are not more significant than the flag bits. If by
7067
accident this is not the case, a compile-time division by zero error will
7068
occur. */
7069
7070
0
#define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
7071
0
#define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART)
7072
0
options |= (re->flags & FF) / ((FF & (~FF+1)) / (OO & (~OO+1)));
7073
0
#undef FF
7074
0
#undef OO
7075
7076
/* If the pattern was successfully studied with JIT support, we will run the
7077
JIT executable instead of the rest of this function. Most options must be set
7078
at compile time for the JIT code to be usable. */
7079
7080
#ifdef SUPPORT_JIT
7081
use_jit = (re->executable_jit != NULL &&
7082
          (options & ~PUBLIC_JIT_MATCH_OPTIONS) == 0);
7083
#endif
7084
7085
/* Initialize UTF/UCP parameters. */
7086
7087
0
#ifdef SUPPORT_UNICODE
7088
0
utf = (re->overall_options & PCRE2_UTF) != 0;
7089
0
allow_invalid = (re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0;
7090
0
ucp = (re->overall_options & PCRE2_UCP) != 0;
7091
0
#endif  /* SUPPORT_UNICODE */
7092
7093
/* Convert the partial matching flags into an integer. */
7094
7095
0
mb->partial = ((options & PCRE2_PARTIAL_HARD) != 0)? 2 :
7096
0
              ((options & PCRE2_PARTIAL_SOFT) != 0)? 1 : 0;
7097
7098
/* Partial matching and PCRE2_ENDANCHORED are currently not allowed at the same
7099
time. */
7100
7101
0
if (mb->partial != 0 &&
7102
0
   ((re->overall_options | options) & PCRE2_ENDANCHORED) != 0)
7103
0
  return match_data->rc = PCRE2_ERROR_BADOPTION;
7104
7105
/* It is an error to set an offset limit without setting the flag at compile
7106
time. */
7107
7108
0
if (mcontext != NULL && mcontext->offset_limit != PCRE2_UNSET &&
7109
0
     (re->overall_options & PCRE2_USE_OFFSET_LIMIT) == 0)
7110
0
  return match_data->rc = PCRE2_ERROR_BADOFFSETLIMIT;
7111
7112
/* If the match data block was previously used with PCRE2_COPY_MATCHED_SUBJECT,
7113
free the memory that was obtained. Set the field to NULL for match error
7114
cases. */
7115
7116
0
if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)
7117
0
  {
7118
0
  match_data->memctl.free((void *)match_data->subject,
7119
0
    match_data->memctl.memory_data);
7120
0
  match_data->flags &= ~PCRE2_MD_COPIED_SUBJECT;
7121
0
  }
7122
0
match_data->subject = NULL;
7123
7124
/* Zero the error offset in case the first code unit is invalid UTF. */
7125
7126
0
match_data->startchar = 0;
7127
7128
7129
/* ============================= JIT matching ============================== */
7130
7131
/* Prepare for JIT matching. Check a UTF string for validity unless no check is
7132
requested or invalid UTF can be handled. We check only the portion of the
7133
subject that might be inspected during matching - from the offset minus the
7134
maximum lookbehind to the given length. This saves time when a small part of a
7135
large subject is being matched by the use of a starting offset. Note that the
7136
maximum lookbehind is a number of characters, not code units. */
7137
7138
#ifdef SUPPORT_JIT
7139
if (use_jit)
7140
  {
7141
#ifdef SUPPORT_UNICODE
7142
  if (utf && (options & PCRE2_NO_UTF_CHECK) == 0 && !allow_invalid)
7143
    {
7144
7145
    /* For 8-bit and 16-bit UTF, check that the first code unit is a valid
7146
    character start. */
7147
7148
#if PCRE2_CODE_UNIT_WIDTH != 32
7149
    if (start_match < end_subject && NOT_FIRSTCU(*start_match))
7150
      {
7151
      if (start_offset > 0) return match_data->rc = PCRE2_ERROR_BADUTFOFFSET;
7152
#if PCRE2_CODE_UNIT_WIDTH == 8
7153
      return match_data->rc = PCRE2_ERROR_UTF8_ERR20;  /* Isolated 0x80 byte */
7154
#else
7155
      return match_data->rc = PCRE2_ERROR_UTF16_ERR3;  /* Isolated low surrogate */
7156
#endif
7157
      }
7158
#endif  /* WIDTH != 32 */
7159
7160
    /* Move back by the maximum lookbehind, just in case it happens at the very
7161
    start of matching. */
7162
7163
#if PCRE2_CODE_UNIT_WIDTH != 32
7164
    for (unsigned int i = re->max_lookbehind; i > 0 && start_match > subject; i--)
7165
      {
7166
      start_match--;
7167
      while (start_match > subject &&
7168
#if PCRE2_CODE_UNIT_WIDTH == 8
7169
      (*start_match & 0xc0) == 0x80)
7170
#else  /* 16-bit */
7171
      (*start_match & 0xfc00) == 0xdc00)
7172
#endif
7173
        start_match--;
7174
      }
7175
#else  /* PCRE2_CODE_UNIT_WIDTH != 32 */
7176
7177
    /* In the 32-bit library, one code unit equals one character. However,
7178
    we cannot just subtract the lookbehind and then compare pointers, because
7179
    a very large lookbehind could create an invalid pointer. */
7180
7181
    if (start_offset >= re->max_lookbehind)
7182
      start_match -= re->max_lookbehind;
7183
    else
7184
      start_match = subject;
7185
#endif  /* PCRE2_CODE_UNIT_WIDTH != 32 */
7186
7187
    /* Validate the relevant portion of the subject. Adjust the offset of an
7188
    invalid code point to be an absolute offset in the whole string. */
7189
7190
    rc = PRIV(valid_utf)(start_match,
7191
      length - (start_match - subject), &(match_data->startchar));
7192
    if (rc != 0)
7193
      {
7194
      match_data->startchar += start_match - subject;
7195
      return match_data->rc = rc;
7196
      }
7197
    jit_checked_utf = TRUE;
7198
    }
7199
#endif  /* SUPPORT_UNICODE */
7200
7201
  /* If JIT returns BADOPTION, which means that the selected complete or
7202
  partial matching mode was not compiled, fall through to the interpreter. */
7203
7204
  rc = pcre2_jit_match(code, subject, length, start_offset, options,
7205
    match_data, mcontext);
7206
  if (rc != PCRE2_ERROR_JIT_BADOPTION)
7207
    {
7208
    match_data->options = original_options;
7209
    if (rc >= 0 && (options & PCRE2_COPY_MATCHED_SUBJECT) != 0)
7210
      {
7211
      if (length != 0)
7212
        {
7213
        match_data->subject = match_data->memctl.malloc(CU2BYTES(length),
7214
          match_data->memctl.memory_data);
7215
        if (match_data->subject == NULL)
7216
          return match_data->rc = PCRE2_ERROR_NOMEMORY;
7217
        memcpy((void *)match_data->subject, subject, CU2BYTES(length));
7218
        }
7219
      else
7220
        match_data->subject = NULL;
7221
      match_data->flags |= PCRE2_MD_COPIED_SUBJECT;
7222
      }
7223
    else
7224
      {
7225
      /* When pcre2_jit_match sets the subject, it doesn't know what the
7226
      original passed-in pointer was. */
7227
      if (match_data->subject != NULL) match_data->subject = original_subject;
7228
      }
7229
    return rc;
7230
    }
7231
  }
7232
#endif  /* SUPPORT_JIT */
7233
7234
/* ========================= End of JIT matching ========================== */
7235
7236
7237
/* Proceed with non-JIT matching. The default is to allow lookbehinds to the
7238
start of the subject. A UTF check when there is a non-zero offset may change
7239
this. */
7240
7241
0
mb->check_subject = subject;
7242
7243
/* If a UTF subject string was not checked for validity in the JIT code above,
7244
check it here, and handle support for invalid UTF strings. The check above
7245
happens only when invalid UTF is not supported and PCRE2_NO_UTF_CHECK is unset.
7246
If we get here in those circumstances, it means the subject string is valid,
7247
but for some reason JIT matching was not successful. There is no need to check
7248
the subject again.
7249
7250
We check only the portion of the subject that might be inspected during
7251
matching - from the offset minus the maximum lookbehind to the given length.
7252
This saves time when a small part of a large subject is being matched by the
7253
use of a starting offset. Note that the maximum lookbehind is a number of
7254
characters, not code units.
7255
7256
Note also that support for invalid UTF forces a check, overriding the setting
7257
of PCRE2_NO_UTF_CHECK. */
7258
7259
0
#ifdef SUPPORT_UNICODE
7260
0
if (utf &&
7261
#ifdef SUPPORT_JIT
7262
    !jit_checked_utf &&
7263
#endif
7264
0
    ((options & PCRE2_NO_UTF_CHECK) == 0 || allow_invalid))
7265
0
  {
7266
0
#if PCRE2_CODE_UNIT_WIDTH != 32
7267
0
  BOOL skipped_bad_start = FALSE;
7268
0
#endif
7269
7270
  /* For 8-bit and 16-bit UTF, check that the first code unit is a valid
7271
  character start. If we are handling invalid UTF, just skip over such code
7272
  units. Otherwise, give an appropriate error. */
7273
7274
0
#if PCRE2_CODE_UNIT_WIDTH != 32
7275
0
  if (allow_invalid)
7276
0
    {
7277
0
    while (start_match < end_subject && NOT_FIRSTCU(*start_match))
7278
0
      {
7279
0
      start_match++;
7280
0
      skipped_bad_start = TRUE;
7281
0
      }
7282
0
    }
7283
0
  else if (start_match < end_subject && NOT_FIRSTCU(*start_match))
7284
0
    {
7285
0
    if (start_offset > 0) return match_data->rc = PCRE2_ERROR_BADUTFOFFSET;
7286
0
#if PCRE2_CODE_UNIT_WIDTH == 8
7287
0
    return match_data->rc = PCRE2_ERROR_UTF8_ERR20;  /* Isolated 0x80 byte */
7288
#else
7289
    return match_data->rc = PCRE2_ERROR_UTF16_ERR3;  /* Isolated low surrogate */
7290
#endif
7291
0
    }
7292
0
#endif  /* WIDTH != 32 */
7293
7294
  /* The mb->check_subject field points to the start of UTF checking;
7295
  lookbehinds can go back no further than this. */
7296
7297
0
  mb->check_subject = start_match;
7298
7299
  /* Move back by the maximum lookbehind, just in case it happens at the very
7300
  start of matching, but don't do this if we skipped bad 8-bit or 16-bit code
7301
  units above. */
7302
7303
0
#if PCRE2_CODE_UNIT_WIDTH != 32
7304
0
  if (!skipped_bad_start)
7305
0
    {
7306
0
    unsigned int i;
7307
0
    for (i = re->max_lookbehind; i > 0 && mb->check_subject > subject; i--)
7308
0
      {
7309
0
      mb->check_subject--;
7310
0
      while (mb->check_subject > subject &&
7311
0
#if PCRE2_CODE_UNIT_WIDTH == 8
7312
0
      (*mb->check_subject & 0xc0) == 0x80)
7313
#else  /* 16-bit */
7314
      (*mb->check_subject & 0xfc00) == 0xdc00)
7315
#endif
7316
0
        mb->check_subject--;
7317
0
      }
7318
0
    }
7319
#else  /* PCRE2_CODE_UNIT_WIDTH != 32 */
7320
7321
  /* In the 32-bit library, one code unit equals one character. However,
7322
  we cannot just subtract the lookbehind and then compare pointers, because
7323
  a very large lookbehind could create an invalid pointer. */
7324
7325
  if (start_offset >= re->max_lookbehind)
7326
    mb->check_subject -= re->max_lookbehind;
7327
  else
7328
    mb->check_subject = subject;
7329
#endif  /* PCRE2_CODE_UNIT_WIDTH != 32 */
7330
7331
  /* Validate the relevant portion of the subject. There's a loop in case we
7332
  encounter bad UTF in the characters preceding start_match which we are
7333
  scanning because of a lookbehind. */
7334
7335
0
  for (;;)
7336
0
    {
7337
0
    rc = PRIV(valid_utf)(mb->check_subject,
7338
0
      length - (mb->check_subject - subject), &(match_data->startchar));
7339
7340
0
    if (rc == 0) break;   /* Valid UTF string */
7341
7342
    /* Invalid UTF string. Adjust the offset to be an absolute offset in the
7343
    whole string. If we are handling invalid UTF strings, set end_subject to
7344
    stop before the bad code unit, and set the options to "not end of line".
7345
    Otherwise return the error. */
7346
7347
0
    match_data->startchar += mb->check_subject - subject;
7348
0
    if (!allow_invalid || rc > 0) return match_data->rc = rc;
7349
0
    end_subject = subject + match_data->startchar;
7350
7351
    /* If the end precedes start_match, it means there is invalid UTF in the
7352
    extra code units we reversed over because of a lookbehind. Advance past the
7353
    first bad code unit, and then skip invalid character starting code units in
7354
    8-bit and 16-bit modes, and try again with the original end point. */
7355
7356
0
    if (end_subject < start_match)
7357
0
      {
7358
0
      mb->check_subject = end_subject + 1;
7359
0
#if PCRE2_CODE_UNIT_WIDTH != 32
7360
0
      while (mb->check_subject < start_match && NOT_FIRSTCU(*mb->check_subject))
7361
0
        mb->check_subject++;
7362
0
#endif
7363
0
      end_subject = true_end_subject;
7364
0
      }
7365
7366
    /* Otherwise, set the not end of line option, and do the match. */
7367
7368
0
    else
7369
0
      {
7370
0
      fragment_options = PCRE2_NOTEOL;
7371
0
      break;
7372
0
      }
7373
0
    }
7374
0
  }
7375
0
#endif  /* SUPPORT_UNICODE */
7376
7377
/* A NULL match context means "use a default context", but we take the memory
7378
control functions from the pattern. */
7379
7380
0
if (mcontext == NULL)
7381
0
  {
7382
0
  mcontext = (pcre2_match_context *)(&PRIV(default_match_context));
7383
0
  mb->memctl = re->memctl;
7384
0
  }
7385
0
else mb->memctl = mcontext->memctl;
7386
7387
0
anchored = ((re->overall_options | options) & PCRE2_ANCHORED) != 0;
7388
0
firstline = !anchored && (re->overall_options & PCRE2_FIRSTLINE) != 0;
7389
0
startline = (re->flags & PCRE2_STARTLINE) != 0;
7390
0
bumpalong_limit = (mcontext->offset_limit == PCRE2_UNSET)?
7391
0
  true_end_subject : subject + mcontext->offset_limit;
7392
7393
/* Initialize and set up the fixed fields in the callout block, with a pointer
7394
in the match block. */
7395
7396
0
mb->cb = &cb;
7397
0
cb.version = 2;
7398
0
cb.subject = subject;
7399
0
cb.subject_length = (PCRE2_SIZE)(end_subject - subject);
7400
0
cb.callout_flags = 0;
7401
7402
/* Fill in the remaining fields in the match block, except for moptions, which
7403
gets set later. */
7404
7405
0
mb->callout = mcontext->callout;
7406
0
mb->callout_data = mcontext->callout_data;
7407
7408
0
mb->start_subject = subject;
7409
0
mb->start_offset = start_offset;
7410
0
mb->end_subject = end_subject;
7411
0
mb->true_end_subject = true_end_subject;
7412
0
mb->hasthen = (re->flags & PCRE2_HASTHEN) != 0;
7413
0
mb->hasbsk = (re->flags & PCRE2_HASBSK) != 0;
7414
0
mb->allowemptypartial = (re->max_lookbehind > 0) ||
7415
0
    (re->flags & PCRE2_MATCH_EMPTY) != 0;
7416
0
mb->allowlookaroundbsk =
7417
0
  (re->extra_options & PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK) != 0;
7418
0
mb->poptions = re->overall_options;          /* Pattern options */
7419
0
mb->ignore_skip_arg = 0;
7420
0
mb->mark = mb->nomatch_mark = NULL;          /* In case never set */
7421
7422
/* The name table is needed for finding all the numbers associated with a
7423
given name, for condition testing. The code follows the name table. */
7424
7425
0
mb->name_table = (PCRE2_SPTR)((const uint8_t *)re + sizeof(pcre2_real_code));
7426
0
mb->name_count = re->name_count;
7427
0
mb->name_entry_size = re->name_entry_size;
7428
0
mb->start_code = (PCRE2_SPTR)((const uint8_t *)re + re->code_start);
7429
7430
/* Process the \R and newline settings. */
7431
7432
0
mb->bsr_convention = re->bsr_convention;
7433
0
mb->nltype = NLTYPE_FIXED;
7434
0
switch(re->newline_convention)
7435
0
  {
7436
0
  case PCRE2_NEWLINE_CR:
7437
0
  mb->nllen = 1;
7438
0
  mb->nl[0] = CHAR_CR;
7439
0
  break;
7440
7441
0
  case PCRE2_NEWLINE_LF:
7442
0
  mb->nllen = 1;
7443
0
  mb->nl[0] = CHAR_NL;
7444
0
  break;
7445
7446
0
  case PCRE2_NEWLINE_NUL:
7447
0
  mb->nllen = 1;
7448
0
  mb->nl[0] = CHAR_NUL;
7449
0
  break;
7450
7451
0
  case PCRE2_NEWLINE_CRLF:
7452
0
  mb->nllen = 2;
7453
0
  mb->nl[0] = CHAR_CR;
7454
0
  mb->nl[1] = CHAR_NL;
7455
0
  break;
7456
7457
0
  case PCRE2_NEWLINE_ANY:
7458
0
  mb->nltype = NLTYPE_ANY;
7459
0
  break;
7460
7461
0
  case PCRE2_NEWLINE_ANYCRLF:
7462
0
  mb->nltype = NLTYPE_ANYCRLF;
7463
0
  break;
7464
7465
  /* LCOV_EXCL_START */
7466
0
  default:
7467
0
  PCRE2_DEBUG_UNREACHABLE();
7468
0
  return match_data->rc = PCRE2_ERROR_INTERNAL;
7469
  /* LCOV_EXCL_STOP */
7470
0
  }
7471
7472
/* The backtracking frames have fixed data at the front, and a PCRE2_SIZE
7473
vector at the end, whose size depends on the number of capturing parentheses in
7474
the pattern. It is not used at all if there are no capturing parentheses.
7475
7476
  frame_size                   is the total size of each frame
7477
  match_data->heapframes       is the pointer to the frames vector
7478
  match_data->heapframes_size  is the allocated size of the vector
7479
7480
We must pad the frame_size for alignment to ensure subsequent frames are as
7481
aligned as heapframe. Whilst ovector is word-aligned due to being a PCRE2_SIZE
7482
array, that does not guarantee it is suitably aligned for pointers, as some
7483
architectures have pointers that are larger than a size_t. */
7484
7485
0
frame_size = (offsetof(heapframe, ovector) +
7486
0
  re->top_bracket * 2 * sizeof(PCRE2_SIZE) + HEAPFRAME_ALIGNMENT - 1) &
7487
0
  ~(HEAPFRAME_ALIGNMENT - 1);
7488
7489
/* Limits set in the pattern override the match context only if they are
7490
smaller. */
7491
7492
0
mb->heap_limit = ((mcontext->heap_limit < re->limit_heap)?
7493
0
  mcontext->heap_limit : re->limit_heap);
7494
7495
0
mb->match_limit = (mcontext->match_limit < re->limit_match)?
7496
0
  mcontext->match_limit : re->limit_match;
7497
7498
0
mb->match_limit_depth = (mcontext->depth_limit < re->limit_depth)?
7499
0
  mcontext->depth_limit : re->limit_depth;
7500
7501
/* If a pattern has very many capturing parentheses, the frame size may be very
7502
large. Set the initial frame vector size to ensure that there are at least 10
7503
available frames, but enforce a minimum of START_FRAMES_SIZE. If this is
7504
greater than the heap limit, get as large a vector as possible. */
7505
7506
0
heapframes_size = frame_size * 10;
7507
0
if (heapframes_size < START_FRAMES_SIZE) heapframes_size = START_FRAMES_SIZE;
7508
0
if (heapframes_size / 1024 > mb->heap_limit)
7509
0
  {
7510
0
  PCRE2_SIZE max_size = 1024 * mb->heap_limit;
7511
0
  if (max_size < frame_size) return match_data->rc = PCRE2_ERROR_HEAPLIMIT;
7512
0
  heapframes_size = max_size;
7513
0
  }
7514
7515
/* If an existing frame vector in the match_data block is large enough, we can
7516
use it. Otherwise, free any pre-existing vector and get a new one. */
7517
7518
0
if (match_data->heapframes_size < heapframes_size)
7519
0
  {
7520
0
  match_data->memctl.free(match_data->heapframes,
7521
0
    match_data->memctl.memory_data);
7522
0
  match_data->heapframes = match_data->memctl.malloc(heapframes_size,
7523
0
    match_data->memctl.memory_data);
7524
0
  if (match_data->heapframes == NULL)
7525
0
    {
7526
0
    match_data->heapframes_size = 0;
7527
0
    return match_data->rc = PCRE2_ERROR_NOMEMORY;
7528
0
    }
7529
0
  match_data->heapframes_size = heapframes_size;
7530
0
  }
7531
7532
/* Write to the ovector within the first frame to mark every capture unset and
7533
to avoid uninitialized memory read errors when it is copied to a new frame. */
7534
7535
0
memset((char *)(match_data->heapframes) + offsetof(heapframe, ovector), 0xff,
7536
0
  frame_size - offsetof(heapframe, ovector));
7537
7538
/* Pointers to the individual character tables */
7539
7540
0
mb->lcc = re->tables + lcc_offset;
7541
0
mb->fcc = re->tables + fcc_offset;
7542
0
mb->ctypes = re->tables + ctypes_offset;
7543
7544
/* Set up the first code unit to match, if available. If there's no first code
7545
unit there may be a bitmap of possible first characters. */
7546
7547
0
if ((re->flags & PCRE2_FIRSTSET) != 0)
7548
0
  {
7549
0
  has_first_cu = TRUE;
7550
0
  first_cu = first_cu2 = (PCRE2_UCHAR)(re->first_codeunit);
7551
0
  if ((re->flags & PCRE2_FIRSTCASELESS) != 0)
7552
0
    {
7553
0
    first_cu2 = TABLE_GET(first_cu, mb->fcc, first_cu);
7554
0
#ifdef SUPPORT_UNICODE
7555
0
#if PCRE2_CODE_UNIT_WIDTH == 8
7556
0
    if (first_cu > 127 && ucp && !utf) first_cu2 = UCD_OTHERCASE(first_cu);
7557
#else
7558
    if (first_cu > 127 && (utf || ucp)) first_cu2 = UCD_OTHERCASE(first_cu);
7559
#endif
7560
0
#endif  /* SUPPORT_UNICODE */
7561
0
    }
7562
0
  }
7563
0
else
7564
0
  if (!startline && (re->flags & PCRE2_FIRSTMAPSET) != 0)
7565
0
    start_bits = re->start_bitmap;
7566
7567
/* There may also be a "last known required character" set. */
7568
7569
0
if ((re->flags & PCRE2_LASTSET) != 0)
7570
0
  {
7571
0
  has_req_cu = TRUE;
7572
0
  req_cu = req_cu2 = (PCRE2_UCHAR)(re->last_codeunit);
7573
0
  if ((re->flags & PCRE2_LASTCASELESS) != 0)
7574
0
    {
7575
0
    req_cu2 = TABLE_GET(req_cu, mb->fcc, req_cu);
7576
0
#ifdef SUPPORT_UNICODE
7577
0
#if PCRE2_CODE_UNIT_WIDTH == 8
7578
0
    if (req_cu > 127 && ucp && !utf) req_cu2 = UCD_OTHERCASE(req_cu);
7579
#else
7580
    if (req_cu > 127 && (utf || ucp)) req_cu2 = UCD_OTHERCASE(req_cu);
7581
#endif
7582
0
#endif  /* SUPPORT_UNICODE */
7583
0
    }
7584
0
  }
7585
7586
7587
/* ==========================================================================*/
7588
7589
/* Loop for handling unanchored repeated matching attempts; for anchored regexes
7590
the loop runs just once. */
7591
7592
0
#ifdef SUPPORT_UNICODE
7593
0
FRAGMENT_RESTART:
7594
0
#endif
7595
7596
0
start_partial = match_partial = NULL;
7597
0
mb->hitend = FALSE;
7598
7599
0
#if PCRE2_CODE_UNIT_WIDTH == 8
7600
0
memchr_found_first_cu = NULL;
7601
0
memchr_found_first_cu2 = NULL;
7602
0
#endif
7603
7604
0
for(;;)
7605
0
  {
7606
0
  PCRE2_SPTR new_start_match;
7607
7608
  /* ----------------- Start of match optimizations ---------------- */
7609
7610
  /* There are some optimizations that avoid running the match if a known
7611
  starting point is not found, or if a known later code unit is not present.
7612
  However, there is an option (settable at compile time) that disables these,
7613
  for testing and for ensuring that all callouts do actually occur. */
7614
7615
0
  if ((re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0)
7616
0
    {
7617
    /* If firstline is TRUE, the start of the match is constrained to the first
7618
    line of a multiline string. That is, the match must be before or at the
7619
    first newline following the start of matching. Temporarily adjust
7620
    end_subject so that we stop the scans for a first code unit at a newline.
7621
    If the match fails at the newline, later code breaks the loop. */
7622
7623
0
    if (firstline)
7624
0
      {
7625
0
      PCRE2_SPTR t = start_match;
7626
0
#ifdef SUPPORT_UNICODE
7627
0
      if (utf)
7628
0
        {
7629
0
        while (t < end_subject && !IS_NEWLINE(t))
7630
0
          {
7631
0
          t++;
7632
0
          ACROSSCHAR(t < end_subject, t, t++);
7633
0
          }
7634
0
        }
7635
0
      else
7636
0
#endif
7637
0
      while (t < end_subject && !IS_NEWLINE(t)) t++;
7638
0
      end_subject = t;
7639
0
      }
7640
7641
    /* Anchored: check the first code unit if one is recorded. This may seem
7642
    pointless but it can help in detecting a no match case without scanning for
7643
    the required code unit. */
7644
7645
0
    if (anchored)
7646
0
      {
7647
0
      if (has_first_cu || start_bits != NULL)
7648
0
        {
7649
0
        BOOL ok = start_match < end_subject;
7650
0
        if (ok)
7651
0
          {
7652
0
          PCRE2_UCHAR c = UCHAR21TEST(start_match);
7653
0
          ok = has_first_cu && (c == first_cu || c == first_cu2);
7654
0
          if (!ok && start_bits != NULL)
7655
0
            {
7656
#if PCRE2_CODE_UNIT_WIDTH != 8
7657
            if (c > 255) c = 255;
7658
#endif
7659
0
            ok = (start_bits[c/8] & (1u << (c&7))) != 0;
7660
0
            }
7661
0
          }
7662
0
        if (!ok)
7663
0
          {
7664
0
          rc = MATCH_NOMATCH;
7665
0
          break;
7666
0
          }
7667
0
        }
7668
0
      }
7669
7670
    /* Not anchored. Advance to a unique first code unit if there is one. */
7671
7672
0
    else
7673
0
      {
7674
0
      if (has_first_cu)
7675
0
        {
7676
0
        if (first_cu != first_cu2)  /* Caseless */
7677
0
          {
7678
          /* In 16-bit and 32-bit modes we have to do our own search, so can
7679
          look for both cases at once. */
7680
7681
#if PCRE2_CODE_UNIT_WIDTH != 8
7682
          PCRE2_UCHAR smc;
7683
          while (start_match < end_subject &&
7684
                (smc = UCHAR21TEST(start_match)) != first_cu &&
7685
                 smc != first_cu2)
7686
            start_match++;
7687
#else
7688
          /* In 8-bit mode, the use of memchr() gives a big speed up, even
7689
          though we have to call it twice in order to find the earliest
7690
          occurrence of the code unit in either of its cases. Caching is used
7691
          to remember the positions of previously found code units. This can
7692
          make a huge difference when the strings are very long and only one
7693
          case is actually present. */
7694
7695
0
          PCRE2_SPTR pp1 = NULL;
7696
0
          PCRE2_SPTR pp2 = NULL;
7697
0
          PCRE2_SIZE searchlength = end_subject - start_match;
7698
7699
          /* If we haven't got a previously found position for first_cu, or if
7700
          the current starting position is later, we need to do a search. If
7701
          the code unit is not found, set it to the end. */
7702
7703
0
          if (memchr_found_first_cu == NULL ||
7704
0
              start_match > memchr_found_first_cu)
7705
0
            {
7706
0
            pp1 = memchr(start_match, first_cu, searchlength);
7707
0
            memchr_found_first_cu = (pp1 == NULL)? end_subject : pp1;
7708
0
            }
7709
7710
          /* If the start is before a previously found position, use the
7711
          previous position, or NULL if a previous search failed. */
7712
7713
0
          else pp1 = (memchr_found_first_cu == end_subject)? NULL :
7714
0
            memchr_found_first_cu;
7715
7716
          /* Do the same thing for the other case. */
7717
7718
0
          if (memchr_found_first_cu2 == NULL ||
7719
0
              start_match > memchr_found_first_cu2)
7720
0
            {
7721
0
            pp2 = memchr(start_match, first_cu2, searchlength);
7722
0
            memchr_found_first_cu2 = (pp2 == NULL)? end_subject : pp2;
7723
0
            }
7724
7725
0
          else pp2 = (memchr_found_first_cu2 == end_subject)? NULL :
7726
0
            memchr_found_first_cu2;
7727
7728
          /* Set the start to the end of the subject if neither case was found.
7729
          Otherwise, use the earlier found point. */
7730
7731
0
          if (pp1 == NULL)
7732
0
            start_match = (pp2 == NULL)? end_subject : pp2;
7733
0
          else
7734
0
            start_match = (pp2 == NULL || pp1 < pp2)? pp1 : pp2;
7735
7736
0
#endif  /* 8-bit handling */
7737
0
          }
7738
7739
        /* The caseful case is much simpler. */
7740
7741
0
        else
7742
0
          {
7743
#if PCRE2_CODE_UNIT_WIDTH != 8
7744
          while (start_match < end_subject && UCHAR21TEST(start_match) !=
7745
                 first_cu)
7746
            start_match++;
7747
#else
7748
0
          start_match = memchr(start_match, first_cu, end_subject - start_match);
7749
0
          if (start_match == NULL) start_match = end_subject;
7750
0
#endif
7751
0
          }
7752
7753
        /* If we can't find the required first code unit, having reached the
7754
        true end of the subject, break the bumpalong loop, to force a match
7755
        failure, except when doing partial matching, when we let the next cycle
7756
        run at the end of the subject. To see why, consider the pattern
7757
        /(?<=abc)def/, which partially matches "abc", even though the string
7758
        does not contain the starting character "d". If we have not reached the
7759
        true end of the subject (PCRE2_FIRSTLINE caused end_subject to be
7760
        temporarily modified) we also let the cycle run, because the matching
7761
        string is legitimately allowed to start with the first code unit of a
7762
        newline. */
7763
7764
0
        if (mb->partial == 0 && start_match >= mb->end_subject)
7765
0
          {
7766
0
          rc = MATCH_NOMATCH;
7767
0
          break;
7768
0
          }
7769
0
        }
7770
7771
      /* If there's no first code unit, advance to just after a linebreak for a
7772
      multiline match if required. */
7773
7774
0
      else if (startline)
7775
0
        {
7776
0
        if (start_match > mb->start_subject + start_offset)
7777
0
          {
7778
0
#ifdef SUPPORT_UNICODE
7779
0
          if (utf)
7780
0
            {
7781
0
            while (start_match < end_subject && !WAS_NEWLINE(start_match))
7782
0
              {
7783
0
              start_match++;
7784
0
              ACROSSCHAR(start_match < end_subject, start_match, start_match++);
7785
0
              }
7786
0
            }
7787
0
          else
7788
0
#endif
7789
0
          while (start_match < end_subject && !WAS_NEWLINE(start_match))
7790
0
            start_match++;
7791
7792
          /* If we have just passed a CR and the newline option is ANY or
7793
          ANYCRLF, and we are now at a LF, advance the match position by one
7794
          more code unit. */
7795
7796
0
          if (start_match[-1] == CHAR_CR &&
7797
0
               (mb->nltype == NLTYPE_ANY || mb->nltype == NLTYPE_ANYCRLF) &&
7798
0
               start_match < end_subject &&
7799
0
               UCHAR21TEST(start_match) == CHAR_NL)
7800
0
            start_match++;
7801
0
          }
7802
0
        }
7803
7804
      /* If there's no first code unit or a requirement for a multiline line
7805
      start, advance to a non-unique first code unit if any have been
7806
      identified. The bitmap contains only 256 bits. When code units are 16 or
7807
      32 bits wide, all code units greater than 254 set the 255 bit. */
7808
7809
0
      else if (start_bits != NULL)
7810
0
        {
7811
0
        while (start_match < end_subject)
7812
0
          {
7813
0
          uint32_t c = UCHAR21TEST(start_match);
7814
#if PCRE2_CODE_UNIT_WIDTH != 8
7815
          if (c > 255) c = 255;
7816
#endif
7817
0
          if ((start_bits[c/8] & (1u << (c&7))) != 0) break;
7818
0
          start_match++;
7819
0
          }
7820
7821
        /* See comment above in first_cu checking about the next few lines. */
7822
7823
0
        if (mb->partial == 0 && start_match >= mb->end_subject)
7824
0
          {
7825
0
          rc = MATCH_NOMATCH;
7826
0
          break;
7827
0
          }
7828
0
        }
7829
0
      }   /* End first code unit handling */
7830
7831
    /* Restore fudged end_subject */
7832
7833
0
    end_subject = mb->end_subject;
7834
7835
    /* The following two optimizations must be disabled for partial matching. */
7836
7837
0
    if (mb->partial == 0)
7838
0
      {
7839
0
      PCRE2_SPTR p;
7840
7841
      /* The minimum matching length is a lower bound; no string of that length
7842
      may actually match the pattern. Although the value is, strictly, in
7843
      characters, we treat it as code units to avoid spending too much time in
7844
      this optimization. */
7845
7846
0
      if (end_subject - start_match < re->minlength)
7847
0
        {
7848
0
        rc = MATCH_NOMATCH;
7849
0
        break;
7850
0
        }
7851
7852
      /* If req_cu is set, we know that that code unit must appear in the
7853
      subject for the (non-partial) match to succeed. If the first code unit is
7854
      set, req_cu must be later in the subject; otherwise the test starts at
7855
      the match point. This optimization can save a huge amount of backtracking
7856
      in patterns with nested unlimited repeats that aren't going to match.
7857
      Writing separate code for caseful/caseless versions makes it go faster,
7858
      as does using an autoincrement and backing off on a match. As in the case
7859
      of the first code unit, using memchr() in the 8-bit library gives a big
7860
      speed up. Unlike the first_cu check above, we do not need to call
7861
      memchr() twice in the caseless case because we only need to check for the
7862
      presence of the character in either case, not find the first occurrence.
7863
7864
      The search can be skipped if the code unit was found later than the
7865
      current starting point in a previous iteration of the bumpalong loop.
7866
7867
      HOWEVER: when the subject string is very, very long, searching to its end
7868
      can take a long time, and give bad performance on quite ordinary
7869
      anchored patterns. This showed up when somebody was matching something
7870
      like /^\d+C/ on a 32-megabyte string... so we don't do this when the
7871
      string is sufficiently long, but it's worth searching a lot more for
7872
      unanchored patterns. */
7873
7874
0
      p = start_match + (has_first_cu? 1:0);
7875
0
      if (has_req_cu && p > req_cu_ptr)
7876
0
        {
7877
0
        PCRE2_SIZE check_length = end_subject - start_match;
7878
7879
0
        if (check_length < REQ_CU_MAX ||
7880
0
              (!anchored && check_length < REQ_CU_MAX * 1000))
7881
0
          {
7882
0
          if (req_cu != req_cu2)  /* Caseless */
7883
0
            {
7884
#if PCRE2_CODE_UNIT_WIDTH != 8
7885
            while (p < end_subject)
7886
              {
7887
              uint32_t pp = UCHAR21INCTEST(p);
7888
              if (pp == req_cu || pp == req_cu2) { p--; break; }
7889
              }
7890
#else  /* 8-bit code units */
7891
0
            PCRE2_SPTR pp = p;
7892
0
            p = memchr(pp, req_cu, end_subject - pp);
7893
0
            if (p == NULL)
7894
0
              {
7895
0
              p = memchr(pp, req_cu2, end_subject - pp);
7896
0
              if (p == NULL) p = end_subject;
7897
0
              }
7898
0
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
7899
0
            }
7900
7901
          /* The caseful case */
7902
7903
0
          else
7904
0
            {
7905
#if PCRE2_CODE_UNIT_WIDTH != 8
7906
            while (p < end_subject)
7907
              {
7908
              if (UCHAR21INCTEST(p) == req_cu) { p--; break; }
7909
              }
7910
7911
#else  /* 8-bit code units */
7912
0
            p = memchr(p, req_cu, end_subject - p);
7913
0
            if (p == NULL) p = end_subject;
7914
0
#endif
7915
0
            }
7916
7917
          /* If we can't find the required code unit, break the bumpalong loop,
7918
          forcing a match failure. */
7919
7920
0
          if (p >= end_subject)
7921
0
            {
7922
0
            rc = MATCH_NOMATCH;
7923
0
            break;
7924
0
            }
7925
7926
          /* If we have found the required code unit, save the point where we
7927
          found it, so that we don't search again next time round the bumpalong
7928
          loop if the start hasn't yet passed this code unit. */
7929
7930
0
          req_cu_ptr = p;
7931
0
          }
7932
0
        }
7933
0
      }
7934
0
    }
7935
7936
  /* ------------ End of start of match optimizations ------------ */
7937
7938
  /* Give no match if we have passed the bumpalong limit. */
7939
7940
0
  if (start_match > bumpalong_limit)
7941
0
    {
7942
0
    rc = MATCH_NOMATCH;
7943
0
    break;
7944
0
    }
7945
7946
  /* OK, we can now run the match. If "hitend" is set afterwards, remember the
7947
  first starting point for which a partial match was found. */
7948
7949
0
  cb.start_match = (PCRE2_SIZE)(start_match - subject);
7950
0
  cb.callout_flags |= PCRE2_CALLOUT_STARTMATCH;
7951
7952
0
  mb->start_used_ptr = start_match;
7953
0
  mb->last_used_ptr = start_match;
7954
0
#ifdef SUPPORT_UNICODE
7955
0
  mb->moptions = options | fragment_options;
7956
#else
7957
  mb->moptions = options;
7958
#endif
7959
0
  mb->match_call_count = 0;
7960
0
  mb->end_offset_top = 0;
7961
0
  mb->skip_arg_count = 0;
7962
7963
#ifdef DEBUG_SHOW_OPS
7964
  fprintf(stderr, "++ Calling match()\n");
7965
#endif
7966
7967
0
  rc = match(start_match, mb->start_code, re->top_bracket, frame_size,
7968
0
    match_data, mb);
7969
7970
#ifdef DEBUG_SHOW_OPS
7971
  fprintf(stderr, "++ match() returned %d\n\n", rc);
7972
#endif
7973
7974
0
  if (mb->hitend && start_partial == NULL)
7975
0
    {
7976
0
    start_partial = mb->start_used_ptr;
7977
0
    match_partial = start_match;
7978
0
    }
7979
7980
0
  switch(rc)
7981
0
    {
7982
    /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
7983
    the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP
7984
    entirely. The only way we can do that is to re-do the match at the same
7985
    point, with a flag to force SKIP with an argument to be ignored. Just
7986
    treating this case as NOMATCH does not work because it does not check other
7987
    alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. */
7988
7989
0
    case MATCH_SKIP_ARG:
7990
0
    new_start_match = start_match;
7991
0
    mb->ignore_skip_arg = mb->skip_arg_count;
7992
0
    break;
7993
7994
    /* SKIP passes back the next starting point explicitly, but if it is no
7995
    greater than the match we have just done, treat it as NOMATCH. */
7996
7997
0
    case MATCH_SKIP:
7998
0
    if (mb->verb_skip_ptr > start_match)
7999
0
      {
8000
0
      new_start_match = mb->verb_skip_ptr;
8001
0
      break;
8002
0
      }
8003
0
    PCRE2_FALLTHROUGH /* Fall through */
8004
0
8005
0
    /* NOMATCH and PRUNE advance by one character. THEN at this level acts
8006
0
    exactly like PRUNE. Unset ignore SKIP-with-argument. */
8007
0
8008
0
    case MATCH_NOMATCH:
8009
0
    case MATCH_PRUNE:
8010
0
    case MATCH_THEN:
8011
0
    mb->ignore_skip_arg = 0;
8012
0
    new_start_match = start_match + 1;
8013
0
#ifdef SUPPORT_UNICODE
8014
0
    if (utf)
8015
0
      ACROSSCHAR(new_start_match < end_subject, new_start_match,
8016
0
        new_start_match++);
8017
0
#endif
8018
0
    break;
8019
8020
    /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
8021
8022
0
    case MATCH_COMMIT:
8023
0
    rc = MATCH_NOMATCH;
8024
0
    goto ENDLOOP;
8025
8026
    /* Any other return is either a match, or some kind of error. */
8027
8028
0
    default:
8029
0
    goto ENDLOOP;
8030
0
    }
8031
8032
  /* Control reaches here for the various types of "no match at this point"
8033
  result. Reset the code to MATCH_NOMATCH for subsequent checking. */
8034
8035
0
  rc = MATCH_NOMATCH;
8036
8037
  /* If PCRE2_FIRSTLINE is set, the match must happen before or at the first
8038
  newline in the subject (though it may continue over the newline). Therefore,
8039
  if we have just failed to match, starting at a newline, do not continue. */
8040
8041
0
  if (firstline && IS_NEWLINE(start_match)) break;
8042
8043
  /* Advance to new matching position */
8044
8045
0
  start_match = new_start_match;
8046
8047
  /* Break the loop if the pattern is anchored or if we have passed the end of
8048
  the subject. */
8049
8050
0
  if (anchored || start_match > end_subject) break;
8051
8052
  /* If we have just passed a CR and we are now at a LF, and the pattern does
8053
  not contain any explicit matches for \r or \n, and the newline option is CRLF
8054
  or ANY or ANYCRLF, advance the match position by one more code unit. In
8055
  normal matching start_match will always be greater than the first position at
8056
  this stage, but a failed *SKIP can cause a return at the same point, which is
8057
  why the first test exists. */
8058
8059
0
  if (start_match > subject + start_offset &&
8060
0
      start_match[-1] == CHAR_CR &&
8061
0
      start_match < end_subject &&
8062
0
      *start_match == CHAR_NL &&
8063
0
      (re->flags & PCRE2_HASCRORLF) == 0 &&
8064
0
        (mb->nltype == NLTYPE_ANY ||
8065
0
         mb->nltype == NLTYPE_ANYCRLF ||
8066
0
         mb->nllen == 2))
8067
0
    start_match++;
8068
8069
0
  mb->mark = NULL;   /* Reset for start of next match attempt */
8070
0
  }                  /* End of for(;;) "bumpalong" loop */
8071
8072
/* ==========================================================================*/
8073
8074
/* When we reach here, one of the following stopping conditions is true:
8075
8076
(1) The match succeeded, either completely, or partially;
8077
8078
(2) The pattern is anchored or the match was failed after (*COMMIT);
8079
8080
(3) We are past the end of the subject or the bumpalong limit;
8081
8082
(4) PCRE2_FIRSTLINE is set and we have failed to match at a newline, because
8083
    this option requests that a match occur at or before the first newline in
8084
    the subject.
8085
8086
(5) Some kind of error occurred.
8087
8088
*/
8089
8090
0
ENDLOOP:
8091
8092
/* If end_subject != true_end_subject, it means we are handling invalid UTF,
8093
and have just processed a non-terminal fragment. If this resulted in no match
8094
or a partial match we must carry on to the next fragment (a partial match is
8095
returned to the caller only at the very end of the subject). A loop is used to
8096
avoid trying to match against empty fragments; if the pattern can match an
8097
empty string it would have done so already. */
8098
8099
0
#ifdef SUPPORT_UNICODE
8100
0
if (utf && end_subject != true_end_subject &&
8101
0
    (rc == MATCH_NOMATCH || rc == PCRE2_ERROR_PARTIAL))
8102
0
  {
8103
0
  for (;;)
8104
0
    {
8105
    /* Advance past the first bad code unit, and then skip invalid character
8106
    starting code units in 8-bit and 16-bit modes. */
8107
8108
0
    start_match = end_subject + 1;
8109
8110
0
#if PCRE2_CODE_UNIT_WIDTH != 32
8111
0
    while (start_match < true_end_subject && NOT_FIRSTCU(*start_match))
8112
0
      start_match++;
8113
0
#endif
8114
8115
    /* If we have hit the end of the subject, there isn't another non-empty
8116
    fragment, so give up. */
8117
8118
0
    if (start_match >= true_end_subject)
8119
0
      {
8120
0
      rc = MATCH_NOMATCH;  /* In case it was partial */
8121
0
      match_partial = NULL;
8122
0
      break;
8123
0
      }
8124
8125
    /* Check the rest of the subject */
8126
8127
0
    mb->check_subject = start_match;
8128
0
    rc = PRIV(valid_utf)(start_match, length - (start_match - subject),
8129
0
      &(match_data->startchar));
8130
8131
    /* The rest of the subject is valid UTF. */
8132
8133
0
    if (rc == 0)
8134
0
      {
8135
0
      mb->end_subject = end_subject = true_end_subject;
8136
0
      fragment_options = PCRE2_NOTBOL;
8137
0
      goto FRAGMENT_RESTART;
8138
0
      }
8139
8140
    /* A subsequent UTF error has been found; if the next fragment is
8141
    non-empty, set up to process it. Otherwise, let the loop advance. */
8142
8143
0
    else if (rc < 0)
8144
0
      {
8145
0
      mb->end_subject = end_subject = start_match + match_data->startchar;
8146
0
      if (end_subject > start_match)
8147
0
        {
8148
0
        fragment_options = PCRE2_NOTBOL|PCRE2_NOTEOL;
8149
0
        goto FRAGMENT_RESTART;
8150
0
        }
8151
0
      }
8152
0
    }
8153
0
  }
8154
0
#endif  /* SUPPORT_UNICODE */
8155
8156
/* Fill in fields that are always returned in the match data. */
8157
8158
0
match_data->code = re;
8159
0
match_data->mark = mb->mark;
8160
0
match_data->matchedby = PCRE2_MATCHEDBY_INTERPRETER;
8161
0
match_data->options = original_options;
8162
8163
/* Handle a fully successful match. Set the return code to the number of
8164
captured strings, or 0 if there were too many to fit into the ovector, and then
8165
set the remaining returned values before returning. Make a copy of the subject
8166
string if requested. */
8167
8168
0
if (rc == MATCH_MATCH)
8169
0
  {
8170
0
  match_data->rc = ((int)mb->end_offset_top >= 2 * match_data->oveccount)?
8171
0
    0 : (int)mb->end_offset_top/2 + 1;
8172
0
  match_data->subject_length = length;
8173
0
  match_data->start_offset = start_offset;
8174
0
  match_data->startchar = start_match - subject;
8175
0
  match_data->leftchar = mb->start_used_ptr - subject;
8176
0
  match_data->rightchar = ((mb->last_used_ptr > mb->end_match_ptr)?
8177
0
    mb->last_used_ptr : mb->end_match_ptr) - subject;
8178
0
  if ((options & PCRE2_COPY_MATCHED_SUBJECT) != 0)
8179
0
    {
8180
0
    if (length != 0)
8181
0
      {
8182
0
      match_data->subject = match_data->memctl.malloc(CU2BYTES(length),
8183
0
        match_data->memctl.memory_data);
8184
0
      if (match_data->subject == NULL)
8185
0
        return match_data->rc = PCRE2_ERROR_NOMEMORY;
8186
0
      memcpy((void *)match_data->subject, subject, CU2BYTES(length));
8187
0
      }
8188
0
    else
8189
0
      match_data->subject = NULL;
8190
0
    match_data->flags |= PCRE2_MD_COPIED_SUBJECT;
8191
0
    }
8192
0
  else match_data->subject = original_subject;
8193
8194
0
  return match_data->rc;
8195
0
  }
8196
8197
/* Control gets here if there has been a partial match, an error, or if the
8198
overall match attempt has failed at all permitted starting positions. Any mark
8199
data is in the nomatch_mark field. */
8200
8201
0
match_data->mark = mb->nomatch_mark;
8202
8203
/* For anything other than nomatch or partial match, just return the code. */
8204
8205
0
if (rc != MATCH_NOMATCH && rc != PCRE2_ERROR_PARTIAL) match_data->rc = rc;
8206
8207
/* Handle a partial match. If a "soft" partial match was requested, searching
8208
for a complete match will have continued, and the value of rc at this point
8209
will be MATCH_NOMATCH. For a "hard" partial match, it will already be
8210
PCRE2_ERROR_PARTIAL. */
8211
8212
0
else if (match_partial != NULL)
8213
0
  {
8214
0
  match_data->subject = original_subject;
8215
0
  match_data->subject_length = length;
8216
0
  match_data->start_offset = start_offset;
8217
0
  match_data->ovector[0] = match_partial - subject;
8218
0
  match_data->ovector[1] = end_subject - subject;
8219
0
  match_data->startchar = match_partial - subject;
8220
0
  match_data->leftchar = start_partial - subject;
8221
0
  match_data->rightchar = end_subject - subject;
8222
0
  match_data->rc = PCRE2_ERROR_PARTIAL;
8223
0
  }
8224
8225
/* Else this is the classic nomatch case. */
8226
8227
0
else
8228
0
  {
8229
0
  match_data->subject = original_subject;
8230
0
  match_data->subject_length = length;
8231
0
  match_data->start_offset = start_offset;
8232
0
  match_data->rc = PCRE2_ERROR_NOMATCH;
8233
0
  }
8234
8235
0
return match_data->rc;
8236
0
}
8237
8238
/* These #undefs are here to enable unity builds with CMake. */
8239
8240
#undef NLBLOCK /* Block containing newline information */
8241
#undef PSSTART /* Field containing processed string start */
8242
#undef PSEND   /* Field containing processed string end */
8243
8244
/* End of pcre2_match.c */