Coverage Report

Created: 2026-05-16 07:21

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/glib-2.80.0/subprojects/pcre2-10.42/src/pcre2_match.c
Line
Count
Source
1
/*************************************************
2
*      Perl-Compatible Regular Expressions       *
3
*************************************************/
4
5
/* PCRE is a library of functions to support regular expressions whose syntax
6
and semantics are as close as possible to those of the Perl 5 language.
7
8
                       Written by Philip Hazel
9
     Original API code Copyright (c) 1997-2012 University of Cambridge
10
          New API code Copyright (c) 2015-2022 University of Cambridge
11
12
-----------------------------------------------------------------------------
13
Redistribution and use in source and binary forms, with or without
14
modification, are permitted provided that the following conditions are met:
15
16
    * Redistributions of source code must retain the above copyright notice,
17
      this list of conditions and the following disclaimer.
18
19
    * Redistributions in binary form must reproduce the above copyright
20
      notice, this list of conditions and the following disclaimer in the
21
      documentation and/or other materials provided with the distribution.
22
23
    * Neither the name of the University of Cambridge nor the names of its
24
      contributors may be used to endorse or promote products derived from
25
      this software without specific prior written permission.
26
27
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37
POSSIBILITY OF SUCH DAMAGE.
38
-----------------------------------------------------------------------------
39
*/
40
41
42
#ifdef HAVE_CONFIG_H
43
#include "config.h"
44
#endif
45
46
/* These defines enable debugging code */
47
48
/* #define DEBUG_FRAMES_DISPLAY */
49
/* #define DEBUG_SHOW_OPS */
50
/* #define DEBUG_SHOW_RMATCH */
51
52
#ifdef DEBUG_FRAMES_DISPLAY
53
#include <stdarg.h>
54
#endif
55
56
/* These defines identify the name of the block containing "static"
57
information, and fields within it. */
58
59
0
#define NLBLOCK mb              /* Block containing newline information */
60
0
#define PSSTART start_subject   /* Field containing processed string start */
61
0
#define PSEND   end_subject     /* Field containing processed string end */
62
63
#include "pcre2_internal.h"
64
65
0
#define RECURSE_UNSET 0xffffffffu  /* Bigger than max group number */
66
67
/* Masks for identifying the public options that are permitted at match time. */
68
69
#define PUBLIC_MATCH_OPTIONS \
70
0
  (PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
71
0
   PCRE2_NOTEMPTY_ATSTART|PCRE2_NO_UTF_CHECK|PCRE2_PARTIAL_HARD| \
72
0
   PCRE2_PARTIAL_SOFT|PCRE2_NO_JIT|PCRE2_COPY_MATCHED_SUBJECT)
73
74
#define PUBLIC_JIT_MATCH_OPTIONS \
75
   (PCRE2_NO_UTF_CHECK|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY|\
76
    PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_SOFT|PCRE2_PARTIAL_HARD|\
77
    PCRE2_COPY_MATCHED_SUBJECT)
78
79
/* Non-error returns from and within the match() function. Error returns are
80
externally defined PCRE2_ERROR_xxx codes, which are all negative. */
81
82
0
#define MATCH_MATCH        1
83
0
#define MATCH_NOMATCH      0
84
85
/* Special internal returns used in the match() function. Make them
86
sufficiently negative to avoid the external error codes. */
87
88
0
#define MATCH_ACCEPT       (-999)
89
0
#define MATCH_KETRPOS      (-998)
90
/* The next 5 must be kept together and in sequence so that a test that checks
91
for any one of them can use a range. */
92
0
#define MATCH_COMMIT       (-997)
93
0
#define MATCH_PRUNE        (-996)
94
0
#define MATCH_SKIP         (-995)
95
0
#define MATCH_SKIP_ARG     (-994)
96
0
#define MATCH_THEN         (-993)
97
0
#define MATCH_BACKTRACK_MAX MATCH_THEN
98
0
#define MATCH_BACKTRACK_MIN MATCH_COMMIT
99
100
/* Group frame type values. Zero means the frame is not a group frame. The
101
lower 16 bits are used for data (e.g. the capture number). Group frames are
102
used for most groups so that information about the start is easily available at
103
the end without having to scan back through intermediate frames (backtrack
104
points). */
105
106
0
#define GF_CAPTURE     0x00010000u
107
0
#define GF_NOCAPTURE   0x00020000u
108
0
#define GF_CONDASSERT  0x00030000u
109
0
#define GF_RECURSE     0x00040000u
110
111
/* Masks for the identity and data parts of the group frame type. */
112
113
0
#define GF_IDMASK(a)   ((a) & 0xffff0000u)
114
0
#define GF_DATAMASK(a) ((a) & 0x0000ffffu)
115
116
/* Repetition types */
117
118
enum { REPTYPE_MIN, REPTYPE_MAX, REPTYPE_POS };
119
120
/* Min and max values for the common repeats; a maximum of UINT32_MAX =>
121
infinity. */
122
123
static const uint32_t rep_min[] = {
124
  0, 0,       /* * and *? */
125
  1, 1,       /* + and +? */
126
  0, 0,       /* ? and ?? */
127
  0, 0,       /* dummy placefillers for OP_CR[MIN]RANGE */
128
  0, 1, 0 };  /* OP_CRPOS{STAR, PLUS, QUERY} */
129
130
static const uint32_t rep_max[] = {
131
  UINT32_MAX, UINT32_MAX,      /* * and *? */
132
  UINT32_MAX, UINT32_MAX,      /* + and +? */
133
  1, 1,                        /* ? and ?? */
134
  0, 0,                        /* dummy placefillers for OP_CR[MIN]RANGE */
135
  UINT32_MAX, UINT32_MAX, 1 }; /* OP_CRPOS{STAR, PLUS, QUERY} */
136
137
/* Repetition types - must include OP_CRPOSRANGE (not needed above) */
138
139
static const uint32_t rep_typ[] = {
140
  REPTYPE_MAX, REPTYPE_MIN,    /* * and *? */
141
  REPTYPE_MAX, REPTYPE_MIN,    /* + and +? */
142
  REPTYPE_MAX, REPTYPE_MIN,    /* ? and ?? */
143
  REPTYPE_MAX, REPTYPE_MIN,    /* OP_CRRANGE and OP_CRMINRANGE */
144
  REPTYPE_POS, REPTYPE_POS,    /* OP_CRPOSSTAR, OP_CRPOSPLUS */
145
  REPTYPE_POS, REPTYPE_POS };  /* OP_CRPOSQUERY, OP_CRPOSRANGE */
146
147
/* Numbers for RMATCH calls at backtracking points. When these lists are
148
changed, the code at RETURN_SWITCH below must be updated in sync.  */
149
150
enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
151
       RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
152
       RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
153
       RM31,  RM32, RM33, RM34, RM35, RM36 };
154
155
#ifdef SUPPORT_WIDE_CHARS
156
enum { RM100=100, RM101 };
157
#endif
158
159
#ifdef SUPPORT_UNICODE
160
enum { RM200=200, RM201, RM202, RM203, RM204, RM205, RM206, RM207,
161
       RM208,     RM209, RM210, RM211, RM212, RM213, RM214, RM215,
162
       RM216,     RM217, RM218, RM219, RM220, RM221, RM222, RM223,
163
       RM224,     RM225 };
164
#endif
165
166
/* Define short names for general fields in the current backtrack frame, which
167
is always pointed to by the F variable. Occasional references to fields in
168
other frames are written out explicitly. There are also some fields in the
169
current frame whose names start with "temp" that are used for short-term,
170
localised backtracking memory. These are #defined with Lxxx names at the point
171
of use and undefined afterwards. */
172
173
0
#define Fback_frame        F->back_frame
174
0
#define Fcapture_last      F->capture_last
175
0
#define Fcurrent_recurse   F->current_recurse
176
0
#define Fecode             F->ecode
177
0
#define Feptr              F->eptr
178
0
#define Fgroup_frame_type  F->group_frame_type
179
0
#define Flast_group_offset F->last_group_offset
180
0
#define Flength            F->length
181
0
#define Fmark              F->mark
182
0
#define Frdepth            F->rdepth
183
0
#define Fstart_match       F->start_match
184
0
#define Foffset_top        F->offset_top
185
0
#define Foccu              F->occu
186
0
#define Fop                F->op
187
0
#define Fovector           F->ovector
188
0
#define Freturn_id         F->return_id
189
190
191
#ifdef DEBUG_FRAMES_DISPLAY
192
/*************************************************
193
*      Display current frames and contents       *
194
*************************************************/
195
196
/* This debugging function displays the current set of frames and their
197
contents. It is not called automatically from anywhere, the intention being
198
that calls can be inserted where necessary when debugging frame-related
199
problems.
200
201
Arguments:
202
  f           the file to write to
203
  F           the current top frame
204
  P           a previous frame of interest
205
  frame_size  the frame size
206
  mb          points to the match block
207
  match_data  points to the match data block
208
  s           identification text
209
210
Returns:    nothing
211
*/
212
213
static void
214
display_frames(FILE *f, heapframe *F, heapframe *P, PCRE2_SIZE frame_size,
215
  match_block *mb, pcre2_match_data *match_data, const char *s, ...)
216
{
217
uint32_t i;
218
heapframe *Q;
219
va_list ap;
220
va_start(ap, s);
221
222
fprintf(f, "FRAMES ");
223
vfprintf(f, s, ap);
224
va_end(ap);
225
226
if (P != NULL) fprintf(f, " P=%lu",
227
  ((char *)P - (char *)(match_data->heapframes))/frame_size);
228
fprintf(f, "\n");
229
230
for (i = 0, Q = match_data->heapframes;
231
     Q <= F;
232
     i++, Q = (heapframe *)((char *)Q + frame_size))
233
  {
234
  fprintf(f, "Frame %d type=%x subj=%lu code=%d back=%lu id=%d",
235
    i, Q->group_frame_type, Q->eptr - mb->start_subject, *(Q->ecode),
236
    Q->back_frame, Q->return_id);
237
238
  if (Q->last_group_offset == PCRE2_UNSET)
239
    fprintf(f, " lgoffset=unset\n");
240
  else
241
    fprintf(f, " lgoffset=%lu\n",  Q->last_group_offset/frame_size);
242
  }
243
}
244
245
#endif
246
247
248
249
/*************************************************
250
*                Process a callout               *
251
*************************************************/
252
253
/* This function is called for all callouts, whether "standalone" or at the
254
start of a conditional group. Feptr will be pointing to either OP_CALLOUT or
255
OP_CALLOUT_STR. A callout block is allocated in pcre2_match() and initialized
256
with fixed values.
257
258
Arguments:
259
  F          points to the current backtracking frame
260
  mb         points to the match block
261
  lengthptr  where to return the length of the callout item
262
263
Returns:     the return from the callout
264
             or 0 if no callout function exists
265
*/
266
267
static int
268
do_callout(heapframe *F, match_block *mb, PCRE2_SIZE *lengthptr)
269
0
{
270
0
int rc;
271
0
PCRE2_SIZE save0, save1;
272
0
PCRE2_SIZE *callout_ovector;
273
0
pcre2_callout_block *cb;
274
275
0
*lengthptr = (*Fecode == OP_CALLOUT)?
276
0
  PRIV(OP_lengths)[OP_CALLOUT] : GET(Fecode, 1 + 2*LINK_SIZE);
277
278
0
if (mb->callout == NULL) return 0;   /* No callout function provided */
279
280
/* The original matching code (pre 10.30) worked directly with the ovector
281
passed by the user, and this was passed to callouts. Now that the working
282
ovector is in the backtracking frame, it no longer needs to reserve space for
283
the overall match offsets (which would waste space in the frame). For backward
284
compatibility, however, we pass capture_top and offset_vector to the callout as
285
if for the extended ovector, and we ensure that the first two slots are unset
286
by preserving and restoring their current contents. Picky compilers complain if
287
references such as Fovector[-2] are use directly, so we set up a separate
288
pointer. */
289
290
0
callout_ovector = (PCRE2_SIZE *)(Fovector) - 2;
291
292
/* The cb->version, cb->subject, cb->subject_length, and cb->start_match fields
293
are set externally. The first 3 never change; the last is updated for each
294
bumpalong. */
295
296
0
cb = mb->cb;
297
0
cb->capture_top      = (uint32_t)Foffset_top/2 + 1;
298
0
cb->capture_last     = Fcapture_last;
299
0
cb->offset_vector    = callout_ovector;
300
0
cb->mark             = mb->nomatch_mark;
301
0
cb->current_position = (PCRE2_SIZE)(Feptr - mb->start_subject);
302
0
cb->pattern_position = GET(Fecode, 1);
303
0
cb->next_item_length = GET(Fecode, 1 + LINK_SIZE);
304
305
0
if (*Fecode == OP_CALLOUT)  /* Numerical callout */
306
0
  {
307
0
  cb->callout_number = Fecode[1 + 2*LINK_SIZE];
308
0
  cb->callout_string_offset = 0;
309
0
  cb->callout_string = NULL;
310
0
  cb->callout_string_length = 0;
311
0
  }
312
0
else  /* String callout */
313
0
  {
314
0
  cb->callout_number = 0;
315
0
  cb->callout_string_offset = GET(Fecode, 1 + 3*LINK_SIZE);
316
0
  cb->callout_string = Fecode + (1 + 4*LINK_SIZE) + 1;
317
0
  cb->callout_string_length =
318
0
    *lengthptr - (1 + 4*LINK_SIZE) - 2;
319
0
  }
320
321
0
save0 = callout_ovector[0];
322
0
save1 = callout_ovector[1];
323
0
callout_ovector[0] = callout_ovector[1] = PCRE2_UNSET;
324
0
rc = mb->callout(cb, mb->callout_data);
325
0
callout_ovector[0] = save0;
326
0
callout_ovector[1] = save1;
327
0
cb->callout_flags = 0;
328
0
return rc;
329
0
}
330
331
332
333
/*************************************************
334
*          Match a back-reference                *
335
*************************************************/
336
337
/* This function is called only when it is known that the offset lies within
338
the offsets that have so far been used in the match. Note that in caseless
339
UTF-8 mode, the number of subject bytes matched may be different to the number
340
of reference bytes. (In theory this could also happen in UTF-16 mode, but it
341
seems unlikely.)
342
343
Arguments:
344
  offset      index into the offset vector
345
  caseless    TRUE if caseless
346
  F           the current backtracking frame pointer
347
  mb          points to match block
348
  lengthptr   pointer for returning the length matched
349
350
Returns:      = 0 successful match; number of code units matched is set
351
              < 0 no match
352
              > 0 partial match
353
*/
354
355
static int
356
match_ref(PCRE2_SIZE offset, BOOL caseless, heapframe *F, match_block *mb,
357
  PCRE2_SIZE *lengthptr)
358
0
{
359
0
PCRE2_SPTR p;
360
0
PCRE2_SIZE length;
361
0
PCRE2_SPTR eptr;
362
0
PCRE2_SPTR eptr_start;
363
364
/* Deal with an unset group. The default is no match, but there is an option to
365
match an empty string. */
366
367
0
if (offset >= Foffset_top || Fovector[offset] == PCRE2_UNSET)
368
0
  {
369
0
  if ((mb->poptions & PCRE2_MATCH_UNSET_BACKREF) != 0)
370
0
    {
371
0
    *lengthptr = 0;
372
0
    return 0;      /* Match */
373
0
    }
374
0
  else return -1;  /* No match */
375
0
  }
376
377
/* Separate the caseless and UTF cases for speed. */
378
379
0
eptr = eptr_start = Feptr;
380
0
p = mb->start_subject + Fovector[offset];
381
0
length = Fovector[offset+1] - Fovector[offset];
382
383
0
if (caseless)
384
0
  {
385
0
#if defined SUPPORT_UNICODE
386
0
  BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
387
388
0
  if (utf || (mb->poptions & PCRE2_UCP) != 0)
389
0
    {
390
0
    PCRE2_SPTR endptr = p + length;
391
392
    /* Match characters up to the end of the reference. NOTE: the number of
393
    code units matched may differ, because in UTF-8 there are some characters
394
    whose upper and lower case codes have different numbers of bytes. For
395
    example, U+023A (2 bytes in UTF-8) is the upper case version of U+2C65 (3
396
    bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a
397
    sequence of two of the latter. It is important, therefore, to check the
398
    length along the reference, not along the subject (earlier code did this
399
    wrong). UCP without uses Unicode properties but without UTF encoding. */
400
401
0
    while (p < endptr)
402
0
      {
403
0
      uint32_t c, d;
404
0
      const ucd_record *ur;
405
0
      if (eptr >= mb->end_subject) return 1;   /* Partial match */
406
407
0
      if (utf)
408
0
        {
409
0
        GETCHARINC(c, eptr);
410
0
        GETCHARINC(d, p);
411
0
        }
412
0
      else
413
0
        {
414
0
        c = *eptr++;
415
0
        d = *p++;
416
0
        }
417
418
0
      ur = GET_UCD(d);
419
0
      if (c != d && c != (uint32_t)((int)d + ur->other_case))
420
0
        {
421
0
        const uint32_t *pp = PRIV(ucd_caseless_sets) + ur->caseset;
422
0
        for (;;)
423
0
          {
424
0
          if (c < *pp) return -1;  /* No match */
425
0
          if (c == *pp++) break;
426
0
          }
427
0
        }
428
0
      }
429
0
    }
430
0
  else
431
0
#endif
432
433
  /* Not in UTF or UCP mode */
434
0
    {
435
0
    for (; length > 0; length--)
436
0
      {
437
0
      uint32_t cc, cp;
438
0
      if (eptr >= mb->end_subject) return 1;   /* Partial match */
439
0
      cc = UCHAR21TEST(eptr);
440
0
      cp = UCHAR21TEST(p);
441
0
      if (TABLE_GET(cp, mb->lcc, cp) != TABLE_GET(cc, mb->lcc, cc))
442
0
        return -1;  /* No match */
443
0
      p++;
444
0
      eptr++;
445
0
      }
446
0
    }
447
0
  }
448
449
/* In the caseful case, we can just compare the code units, whether or not we
450
are in UTF and/or UCP mode. When partial matching, we have to do this unit by
451
unit. */
452
453
0
else
454
0
  {
455
0
  if (mb->partial != 0)
456
0
    {
457
0
    for (; length > 0; length--)
458
0
      {
459
0
      if (eptr >= mb->end_subject) return 1;   /* Partial match */
460
0
      if (UCHAR21INCTEST(p) != UCHAR21INCTEST(eptr)) return -1;  /* No match */
461
0
      }
462
0
    }
463
464
  /* Not partial matching */
465
466
0
  else
467
0
    {
468
0
    if ((PCRE2_SIZE)(mb->end_subject - eptr) < length) return 1; /* Partial */
469
0
    if (memcmp(p, eptr, CU2BYTES(length)) != 0) return -1;  /* No match */
470
0
    eptr += length;
471
0
    }
472
0
  }
473
474
0
*lengthptr = eptr - eptr_start;
475
0
return 0;  /* Match */
476
0
}
477
478
479
480
/******************************************************************************
481
*******************************************************************************
482
                   "Recursion" in the match() function
483
484
The original match() function was highly recursive, but this proved to be the
485
source of a number of problems over the years, mostly because of the relatively
486
small system stacks that are commonly found. As new features were added to
487
patterns, various kludges were invented to reduce the amount of stack used,
488
making the code hard to understand in places.
489
490
A version did exist that used individual frames on the heap instead of calling
491
match() recursively, but this ran substantially slower. The current version is
492
a refactoring that uses a vector of frames to remember backtracking points.
493
This runs no slower, and possibly even a bit faster than the original recursive
494
implementation.
495
496
At first, an initial vector of size START_FRAMES_SIZE (enough for maybe 50
497
frames) was allocated on the system stack. If this was not big enough, the heap
498
was used for a larger vector. However, it turns out that there are environments
499
where taking as little as 20KiB from the system stack is an embarrassment.
500
After another refactoring, the heap is used exclusively, but a pointer to the
501
frames vector and its size are cached in the match_data block, so that there is
502
no new memory allocation if the same match_data block is used for multiple
503
matches (unless the frames vector has to be extended).
504
*******************************************************************************
505
******************************************************************************/
506
507
508
509
510
/*************************************************
511
*       Macros for the match() function          *
512
*************************************************/
513
514
/* These macros pack up tests that are used for partial matching several times
515
in the code. The second one is used when we already know we are past the end of
516
the subject. We set the "hit end" flag if the pointer is at the end of the
517
subject and either (a) the pointer is past the earliest inspected character
518
(i.e. something has been matched, even if not part of the actual matched
519
string), or (b) the pattern contains a lookbehind. These are the conditions for
520
which adding more characters may allow the current match to continue.
521
522
For hard partial matching, we immediately return a partial match. Otherwise,
523
carrying on means that a complete match on the current subject will be sought.
524
A partial match is returned only if no complete match can be found. */
525
526
#define CHECK_PARTIAL()\
527
0
  if (Feptr >= mb->end_subject) \
528
0
    { \
529
0
    SCHECK_PARTIAL(); \
530
0
    }
531
532
#define SCHECK_PARTIAL()\
533
0
  if (mb->partial != 0 && \
534
0
      (Feptr > mb->start_used_ptr || mb->allowemptypartial)) \
535
0
    { \
536
0
    mb->hitend = TRUE; \
537
0
    if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; \
538
0
    }
539
540
541
/* These macros are used to implement backtracking. They simulate a recursive
542
call to the match() function by means of a local vector of frames which
543
remember the backtracking points. */
544
545
#define RMATCH(ra,rb)\
546
0
  {\
547
0
  start_ecode = ra;\
548
0
  Freturn_id = rb;\
549
0
  goto MATCH_RECURSE;\
550
0
  L_##rb:;\
551
0
  }
552
553
#define RRETURN(ra)\
554
0
  {\
555
0
  rrc = ra;\
556
0
  goto RETURN_SWITCH;\
557
0
  }
558
559
560
561
/*************************************************
562
*         Match from current position            *
563
*************************************************/
564
565
/* This function is called to run one match attempt at a single starting point
566
in the subject.
567
568
Performance note: It might be tempting to extract commonly used fields from the
569
mb structure (e.g. end_subject) into individual variables to improve
570
performance. Tests using gcc on a SPARC disproved this; in the first case, it
571
made performance worse.
572
573
Arguments:
574
   start_eptr   starting character in subject
575
   start_ecode  starting position in compiled code
576
   top_bracket  number of capturing parentheses in the pattern
577
   frame_size   size of each backtracking frame
578
   match_data   pointer to the match_data block
579
   mb           pointer to "static" variables block
580
581
Returns:        MATCH_MATCH if matched            )  these values are >= 0
582
                MATCH_NOMATCH if failed to match  )
583
                negative MATCH_xxx value for PRUNE, SKIP, etc
584
                negative PCRE2_ERROR_xxx value if aborted by an error condition
585
                (e.g. stopped by repeated call or depth limit)
586
*/
587
588
static int
589
match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket,
590
  PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb)
591
0
{
592
/* Frame-handling variables */
593
594
0
heapframe *F;           /* Current frame pointer */
595
0
heapframe *N = NULL;    /* Temporary frame pointers */
596
0
heapframe *P = NULL;
597
598
0
heapframe *frames_top;  /* End of frames vector */
599
0
heapframe *assert_accept_frame = NULL;  /* For passing back a frame with captures */
600
0
PCRE2_SIZE heapframes_size;   /* Usable size of frames vector */
601
0
PCRE2_SIZE frame_copy_size;   /* Amount to copy when creating a new frame */
602
603
/* Local variables that do not need to be preserved over calls to RMATCH(). */
604
605
0
PCRE2_SPTR bracode;     /* Temp pointer to start of group */
606
0
PCRE2_SIZE offset;      /* Used for group offsets */
607
0
PCRE2_SIZE length;      /* Used for various length calculations */
608
609
0
int rrc;                /* Return from functions & backtracking "recursions" */
610
0
#ifdef SUPPORT_UNICODE
611
0
int proptype;           /* Type of character property */
612
0
#endif
613
614
0
uint32_t i;             /* Used for local loops */
615
0
uint32_t fc;            /* Character values */
616
0
uint32_t number;        /* Used for group and other numbers */
617
0
uint32_t reptype = 0;   /* Type of repetition (0 to avoid compiler warning) */
618
0
uint32_t group_frame_type;  /* Specifies type for new group frames */
619
620
0
BOOL condition;         /* Used in conditional groups */
621
0
BOOL cur_is_word;       /* Used in "word" tests */
622
0
BOOL prev_is_word;      /* Used in "word" tests */
623
624
/* UTF and UCP flags */
625
626
0
#ifdef SUPPORT_UNICODE
627
0
BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
628
0
BOOL ucp = (mb->poptions & PCRE2_UCP) != 0;
629
#else
630
BOOL utf = FALSE;  /* Required for convenience even when no Unicode support */
631
#endif
632
633
/* This is the length of the last part of a backtracking frame that must be
634
copied when a new frame is created. */
635
636
0
frame_copy_size = frame_size - offsetof(heapframe, eptr);
637
638
/* Set up the first frame and the end of the frames vector. We set the local
639
heapframes_size to the usable amount of the vector, that is, a whole number of
640
frames. */
641
642
0
F = match_data->heapframes;
643
0
heapframes_size = (match_data->heapframes_size / frame_size) * frame_size;
644
0
frames_top = (heapframe *)((char *)F + heapframes_size);
645
646
0
Frdepth = 0;                        /* "Recursion" depth */
647
0
Fcapture_last = 0;                  /* Number of most recent capture */
648
0
Fcurrent_recurse = RECURSE_UNSET;   /* Not pattern recursing. */
649
0
Fstart_match = Feptr = start_eptr;  /* Current data pointer and start match */
650
0
Fmark = NULL;                       /* Most recent mark */
651
0
Foffset_top = 0;                    /* End of captures within the frame */
652
0
Flast_group_offset = PCRE2_UNSET;   /* Saved frame of most recent group */
653
0
group_frame_type = 0;               /* Not a start of group frame */
654
0
goto NEW_FRAME;                     /* Start processing with this frame */
655
656
/* Come back here when we want to create a new frame for remembering a
657
backtracking point. */
658
659
0
MATCH_RECURSE:
660
661
/* Set up a new backtracking frame. If the vector is full, get a new one,
662
doubling the size, but constrained by the heap limit (which is in KiB). */
663
664
0
N = (heapframe *)((char *)F + frame_size);
665
0
if (N >= frames_top)
666
0
  {
667
0
  heapframe *new;
668
0
  PCRE2_SIZE newsize = match_data->heapframes_size * 2;
669
670
0
  if (newsize > mb->heap_limit)
671
0
    {
672
0
    PCRE2_SIZE maxsize = (mb->heap_limit/frame_size) * frame_size;
673
0
    if (match_data->heapframes_size >= maxsize) return PCRE2_ERROR_HEAPLIMIT;
674
0
    newsize = maxsize;
675
0
    }
676
677
0
  new = match_data->memctl.malloc(newsize, match_data->memctl.memory_data);
678
0
  if (new == NULL) return PCRE2_ERROR_NOMEMORY;
679
0
  memcpy(new, match_data->heapframes, heapframes_size);
680
681
0
  F = (heapframe *)((char *)new + ((char *)F - (char *)match_data->heapframes));
682
0
  N = (heapframe *)((char *)F + frame_size);
683
684
0
  match_data->memctl.free(match_data->heapframes, match_data->memctl.memory_data);
685
0
  match_data->heapframes = new;
686
0
  match_data->heapframes_size = newsize;
687
688
0
  heapframes_size = (newsize / frame_size) * frame_size;
689
0
  frames_top = (heapframe *)((char *)new + heapframes_size);
690
0
  }
691
692
#ifdef DEBUG_SHOW_RMATCH
693
fprintf(stderr, "++ RMATCH %2d frame=%d", Freturn_id, Frdepth + 1);
694
if (group_frame_type != 0)
695
  {
696
  fprintf(stderr, " type=%x ", group_frame_type);
697
  switch (GF_IDMASK(group_frame_type))
698
    {
699
    case GF_CAPTURE:
700
    fprintf(stderr, "capture=%d", GF_DATAMASK(group_frame_type));
701
    break;
702
703
    case GF_NOCAPTURE:
704
    fprintf(stderr, "nocapture op=%d", GF_DATAMASK(group_frame_type));
705
    break;
706
707
    case GF_CONDASSERT:
708
    fprintf(stderr, "condassert op=%d", GF_DATAMASK(group_frame_type));
709
    break;
710
711
    case GF_RECURSE:
712
    fprintf(stderr, "recurse=%d", GF_DATAMASK(group_frame_type));
713
    break;
714
715
    default:
716
    fprintf(stderr, "*** unknown ***");
717
    break;
718
    }
719
  }
720
fprintf(stderr, "\n");
721
#endif
722
723
/* Copy those fields that must be copied into the new frame, increase the
724
"recursion" depth (i.e. the new frame's index) and then make the new frame
725
current. */
726
727
0
memcpy((char *)N + offsetof(heapframe, eptr),
728
0
       (char *)F + offsetof(heapframe, eptr),
729
0
       frame_copy_size);
730
731
0
N->rdepth = Frdepth + 1;
732
0
F = N;
733
734
/* Carry on processing with a new frame. */
735
736
0
NEW_FRAME:
737
0
Fgroup_frame_type = group_frame_type;
738
0
Fecode = start_ecode;      /* Starting code pointer */
739
0
Fback_frame = frame_size;  /* Default is go back one frame */
740
741
/* If this is a special type of group frame, remember its offset for quick
742
access at the end of the group. If this is a recursion, set a new current
743
recursion value. */
744
745
0
if (group_frame_type != 0)
746
0
  {
747
0
  Flast_group_offset = (char *)F - (char *)match_data->heapframes;
748
0
  if (GF_IDMASK(group_frame_type) == GF_RECURSE)
749
0
    Fcurrent_recurse = GF_DATAMASK(group_frame_type);
750
0
  group_frame_type = 0;
751
0
  }
752
753
754
/* ========================================================================= */
755
/* This is the main processing loop. First check that we haven't recorded too
756
many backtracks (search tree is too large), or that we haven't exceeded the
757
recursive depth limit (used too many backtracking frames). If not, process the
758
opcodes. */
759
760
0
if (mb->match_call_count++ >= mb->match_limit) return PCRE2_ERROR_MATCHLIMIT;
761
0
if (Frdepth >= mb->match_limit_depth) return PCRE2_ERROR_DEPTHLIMIT;
762
763
0
for (;;)
764
0
  {
765
#ifdef DEBUG_SHOW_OPS
766
fprintf(stderr, "++ op=%d\n", *Fecode);
767
#endif
768
769
0
  Fop = (uint8_t)(*Fecode);  /* Cast needed for 16-bit and 32-bit modes */
770
0
  switch(Fop)
771
0
    {
772
    /* ===================================================================== */
773
    /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes, to close
774
    any currently open capturing brackets. Unlike reaching the end of a group,
775
    where we know the starting frame is at the top of the chained frames, in
776
    this case we have to search back for the relevant frame in case other types
777
    of group that use chained frames have intervened. Multiple OP_CLOSEs always
778
    come innermost first, which matches the chain order. We can ignore this in
779
    a recursion, because captures are not passed out of recursions. */
780
781
0
    case OP_CLOSE:
782
0
    if (Fcurrent_recurse == RECURSE_UNSET)
783
0
      {
784
0
      number = GET2(Fecode, 1);
785
0
      offset = Flast_group_offset;
786
0
      for(;;)
787
0
        {
788
0
        if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL;
789
0
        N = (heapframe *)((char *)match_data->heapframes + offset);
790
0
        P = (heapframe *)((char *)N - frame_size);
791
0
        if (N->group_frame_type == (GF_CAPTURE | number)) break;
792
0
        offset = P->last_group_offset;
793
0
        }
794
0
      offset = (number << 1) - 2;
795
0
      Fcapture_last = number;
796
0
      Fovector[offset] = P->eptr - mb->start_subject;
797
0
      Fovector[offset+1] = Feptr - mb->start_subject;
798
0
      if (offset >= Foffset_top) Foffset_top = offset + 2;
799
0
      }
800
0
    Fecode += PRIV(OP_lengths)[*Fecode];
801
0
    break;
802
803
804
    /* ===================================================================== */
805
    /* Real or forced end of the pattern, assertion, or recursion. In an
806
    assertion ACCEPT, update the last used pointer and remember the current
807
    frame so that the captures and mark can be fished out of it. */
808
809
0
    case OP_ASSERT_ACCEPT:
810
0
    if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
811
0
    assert_accept_frame = F;
812
0
    RRETURN(MATCH_ACCEPT);
813
814
    /* If recursing, we have to find the most recent recursion. */
815
816
0
    case OP_ACCEPT:
817
0
    case OP_END:
818
819
    /* Handle end of a recursion. */
820
821
0
    if (Fcurrent_recurse != RECURSE_UNSET)
822
0
      {
823
0
      offset = Flast_group_offset;
824
0
      for(;;)
825
0
        {
826
0
        if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL;
827
0
        N = (heapframe *)((char *)match_data->heapframes + offset);
828
0
        P = (heapframe *)((char *)N - frame_size);
829
0
        if (GF_IDMASK(N->group_frame_type) == GF_RECURSE) break;
830
0
        offset = P->last_group_offset;
831
0
        }
832
833
      /* N is now the frame of the recursion; the previous frame is at the
834
      OP_RECURSE position. Go back there, copying the current subject position
835
      and mark, and the start_match position (\K might have changed it), and
836
      then move on past the OP_RECURSE. */
837
838
0
      P->eptr = Feptr;
839
0
      P->mark = Fmark;
840
0
      P->start_match = Fstart_match;
841
0
      F = P;
842
0
      Fecode += 1 + LINK_SIZE;
843
0
      continue;
844
0
      }
845
846
    /* Not a recursion. Fail for an empty string match if either PCRE2_NOTEMPTY
847
    is set, or if PCRE2_NOTEMPTY_ATSTART is set and we have matched at the
848
    start of the subject. In both cases, backtracking will then try other
849
    alternatives, if any. */
850
851
0
    if (Feptr == Fstart_match &&
852
0
         ((mb->moptions & PCRE2_NOTEMPTY) != 0 ||
853
0
           ((mb->moptions & PCRE2_NOTEMPTY_ATSTART) != 0 &&
854
0
             Fstart_match == mb->start_subject + mb->start_offset)))
855
0
      RRETURN(MATCH_NOMATCH);
856
857
    /* Also fail if PCRE2_ENDANCHORED is set and the end of the match is not
858
    the end of the subject. After (*ACCEPT) we fail the entire match (at this
859
    position) but backtrack on reaching the end of the pattern. */
860
861
0
    if (Feptr < mb->end_subject &&
862
0
        ((mb->moptions | mb->poptions) & PCRE2_ENDANCHORED) != 0)
863
0
      {
864
0
      if (Fop == OP_END) RRETURN(MATCH_NOMATCH);
865
0
      return MATCH_NOMATCH;
866
0
      }
867
868
    /* We have a successful match of the whole pattern. Record the result and
869
    then do a direct return from the function. If there is space in the offset
870
    vector, set any pairs that follow the highest-numbered captured string but
871
    are less than the number of capturing groups in the pattern to PCRE2_UNSET.
872
    It is documented that this happens. "Gaps" are set to PCRE2_UNSET
873
    dynamically. It is only those at the end that need setting here. */
874
875
0
    mb->end_match_ptr = Feptr;           /* Record where we ended */
876
0
    mb->end_offset_top = Foffset_top;    /* and how many extracts were taken */
877
0
    mb->mark = Fmark;                    /* and the last success mark */
878
0
    if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
879
880
0
    match_data->ovector[0] = Fstart_match - mb->start_subject;
881
0
    match_data->ovector[1] = Feptr - mb->start_subject;
882
883
    /* Set i to the smaller of the sizes of the external and frame ovectors. */
884
885
0
    i = 2 * ((top_bracket + 1 > match_data->oveccount)?
886
0
      match_data->oveccount : top_bracket + 1);
887
0
    memcpy(match_data->ovector + 2, Fovector, (i - 2) * sizeof(PCRE2_SIZE));
888
0
    while (--i >= Foffset_top + 2) match_data->ovector[i] = PCRE2_UNSET;
889
0
    return MATCH_MATCH;  /* Note: NOT RRETURN */
890
891
892
    /*===================================================================== */
893
    /* Match any single character type except newline; have to take care with
894
    CRLF newlines and partial matching. */
895
896
0
    case OP_ANY:
897
0
    if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
898
0
    if (mb->partial != 0 &&
899
0
        Feptr == mb->end_subject - 1 &&
900
0
        NLBLOCK->nltype == NLTYPE_FIXED &&
901
0
        NLBLOCK->nllen == 2 &&
902
0
        UCHAR21TEST(Feptr) == NLBLOCK->nl[0])
903
0
      {
904
0
      mb->hitend = TRUE;
905
0
      if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
906
0
      }
907
    /* Fall through */
908
909
    /* Match any single character whatsoever. */
910
911
0
    case OP_ALLANY:
912
0
    if (Feptr >= mb->end_subject)  /* DO NOT merge the Feptr++ here; it must */
913
0
      {                            /* not be updated before SCHECK_PARTIAL. */
914
0
      SCHECK_PARTIAL();
915
0
      RRETURN(MATCH_NOMATCH);
916
0
      }
917
0
    Feptr++;
918
0
#ifdef SUPPORT_UNICODE
919
0
    if (utf) ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
920
0
#endif
921
0
    Fecode++;
922
0
    break;
923
924
925
    /* ===================================================================== */
926
    /* Match a single code unit, even in UTF mode. This opcode really does
927
    match any code unit, even newline. (It really should be called ANYCODEUNIT,
928
    of course - the byte name is from pre-16 bit days.) */
929
930
0
    case OP_ANYBYTE:
931
0
    if (Feptr >= mb->end_subject)   /* DO NOT merge the Feptr++ here; it must */
932
0
      {                             /* not be updated before SCHECK_PARTIAL. */
933
0
      SCHECK_PARTIAL();
934
0
      RRETURN(MATCH_NOMATCH);
935
0
      }
936
0
    Feptr++;
937
0
    Fecode++;
938
0
    break;
939
940
941
    /* ===================================================================== */
942
    /* Match a single character, casefully */
943
944
0
    case OP_CHAR:
945
0
#ifdef SUPPORT_UNICODE
946
0
    if (utf)
947
0
      {
948
0
      Flength = 1;
949
0
      Fecode++;
950
0
      GETCHARLEN(fc, Fecode, Flength);
951
0
      if (Flength > (PCRE2_SIZE)(mb->end_subject - Feptr))
952
0
        {
953
0
        CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
954
0
        RRETURN(MATCH_NOMATCH);
955
0
        }
956
0
      for (; Flength > 0; Flength--)
957
0
        {
958
0
        if (*Fecode++ != UCHAR21INC(Feptr)) RRETURN(MATCH_NOMATCH);
959
0
        }
960
0
      }
961
0
    else
962
0
#endif
963
964
    /* Not UTF mode */
965
0
      {
966
0
      if (mb->end_subject - Feptr < 1)
967
0
        {
968
0
        SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
969
0
        RRETURN(MATCH_NOMATCH);
970
0
        }
971
0
      if (Fecode[1] != *Feptr++) RRETURN(MATCH_NOMATCH);
972
0
      Fecode += 2;
973
0
      }
974
0
    break;
975
976
977
    /* ===================================================================== */
978
    /* Match a single character, caselessly. If we are at the end of the
979
    subject, give up immediately. We get here only when the pattern character
980
    has at most one other case. Characters with more than two cases are coded
981
    as OP_PROP with the pseudo-property PT_CLIST. */
982
983
0
    case OP_CHARI:
984
0
    if (Feptr >= mb->end_subject)
985
0
      {
986
0
      SCHECK_PARTIAL();
987
0
      RRETURN(MATCH_NOMATCH);
988
0
      }
989
990
0
#ifdef SUPPORT_UNICODE
991
0
    if (utf)
992
0
      {
993
0
      Flength = 1;
994
0
      Fecode++;
995
0
      GETCHARLEN(fc, Fecode, Flength);
996
997
      /* If the pattern character's value is < 128, we know that its other case
998
      (if any) is also < 128 (and therefore only one code unit long in all
999
      code-unit widths), so we can use the fast lookup table. We checked above
1000
      that there is at least one character left in the subject. */
1001
1002
0
      if (fc < 128)
1003
0
        {
1004
0
        uint32_t cc = UCHAR21(Feptr);
1005
0
        if (mb->lcc[fc] != TABLE_GET(cc, mb->lcc, cc)) RRETURN(MATCH_NOMATCH);
1006
0
        Fecode++;
1007
0
        Feptr++;
1008
0
        }
1009
1010
      /* Otherwise we must pick up the subject character and use Unicode
1011
      property support to test its other case. Note that we cannot use the
1012
      value of "Flength" to check for sufficient bytes left, because the other
1013
      case of the character may have more or fewer code units. */
1014
1015
0
      else
1016
0
        {
1017
0
        uint32_t dc;
1018
0
        GETCHARINC(dc, Feptr);
1019
0
        Fecode += Flength;
1020
0
        if (dc != fc && dc != UCD_OTHERCASE(fc)) RRETURN(MATCH_NOMATCH);
1021
0
        }
1022
0
      }
1023
1024
    /* If UCP is set without UTF we must do the same as above, but with one
1025
    character per code unit. */
1026
1027
0
    else if (ucp)
1028
0
      {
1029
0
      uint32_t cc = UCHAR21(Feptr);
1030
0
      fc = Fecode[1];
1031
0
      if (fc < 128)
1032
0
        {
1033
0
        if (mb->lcc[fc] != TABLE_GET(cc, mb->lcc, cc)) RRETURN(MATCH_NOMATCH);
1034
0
        }
1035
0
      else
1036
0
        {
1037
0
        if (cc != fc && cc != UCD_OTHERCASE(fc)) RRETURN(MATCH_NOMATCH);
1038
0
        }
1039
0
      Feptr++;
1040
0
      Fecode += 2;
1041
0
      }
1042
1043
0
    else
1044
0
#endif   /* SUPPORT_UNICODE */
1045
1046
    /* Not UTF or UCP mode; use the table for characters < 256. */
1047
0
      {
1048
0
      if (TABLE_GET(Fecode[1], mb->lcc, Fecode[1])
1049
0
          != TABLE_GET(*Feptr, mb->lcc, *Feptr)) RRETURN(MATCH_NOMATCH);
1050
0
      Feptr++;
1051
0
      Fecode += 2;
1052
0
      }
1053
0
    break;
1054
1055
1056
    /* ===================================================================== */
1057
    /* Match not a single character. */
1058
1059
0
    case OP_NOT:
1060
0
    case OP_NOTI:
1061
0
    if (Feptr >= mb->end_subject)
1062
0
      {
1063
0
      SCHECK_PARTIAL();
1064
0
      RRETURN(MATCH_NOMATCH);
1065
0
      }
1066
1067
0
#ifdef SUPPORT_UNICODE
1068
0
    if (utf)
1069
0
      {
1070
0
      uint32_t ch;
1071
0
      Fecode++;
1072
0
      GETCHARINC(ch, Fecode);
1073
0
      GETCHARINC(fc, Feptr);
1074
0
      if (ch == fc)
1075
0
        {
1076
0
        RRETURN(MATCH_NOMATCH);  /* Caseful match */
1077
0
        }
1078
0
      else if (Fop == OP_NOTI)   /* If caseless */
1079
0
        {
1080
0
        if (ch > 127)
1081
0
          ch = UCD_OTHERCASE(ch);
1082
0
        else
1083
0
          ch = (mb->fcc)[ch];
1084
0
        if (ch == fc) RRETURN(MATCH_NOMATCH);
1085
0
        }
1086
0
      }
1087
1088
    /* UCP without UTF is as above, but with one character per code unit. */
1089
1090
0
    else if (ucp)
1091
0
      {
1092
0
      uint32_t ch;
1093
0
      fc = UCHAR21INC(Feptr);
1094
0
      ch = Fecode[1];
1095
0
      Fecode += 2;
1096
1097
0
      if (ch == fc)
1098
0
        {
1099
0
        RRETURN(MATCH_NOMATCH);  /* Caseful match */
1100
0
        }
1101
0
      else if (Fop == OP_NOTI)   /* If caseless */
1102
0
        {
1103
0
        if (ch > 127)
1104
0
          ch = UCD_OTHERCASE(ch);
1105
0
        else
1106
0
          ch = (mb->fcc)[ch];
1107
0
        if (ch == fc) RRETURN(MATCH_NOMATCH);
1108
0
        }
1109
0
      }
1110
1111
0
    else
1112
0
#endif  /* SUPPORT_UNICODE */
1113
1114
    /* Neither UTF nor UCP is set */
1115
1116
0
      {
1117
0
      uint32_t ch = Fecode[1];
1118
0
      fc = UCHAR21INC(Feptr);
1119
0
      if (ch == fc || (Fop == OP_NOTI && TABLE_GET(ch, mb->fcc, ch) == fc))
1120
0
        RRETURN(MATCH_NOMATCH);
1121
0
      Fecode += 2;
1122
0
      }
1123
0
    break;
1124
1125
1126
    /* ===================================================================== */
1127
    /* Match a single character repeatedly. */
1128
1129
0
#define Loclength    F->temp_size
1130
0
#define Lstart_eptr  F->temp_sptr[0]
1131
0
#define Lcharptr     F->temp_sptr[1]
1132
0
#define Lmin         F->temp_32[0]
1133
0
#define Lmax         F->temp_32[1]
1134
0
#define Lc           F->temp_32[2]
1135
0
#define Loc          F->temp_32[3]
1136
1137
0
    case OP_EXACT:
1138
0
    case OP_EXACTI:
1139
0
    Lmin = Lmax = GET2(Fecode, 1);
1140
0
    Fecode += 1 + IMM2_SIZE;
1141
0
    goto REPEATCHAR;
1142
1143
0
    case OP_POSUPTO:
1144
0
    case OP_POSUPTOI:
1145
0
    reptype = REPTYPE_POS;
1146
0
    Lmin = 0;
1147
0
    Lmax = GET2(Fecode, 1);
1148
0
    Fecode += 1 + IMM2_SIZE;
1149
0
    goto REPEATCHAR;
1150
1151
0
    case OP_UPTO:
1152
0
    case OP_UPTOI:
1153
0
    reptype = REPTYPE_MAX;
1154
0
    Lmin = 0;
1155
0
    Lmax = GET2(Fecode, 1);
1156
0
    Fecode += 1 + IMM2_SIZE;
1157
0
    goto REPEATCHAR;
1158
1159
0
    case OP_MINUPTO:
1160
0
    case OP_MINUPTOI:
1161
0
    reptype = REPTYPE_MIN;
1162
0
    Lmin = 0;
1163
0
    Lmax = GET2(Fecode, 1);
1164
0
    Fecode += 1 + IMM2_SIZE;
1165
0
    goto REPEATCHAR;
1166
1167
0
    case OP_POSSTAR:
1168
0
    case OP_POSSTARI:
1169
0
    reptype = REPTYPE_POS;
1170
0
    Lmin = 0;
1171
0
    Lmax = UINT32_MAX;
1172
0
    Fecode++;
1173
0
    goto REPEATCHAR;
1174
1175
0
    case OP_POSPLUS:
1176
0
    case OP_POSPLUSI:
1177
0
    reptype = REPTYPE_POS;
1178
0
    Lmin = 1;
1179
0
    Lmax = UINT32_MAX;
1180
0
    Fecode++;
1181
0
    goto REPEATCHAR;
1182
1183
0
    case OP_POSQUERY:
1184
0
    case OP_POSQUERYI:
1185
0
    reptype = REPTYPE_POS;
1186
0
    Lmin = 0;
1187
0
    Lmax = 1;
1188
0
    Fecode++;
1189
0
    goto REPEATCHAR;
1190
1191
0
    case OP_STAR:
1192
0
    case OP_STARI:
1193
0
    case OP_MINSTAR:
1194
0
    case OP_MINSTARI:
1195
0
    case OP_PLUS:
1196
0
    case OP_PLUSI:
1197
0
    case OP_MINPLUS:
1198
0
    case OP_MINPLUSI:
1199
0
    case OP_QUERY:
1200
0
    case OP_QUERYI:
1201
0
    case OP_MINQUERY:
1202
0
    case OP_MINQUERYI:
1203
0
    fc = *Fecode++ - ((Fop < OP_STARI)? OP_STAR : OP_STARI);
1204
0
    Lmin = rep_min[fc];
1205
0
    Lmax = rep_max[fc];
1206
0
    reptype = rep_typ[fc];
1207
1208
    /* Common code for all repeated single-character matches. We first check
1209
    for the minimum number of characters. If the minimum equals the maximum, we
1210
    are done. Otherwise, if minimizing, check the rest of the pattern for a
1211
    match; if there isn't one, advance up to the maximum, one character at a
1212
    time.
1213
1214
    If maximizing, advance up to the maximum number of matching characters,
1215
    until Feptr is past the end of the maximum run. If possessive, we are
1216
    then done (no backing up). Otherwise, match at this position; anything
1217
    other than no match is immediately returned. For nomatch, back up one
1218
    character, unless we are matching \R and the last thing matched was
1219
    \r\n, in which case, back up two code units until we reach the first
1220
    optional character position.
1221
1222
    The various UTF/non-UTF and caseful/caseless cases are handled separately,
1223
    for speed. */
1224
1225
0
    REPEATCHAR:
1226
0
#ifdef SUPPORT_UNICODE
1227
0
    if (utf)
1228
0
      {
1229
0
      Flength = 1;
1230
0
      Lcharptr = Fecode;
1231
0
      GETCHARLEN(fc, Fecode, Flength);
1232
0
      Fecode += Flength;
1233
1234
      /* Handle multi-code-unit character matching, caseful and caseless. */
1235
1236
0
      if (Flength > 1)
1237
0
        {
1238
0
        uint32_t othercase;
1239
1240
0
        if (Fop >= OP_STARI &&     /* Caseless */
1241
0
            (othercase = UCD_OTHERCASE(fc)) != fc)
1242
0
          Loclength = PRIV(ord2utf)(othercase, Foccu);
1243
0
        else Loclength = 0;
1244
1245
0
        for (i = 1; i <= Lmin; i++)
1246
0
          {
1247
0
          if (Feptr <= mb->end_subject - Flength &&
1248
0
            memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0) Feptr += Flength;
1249
0
          else if (Loclength > 0 &&
1250
0
                   Feptr <= mb->end_subject - Loclength &&
1251
0
                   memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0)
1252
0
            Feptr += Loclength;
1253
0
          else
1254
0
            {
1255
0
            CHECK_PARTIAL();
1256
0
            RRETURN(MATCH_NOMATCH);
1257
0
            }
1258
0
          }
1259
1260
0
        if (Lmin == Lmax) continue;
1261
1262
0
        if (reptype == REPTYPE_MIN)
1263
0
          {
1264
0
          for (;;)
1265
0
            {
1266
0
            RMATCH(Fecode, RM202);
1267
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1268
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1269
0
            if (Feptr <= mb->end_subject - Flength &&
1270
0
              memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0) Feptr += Flength;
1271
0
            else if (Loclength > 0 &&
1272
0
                     Feptr <= mb->end_subject - Loclength &&
1273
0
                     memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0)
1274
0
              Feptr += Loclength;
1275
0
            else
1276
0
              {
1277
0
              CHECK_PARTIAL();
1278
0
              RRETURN(MATCH_NOMATCH);
1279
0
              }
1280
0
            }
1281
          /* Control never gets here */
1282
0
          }
1283
1284
0
        else  /* Maximize */
1285
0
          {
1286
0
          Lstart_eptr = Feptr;
1287
0
          for (i = Lmin; i < Lmax; i++)
1288
0
            {
1289
0
            if (Feptr <= mb->end_subject - Flength &&
1290
0
                memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0)
1291
0
              Feptr += Flength;
1292
0
            else if (Loclength > 0 &&
1293
0
                     Feptr <= mb->end_subject - Loclength &&
1294
0
                     memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0)
1295
0
              Feptr += Loclength;
1296
0
            else
1297
0
              {
1298
0
              CHECK_PARTIAL();
1299
0
              break;
1300
0
              }
1301
0
            }
1302
1303
          /* After \C in UTF mode, Lstart_eptr might be in the middle of a
1304
          Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
1305
          go too far. */
1306
1307
0
          if (reptype != REPTYPE_POS) for(;;)
1308
0
            {
1309
0
            if (Feptr <= Lstart_eptr) break;
1310
0
            RMATCH(Fecode, RM203);
1311
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1312
0
            Feptr--;
1313
0
            BACKCHAR(Feptr);
1314
0
            }
1315
0
          }
1316
0
        break;   /* End of repeated wide character handling */
1317
0
        }
1318
1319
      /* Length of UTF character is 1. Put it into the preserved variable and
1320
      fall through to the non-UTF code. */
1321
1322
0
      Lc = fc;
1323
0
      }
1324
0
    else
1325
0
#endif  /* SUPPORT_UNICODE */
1326
1327
    /* When not in UTF mode, load a single-code-unit character. Then proceed as
1328
    above, using Unicode casing if either UTF or UCP is set. */
1329
1330
0
    Lc = *Fecode++;
1331
1332
    /* Caseless comparison */
1333
1334
0
    if (Fop >= OP_STARI)
1335
0
      {
1336
0
#if PCRE2_CODE_UNIT_WIDTH == 8
1337
0
#ifdef SUPPORT_UNICODE
1338
0
      if (ucp && !utf && Lc > 127) Loc = UCD_OTHERCASE(Lc);
1339
0
      else
1340
0
#endif  /* SUPPORT_UNICODE */
1341
      /* Lc will be < 128 in UTF-8 mode. */
1342
0
      Loc = mb->fcc[Lc];
1343
#else /* 16-bit & 32-bit */
1344
#ifdef SUPPORT_UNICODE
1345
      if ((utf || ucp) && Lc > 127) Loc = UCD_OTHERCASE(Lc);
1346
      else
1347
#endif  /* SUPPORT_UNICODE */
1348
      Loc = TABLE_GET(Lc, mb->fcc, Lc);
1349
#endif  /* PCRE2_CODE_UNIT_WIDTH == 8 */
1350
1351
0
      for (i = 1; i <= Lmin; i++)
1352
0
        {
1353
0
        uint32_t cc;                 /* Faster than PCRE2_UCHAR */
1354
0
        if (Feptr >= mb->end_subject)
1355
0
          {
1356
0
          SCHECK_PARTIAL();
1357
0
          RRETURN(MATCH_NOMATCH);
1358
0
          }
1359
0
        cc = UCHAR21TEST(Feptr);
1360
0
        if (Lc != cc && Loc != cc) RRETURN(MATCH_NOMATCH);
1361
0
        Feptr++;
1362
0
        }
1363
0
      if (Lmin == Lmax) continue;
1364
1365
0
      if (reptype == REPTYPE_MIN)
1366
0
        {
1367
0
        for (;;)
1368
0
          {
1369
0
          uint32_t cc;               /* Faster than PCRE2_UCHAR */
1370
0
          RMATCH(Fecode, RM25);
1371
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1372
0
          if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1373
0
          if (Feptr >= mb->end_subject)
1374
0
            {
1375
0
            SCHECK_PARTIAL();
1376
0
            RRETURN(MATCH_NOMATCH);
1377
0
            }
1378
0
          cc = UCHAR21TEST(Feptr);
1379
0
          if (Lc != cc && Loc != cc) RRETURN(MATCH_NOMATCH);
1380
0
          Feptr++;
1381
0
          }
1382
        /* Control never gets here */
1383
0
        }
1384
1385
0
      else  /* Maximize */
1386
0
        {
1387
0
        Lstart_eptr = Feptr;
1388
0
        for (i = Lmin; i < Lmax; i++)
1389
0
          {
1390
0
          uint32_t cc;               /* Faster than PCRE2_UCHAR */
1391
0
          if (Feptr >= mb->end_subject)
1392
0
            {
1393
0
            SCHECK_PARTIAL();
1394
0
            break;
1395
0
            }
1396
0
          cc = UCHAR21TEST(Feptr);
1397
0
          if (Lc != cc && Loc != cc) break;
1398
0
          Feptr++;
1399
0
          }
1400
0
        if (reptype != REPTYPE_POS) for (;;)
1401
0
          {
1402
0
          if (Feptr == Lstart_eptr) break;
1403
0
          RMATCH(Fecode, RM26);
1404
0
          Feptr--;
1405
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1406
0
          }
1407
0
        }
1408
0
      }
1409
1410
    /* Caseful comparisons (includes all multi-byte characters) */
1411
1412
0
    else
1413
0
      {
1414
0
      for (i = 1; i <= Lmin; i++)
1415
0
        {
1416
0
        if (Feptr >= mb->end_subject)
1417
0
          {
1418
0
          SCHECK_PARTIAL();
1419
0
          RRETURN(MATCH_NOMATCH);
1420
0
          }
1421
0
        if (Lc != UCHAR21INCTEST(Feptr)) RRETURN(MATCH_NOMATCH);
1422
0
        }
1423
1424
0
      if (Lmin == Lmax) continue;
1425
1426
0
      if (reptype == REPTYPE_MIN)
1427
0
        {
1428
0
        for (;;)
1429
0
          {
1430
0
          RMATCH(Fecode, RM27);
1431
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1432
0
          if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1433
0
          if (Feptr >= mb->end_subject)
1434
0
            {
1435
0
            SCHECK_PARTIAL();
1436
0
            RRETURN(MATCH_NOMATCH);
1437
0
            }
1438
0
          if (Lc != UCHAR21INCTEST(Feptr)) RRETURN(MATCH_NOMATCH);
1439
0
          }
1440
        /* Control never gets here */
1441
0
        }
1442
0
      else  /* Maximize */
1443
0
        {
1444
0
        Lstart_eptr = Feptr;
1445
0
        for (i = Lmin; i < Lmax; i++)
1446
0
          {
1447
0
          if (Feptr >= mb->end_subject)
1448
0
            {
1449
0
            SCHECK_PARTIAL();
1450
0
            break;
1451
0
            }
1452
1453
0
          if (Lc != UCHAR21TEST(Feptr)) break;
1454
0
          Feptr++;
1455
0
          }
1456
1457
0
        if (reptype != REPTYPE_POS) for (;;)
1458
0
          {
1459
0
          if (Feptr <= Lstart_eptr) break;
1460
0
          RMATCH(Fecode, RM28);
1461
0
          Feptr--;
1462
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1463
0
          }
1464
0
        }
1465
0
      }
1466
0
    break;
1467
1468
0
#undef Loclength
1469
0
#undef Lstart_eptr
1470
0
#undef Lcharptr
1471
0
#undef Lmin
1472
0
#undef Lmax
1473
0
#undef Lc
1474
0
#undef Loc
1475
1476
1477
    /* ===================================================================== */
1478
    /* Match a negated single one-byte character repeatedly. This is almost a
1479
    repeat of the code for a repeated single character, but I haven't found a
1480
    nice way of commoning these up that doesn't require a test of the
1481
    positive/negative option for each character match. Maybe that wouldn't add
1482
    very much to the time taken, but character matching *is* what this is all
1483
    about... */
1484
1485
0
#define Lstart_eptr  F->temp_sptr[0]
1486
0
#define Lmin         F->temp_32[0]
1487
0
#define Lmax         F->temp_32[1]
1488
0
#define Lc           F->temp_32[2]
1489
0
#define Loc          F->temp_32[3]
1490
1491
0
    case OP_NOTEXACT:
1492
0
    case OP_NOTEXACTI:
1493
0
    Lmin = Lmax = GET2(Fecode, 1);
1494
0
    Fecode += 1 + IMM2_SIZE;
1495
0
    goto REPEATNOTCHAR;
1496
1497
0
    case OP_NOTUPTO:
1498
0
    case OP_NOTUPTOI:
1499
0
    Lmin = 0;
1500
0
    Lmax = GET2(Fecode, 1);
1501
0
    reptype = REPTYPE_MAX;
1502
0
    Fecode += 1 + IMM2_SIZE;
1503
0
    goto REPEATNOTCHAR;
1504
1505
0
    case OP_NOTMINUPTO:
1506
0
    case OP_NOTMINUPTOI:
1507
0
    Lmin = 0;
1508
0
    Lmax = GET2(Fecode, 1);
1509
0
    reptype = REPTYPE_MIN;
1510
0
    Fecode += 1 + IMM2_SIZE;
1511
0
    goto REPEATNOTCHAR;
1512
1513
0
    case OP_NOTPOSSTAR:
1514
0
    case OP_NOTPOSSTARI:
1515
0
    reptype = REPTYPE_POS;
1516
0
    Lmin = 0;
1517
0
    Lmax = UINT32_MAX;
1518
0
    Fecode++;
1519
0
    goto REPEATNOTCHAR;
1520
1521
0
    case OP_NOTPOSPLUS:
1522
0
    case OP_NOTPOSPLUSI:
1523
0
    reptype = REPTYPE_POS;
1524
0
    Lmin = 1;
1525
0
    Lmax = UINT32_MAX;
1526
0
    Fecode++;
1527
0
    goto REPEATNOTCHAR;
1528
1529
0
    case OP_NOTPOSQUERY:
1530
0
    case OP_NOTPOSQUERYI:
1531
0
    reptype = REPTYPE_POS;
1532
0
    Lmin = 0;
1533
0
    Lmax = 1;
1534
0
    Fecode++;
1535
0
    goto REPEATNOTCHAR;
1536
1537
0
    case OP_NOTPOSUPTO:
1538
0
    case OP_NOTPOSUPTOI:
1539
0
    reptype = REPTYPE_POS;
1540
0
    Lmin = 0;
1541
0
    Lmax = GET2(Fecode, 1);
1542
0
    Fecode += 1 + IMM2_SIZE;
1543
0
    goto REPEATNOTCHAR;
1544
1545
0
    case OP_NOTSTAR:
1546
0
    case OP_NOTSTARI:
1547
0
    case OP_NOTMINSTAR:
1548
0
    case OP_NOTMINSTARI:
1549
0
    case OP_NOTPLUS:
1550
0
    case OP_NOTPLUSI:
1551
0
    case OP_NOTMINPLUS:
1552
0
    case OP_NOTMINPLUSI:
1553
0
    case OP_NOTQUERY:
1554
0
    case OP_NOTQUERYI:
1555
0
    case OP_NOTMINQUERY:
1556
0
    case OP_NOTMINQUERYI:
1557
0
    fc = *Fecode++ - ((Fop >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
1558
0
    Lmin = rep_min[fc];
1559
0
    Lmax = rep_max[fc];
1560
0
    reptype = rep_typ[fc];
1561
1562
    /* Common code for all repeated single-character non-matches. */
1563
1564
0
    REPEATNOTCHAR:
1565
0
    GETCHARINCTEST(Lc, Fecode);
1566
1567
    /* The code is duplicated for the caseless and caseful cases, for speed,
1568
    since matching characters is likely to be quite common. First, ensure the
1569
    minimum number of matches are present. If Lmin = Lmax, we are done.
1570
    Otherwise, if minimizing, keep trying the rest of the expression and
1571
    advancing one matching character if failing, up to the maximum.
1572
    Alternatively, if maximizing, find the maximum number of characters and
1573
    work backwards. */
1574
1575
0
    if (Fop >= OP_NOTSTARI)     /* Caseless */
1576
0
      {
1577
0
#ifdef SUPPORT_UNICODE
1578
0
      if ((utf || ucp) && Lc > 127)
1579
0
        Loc = UCD_OTHERCASE(Lc);
1580
0
      else
1581
0
#endif /* SUPPORT_UNICODE */
1582
1583
0
      Loc = TABLE_GET(Lc, mb->fcc, Lc);  /* Other case from table */
1584
1585
0
#ifdef SUPPORT_UNICODE
1586
0
      if (utf)
1587
0
        {
1588
0
        uint32_t d;
1589
0
        for (i = 1; i <= Lmin; i++)
1590
0
          {
1591
0
          if (Feptr >= mb->end_subject)
1592
0
            {
1593
0
            SCHECK_PARTIAL();
1594
0
            RRETURN(MATCH_NOMATCH);
1595
0
            }
1596
0
          GETCHARINC(d, Feptr);
1597
0
          if (Lc == d || Loc == d) RRETURN(MATCH_NOMATCH);
1598
0
          }
1599
0
        }
1600
0
      else
1601
0
#endif  /* SUPPORT_UNICODE */
1602
1603
      /* Not UTF mode */
1604
0
        {
1605
0
        for (i = 1; i <= Lmin; i++)
1606
0
          {
1607
0
          if (Feptr >= mb->end_subject)
1608
0
            {
1609
0
            SCHECK_PARTIAL();
1610
0
            RRETURN(MATCH_NOMATCH);
1611
0
            }
1612
0
          if (Lc == *Feptr || Loc == *Feptr) RRETURN(MATCH_NOMATCH);
1613
0
          Feptr++;
1614
0
          }
1615
0
        }
1616
1617
0
      if (Lmin == Lmax) continue;  /* Finished for exact count */
1618
1619
0
      if (reptype == REPTYPE_MIN)
1620
0
        {
1621
0
#ifdef SUPPORT_UNICODE
1622
0
        if (utf)
1623
0
          {
1624
0
          uint32_t d;
1625
0
          for (;;)
1626
0
            {
1627
0
            RMATCH(Fecode, RM204);
1628
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1629
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1630
0
            if (Feptr >= mb->end_subject)
1631
0
              {
1632
0
              SCHECK_PARTIAL();
1633
0
              RRETURN(MATCH_NOMATCH);
1634
0
              }
1635
0
            GETCHARINC(d, Feptr);
1636
0
            if (Lc == d || Loc == d) RRETURN(MATCH_NOMATCH);
1637
0
            }
1638
0
          }
1639
0
        else
1640
0
#endif  /*SUPPORT_UNICODE */
1641
1642
        /* Not UTF mode */
1643
0
          {
1644
0
          for (;;)
1645
0
            {
1646
0
            RMATCH(Fecode, RM29);
1647
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1648
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1649
0
            if (Feptr >= mb->end_subject)
1650
0
              {
1651
0
              SCHECK_PARTIAL();
1652
0
              RRETURN(MATCH_NOMATCH);
1653
0
              }
1654
0
            if (Lc == *Feptr || Loc == *Feptr) RRETURN(MATCH_NOMATCH);
1655
0
            Feptr++;
1656
0
            }
1657
0
          }
1658
        /* Control never gets here */
1659
0
        }
1660
1661
      /* Maximize case */
1662
1663
0
      else
1664
0
        {
1665
0
        Lstart_eptr = Feptr;
1666
1667
0
#ifdef SUPPORT_UNICODE
1668
0
        if (utf)
1669
0
          {
1670
0
          uint32_t d;
1671
0
          for (i = Lmin; i < Lmax; i++)
1672
0
            {
1673
0
            int len = 1;
1674
0
            if (Feptr >= mb->end_subject)
1675
0
              {
1676
0
              SCHECK_PARTIAL();
1677
0
              break;
1678
0
              }
1679
0
            GETCHARLEN(d, Feptr, len);
1680
0
            if (Lc == d || Loc == d) break;
1681
0
            Feptr += len;
1682
0
            }
1683
1684
          /* After \C in UTF mode, Lstart_eptr might be in the middle of a
1685
          Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
1686
          go too far. */
1687
1688
0
          if (reptype != REPTYPE_POS) for(;;)
1689
0
            {
1690
0
            if (Feptr <= Lstart_eptr) break;
1691
0
            RMATCH(Fecode, RM205);
1692
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1693
0
            Feptr--;
1694
0
            BACKCHAR(Feptr);
1695
0
            }
1696
0
          }
1697
0
        else
1698
0
#endif  /* SUPPORT_UNICODE */
1699
1700
        /* Not UTF mode */
1701
0
          {
1702
0
          for (i = Lmin; i < Lmax; i++)
1703
0
            {
1704
0
            if (Feptr >= mb->end_subject)
1705
0
              {
1706
0
              SCHECK_PARTIAL();
1707
0
              break;
1708
0
              }
1709
0
            if (Lc == *Feptr || Loc == *Feptr) break;
1710
0
            Feptr++;
1711
0
            }
1712
0
          if (reptype != REPTYPE_POS) for (;;)
1713
0
            {
1714
0
            if (Feptr == Lstart_eptr) break;
1715
0
            RMATCH(Fecode, RM30);
1716
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1717
0
            Feptr--;
1718
0
            }
1719
0
          }
1720
0
        }
1721
0
      }
1722
1723
    /* Caseful comparisons */
1724
1725
0
    else
1726
0
      {
1727
0
#ifdef SUPPORT_UNICODE
1728
0
      if (utf)
1729
0
        {
1730
0
        uint32_t d;
1731
0
        for (i = 1; i <= Lmin; i++)
1732
0
          {
1733
0
          if (Feptr >= mb->end_subject)
1734
0
            {
1735
0
            SCHECK_PARTIAL();
1736
0
            RRETURN(MATCH_NOMATCH);
1737
0
            }
1738
0
          GETCHARINC(d, Feptr);
1739
0
          if (Lc == d) RRETURN(MATCH_NOMATCH);
1740
0
          }
1741
0
        }
1742
0
      else
1743
0
#endif
1744
      /* Not UTF mode */
1745
0
        {
1746
0
        for (i = 1; i <= Lmin; i++)
1747
0
          {
1748
0
          if (Feptr >= mb->end_subject)
1749
0
            {
1750
0
            SCHECK_PARTIAL();
1751
0
            RRETURN(MATCH_NOMATCH);
1752
0
            }
1753
0
          if (Lc == *Feptr++) RRETURN(MATCH_NOMATCH);
1754
0
          }
1755
0
        }
1756
1757
0
      if (Lmin == Lmax) continue;
1758
1759
0
      if (reptype == REPTYPE_MIN)
1760
0
        {
1761
0
#ifdef SUPPORT_UNICODE
1762
0
        if (utf)
1763
0
          {
1764
0
          uint32_t d;
1765
0
          for (;;)
1766
0
            {
1767
0
            RMATCH(Fecode, RM206);
1768
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1769
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1770
0
            if (Feptr >= mb->end_subject)
1771
0
              {
1772
0
              SCHECK_PARTIAL();
1773
0
              RRETURN(MATCH_NOMATCH);
1774
0
              }
1775
0
            GETCHARINC(d, Feptr);
1776
0
            if (Lc == d) RRETURN(MATCH_NOMATCH);
1777
0
            }
1778
0
          }
1779
0
        else
1780
0
#endif
1781
        /* Not UTF mode */
1782
0
          {
1783
0
          for (;;)
1784
0
            {
1785
0
            RMATCH(Fecode, RM31);
1786
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1787
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1788
0
            if (Feptr >= mb->end_subject)
1789
0
              {
1790
0
              SCHECK_PARTIAL();
1791
0
              RRETURN(MATCH_NOMATCH);
1792
0
              }
1793
0
            if (Lc == *Feptr++) RRETURN(MATCH_NOMATCH);
1794
0
            }
1795
0
          }
1796
        /* Control never gets here */
1797
0
        }
1798
1799
      /* Maximize case */
1800
1801
0
      else
1802
0
        {
1803
0
        Lstart_eptr = Feptr;
1804
1805
0
#ifdef SUPPORT_UNICODE
1806
0
        if (utf)
1807
0
          {
1808
0
          uint32_t d;
1809
0
          for (i = Lmin; i < Lmax; i++)
1810
0
            {
1811
0
            int len = 1;
1812
0
            if (Feptr >= mb->end_subject)
1813
0
              {
1814
0
              SCHECK_PARTIAL();
1815
0
              break;
1816
0
              }
1817
0
            GETCHARLEN(d, Feptr, len);
1818
0
            if (Lc == d) break;
1819
0
            Feptr += len;
1820
0
            }
1821
1822
          /* After \C in UTF mode, Lstart_eptr might be in the middle of a
1823
          Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
1824
          go too far. */
1825
1826
0
          if (reptype != REPTYPE_POS) for(;;)
1827
0
            {
1828
0
            if (Feptr <= Lstart_eptr) break;
1829
0
            RMATCH(Fecode, RM207);
1830
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1831
0
            Feptr--;
1832
0
            BACKCHAR(Feptr);
1833
0
            }
1834
0
          }
1835
0
        else
1836
0
#endif
1837
        /* Not UTF mode */
1838
0
          {
1839
0
          for (i = Lmin; i < Lmax; i++)
1840
0
            {
1841
0
            if (Feptr >= mb->end_subject)
1842
0
              {
1843
0
              SCHECK_PARTIAL();
1844
0
              break;
1845
0
              }
1846
0
            if (Lc == *Feptr) break;
1847
0
            Feptr++;
1848
0
            }
1849
0
          if (reptype != REPTYPE_POS) for (;;)
1850
0
            {
1851
0
            if (Feptr == Lstart_eptr) break;
1852
0
            RMATCH(Fecode, RM32);
1853
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1854
0
            Feptr--;
1855
0
            }
1856
0
          }
1857
0
        }
1858
0
      }
1859
0
    break;
1860
1861
0
#undef Lstart_eptr
1862
0
#undef Lmin
1863
0
#undef Lmax
1864
0
#undef Lc
1865
0
#undef Loc
1866
1867
1868
    /* ===================================================================== */
1869
    /* Match a bit-mapped character class, possibly repeatedly. These opcodes
1870
    are used when all the characters in the class have values in the range
1871
    0-255, and either the matching is caseful, or the characters are in the
1872
    range 0-127 when UTF processing is enabled. The only difference between
1873
    OP_CLASS and OP_NCLASS occurs when a data character outside the range is
1874
    encountered. */
1875
1876
0
#define Lmin               F->temp_32[0]
1877
0
#define Lmax               F->temp_32[1]
1878
0
#define Lstart_eptr        F->temp_sptr[0]
1879
0
#define Lbyte_map_address  F->temp_sptr[1]
1880
0
#define Lbyte_map          ((unsigned char *)Lbyte_map_address)
1881
1882
0
    case OP_NCLASS:
1883
0
    case OP_CLASS:
1884
0
      {
1885
0
      Lbyte_map_address = Fecode + 1;           /* Save for matching */
1886
0
      Fecode += 1 + (32 / sizeof(PCRE2_UCHAR)); /* Advance past the item */
1887
1888
      /* Look past the end of the item to see if there is repeat information
1889
      following. Then obey similar code to character type repeats. */
1890
1891
0
      switch (*Fecode)
1892
0
        {
1893
0
        case OP_CRSTAR:
1894
0
        case OP_CRMINSTAR:
1895
0
        case OP_CRPLUS:
1896
0
        case OP_CRMINPLUS:
1897
0
        case OP_CRQUERY:
1898
0
        case OP_CRMINQUERY:
1899
0
        case OP_CRPOSSTAR:
1900
0
        case OP_CRPOSPLUS:
1901
0
        case OP_CRPOSQUERY:
1902
0
        fc = *Fecode++ - OP_CRSTAR;
1903
0
        Lmin = rep_min[fc];
1904
0
        Lmax = rep_max[fc];
1905
0
        reptype = rep_typ[fc];
1906
0
        break;
1907
1908
0
        case OP_CRRANGE:
1909
0
        case OP_CRMINRANGE:
1910
0
        case OP_CRPOSRANGE:
1911
0
        Lmin = GET2(Fecode, 1);
1912
0
        Lmax = GET2(Fecode, 1 + IMM2_SIZE);
1913
0
        if (Lmax == 0) Lmax = UINT32_MAX;       /* Max 0 => infinity */
1914
0
        reptype = rep_typ[*Fecode - OP_CRSTAR];
1915
0
        Fecode += 1 + 2 * IMM2_SIZE;
1916
0
        break;
1917
1918
0
        default:               /* No repeat follows */
1919
0
        Lmin = Lmax = 1;
1920
0
        break;
1921
0
        }
1922
1923
      /* First, ensure the minimum number of matches are present. */
1924
1925
0
#ifdef SUPPORT_UNICODE
1926
0
      if (utf)
1927
0
        {
1928
0
        for (i = 1; i <= Lmin; i++)
1929
0
          {
1930
0
          if (Feptr >= mb->end_subject)
1931
0
            {
1932
0
            SCHECK_PARTIAL();
1933
0
            RRETURN(MATCH_NOMATCH);
1934
0
            }
1935
0
          GETCHARINC(fc, Feptr);
1936
0
          if (fc > 255)
1937
0
            {
1938
0
            if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
1939
0
            }
1940
0
          else
1941
0
            if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
1942
0
          }
1943
0
        }
1944
0
      else
1945
0
#endif
1946
      /* Not UTF mode */
1947
0
        {
1948
0
        for (i = 1; i <= Lmin; i++)
1949
0
          {
1950
0
          if (Feptr >= mb->end_subject)
1951
0
            {
1952
0
            SCHECK_PARTIAL();
1953
0
            RRETURN(MATCH_NOMATCH);
1954
0
            }
1955
0
          fc = *Feptr++;
1956
#if PCRE2_CODE_UNIT_WIDTH != 8
1957
          if (fc > 255)
1958
            {
1959
            if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
1960
            }
1961
          else
1962
#endif
1963
0
          if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
1964
0
          }
1965
0
        }
1966
1967
      /* If Lmax == Lmin we are done. Continue with main loop. */
1968
1969
0
      if (Lmin == Lmax) continue;
1970
1971
      /* If minimizing, keep testing the rest of the expression and advancing
1972
      the pointer while it matches the class. */
1973
1974
0
      if (reptype == REPTYPE_MIN)
1975
0
        {
1976
0
#ifdef SUPPORT_UNICODE
1977
0
        if (utf)
1978
0
          {
1979
0
          for (;;)
1980
0
            {
1981
0
            RMATCH(Fecode, RM200);
1982
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1983
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1984
0
            if (Feptr >= mb->end_subject)
1985
0
              {
1986
0
              SCHECK_PARTIAL();
1987
0
              RRETURN(MATCH_NOMATCH);
1988
0
              }
1989
0
            GETCHARINC(fc, Feptr);
1990
0
            if (fc > 255)
1991
0
              {
1992
0
              if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
1993
0
              }
1994
0
            else
1995
0
              if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
1996
0
            }
1997
0
          }
1998
0
        else
1999
0
#endif
2000
        /* Not UTF mode */
2001
0
          {
2002
0
          for (;;)
2003
0
            {
2004
0
            RMATCH(Fecode, RM23);
2005
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2006
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
2007
0
            if (Feptr >= mb->end_subject)
2008
0
              {
2009
0
              SCHECK_PARTIAL();
2010
0
              RRETURN(MATCH_NOMATCH);
2011
0
              }
2012
0
            fc = *Feptr++;
2013
#if PCRE2_CODE_UNIT_WIDTH != 8
2014
            if (fc > 255)
2015
              {
2016
              if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
2017
              }
2018
            else
2019
#endif
2020
0
            if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
2021
0
            }
2022
0
          }
2023
        /* Control never gets here */
2024
0
        }
2025
2026
      /* If maximizing, find the longest possible run, then work backwards. */
2027
2028
0
      else
2029
0
        {
2030
0
        Lstart_eptr = Feptr;
2031
2032
0
#ifdef SUPPORT_UNICODE
2033
0
        if (utf)
2034
0
          {
2035
0
          for (i = Lmin; i < Lmax; i++)
2036
0
            {
2037
0
            int len = 1;
2038
0
            if (Feptr >= mb->end_subject)
2039
0
              {
2040
0
              SCHECK_PARTIAL();
2041
0
              break;
2042
0
              }
2043
0
            GETCHARLEN(fc, Feptr, len);
2044
0
            if (fc > 255)
2045
0
              {
2046
0
              if (Fop == OP_CLASS) break;
2047
0
              }
2048
0
            else
2049
0
              if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) break;
2050
0
            Feptr += len;
2051
0
            }
2052
2053
0
          if (reptype == REPTYPE_POS) continue;    /* No backtracking */
2054
2055
          /* After \C in UTF mode, Lstart_eptr might be in the middle of a
2056
          Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
2057
          go too far. */
2058
2059
0
          for (;;)
2060
0
            {
2061
0
            RMATCH(Fecode, RM201);
2062
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2063
0
            if (Feptr-- <= Lstart_eptr) break;  /* Tried at original position */
2064
0
            BACKCHAR(Feptr);
2065
0
            }
2066
0
          }
2067
0
        else
2068
0
#endif
2069
          /* Not UTF mode */
2070
0
          {
2071
0
          for (i = Lmin; i < Lmax; i++)
2072
0
            {
2073
0
            if (Feptr >= mb->end_subject)
2074
0
              {
2075
0
              SCHECK_PARTIAL();
2076
0
              break;
2077
0
              }
2078
0
            fc = *Feptr;
2079
#if PCRE2_CODE_UNIT_WIDTH != 8
2080
            if (fc > 255)
2081
              {
2082
              if (Fop == OP_CLASS) break;
2083
              }
2084
            else
2085
#endif
2086
0
            if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) break;
2087
0
            Feptr++;
2088
0
            }
2089
2090
0
          if (reptype == REPTYPE_POS) continue;    /* No backtracking */
2091
2092
0
          while (Feptr >= Lstart_eptr)
2093
0
            {
2094
0
            RMATCH(Fecode, RM24);
2095
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2096
0
            Feptr--;
2097
0
            }
2098
0
          }
2099
2100
0
        RRETURN(MATCH_NOMATCH);
2101
0
        }
2102
0
      }
2103
    /* Control never gets here */
2104
2105
0
#undef Lbyte_map_address
2106
0
#undef Lbyte_map
2107
0
#undef Lstart_eptr
2108
0
#undef Lmin
2109
0
#undef Lmax
2110
2111
2112
    /* ===================================================================== */
2113
    /* Match an extended character class. In the 8-bit library, this opcode is
2114
    encountered only when UTF-8 mode is supported. In the 16-bit and
2115
    32-bit libraries, codepoints greater than 255 may be encountered even when
2116
    UTF is not supported. */
2117
2118
0
#define Lstart_eptr  F->temp_sptr[0]
2119
0
#define Lxclass_data F->temp_sptr[1]
2120
0
#define Lmin         F->temp_32[0]
2121
0
#define Lmax         F->temp_32[1]
2122
2123
0
#ifdef SUPPORT_WIDE_CHARS
2124
0
    case OP_XCLASS:
2125
0
      {
2126
0
      Lxclass_data = Fecode + 1 + LINK_SIZE;  /* Save for matching */
2127
0
      Fecode += GET(Fecode, 1);               /* Advance past the item */
2128
2129
0
      switch (*Fecode)
2130
0
        {
2131
0
        case OP_CRSTAR:
2132
0
        case OP_CRMINSTAR:
2133
0
        case OP_CRPLUS:
2134
0
        case OP_CRMINPLUS:
2135
0
        case OP_CRQUERY:
2136
0
        case OP_CRMINQUERY:
2137
0
        case OP_CRPOSSTAR:
2138
0
        case OP_CRPOSPLUS:
2139
0
        case OP_CRPOSQUERY:
2140
0
        fc = *Fecode++ - OP_CRSTAR;
2141
0
        Lmin = rep_min[fc];
2142
0
        Lmax = rep_max[fc];
2143
0
        reptype = rep_typ[fc];
2144
0
        break;
2145
2146
0
        case OP_CRRANGE:
2147
0
        case OP_CRMINRANGE:
2148
0
        case OP_CRPOSRANGE:
2149
0
        Lmin = GET2(Fecode, 1);
2150
0
        Lmax = GET2(Fecode, 1 + IMM2_SIZE);
2151
0
        if (Lmax == 0) Lmax = UINT32_MAX;  /* Max 0 => infinity */
2152
0
        reptype = rep_typ[*Fecode - OP_CRSTAR];
2153
0
        Fecode += 1 + 2 * IMM2_SIZE;
2154
0
        break;
2155
2156
0
        default:               /* No repeat follows */
2157
0
        Lmin = Lmax = 1;
2158
0
        break;
2159
0
        }
2160
2161
      /* First, ensure the minimum number of matches are present. */
2162
2163
0
      for (i = 1; i <= Lmin; i++)
2164
0
        {
2165
0
        if (Feptr >= mb->end_subject)
2166
0
          {
2167
0
          SCHECK_PARTIAL();
2168
0
          RRETURN(MATCH_NOMATCH);
2169
0
          }
2170
0
        GETCHARINCTEST(fc, Feptr);
2171
0
        if (!PRIV(xclass)(fc, Lxclass_data, utf)) RRETURN(MATCH_NOMATCH);
2172
0
        }
2173
2174
      /* If Lmax == Lmin we can just continue with the main loop. */
2175
2176
0
      if (Lmin == Lmax) continue;
2177
2178
      /* If minimizing, keep testing the rest of the expression and advancing
2179
      the pointer while it matches the class. */
2180
2181
0
      if (reptype == REPTYPE_MIN)
2182
0
        {
2183
0
        for (;;)
2184
0
          {
2185
0
          RMATCH(Fecode, RM100);
2186
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2187
0
          if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
2188
0
          if (Feptr >= mb->end_subject)
2189
0
            {
2190
0
            SCHECK_PARTIAL();
2191
0
            RRETURN(MATCH_NOMATCH);
2192
0
            }
2193
0
          GETCHARINCTEST(fc, Feptr);
2194
0
          if (!PRIV(xclass)(fc, Lxclass_data, utf)) RRETURN(MATCH_NOMATCH);
2195
0
          }
2196
        /* Control never gets here */
2197
0
        }
2198
2199
      /* If maximizing, find the longest possible run, then work backwards. */
2200
2201
0
      else
2202
0
        {
2203
0
        Lstart_eptr = Feptr;
2204
0
        for (i = Lmin; i < Lmax; i++)
2205
0
          {
2206
0
          int len = 1;
2207
0
          if (Feptr >= mb->end_subject)
2208
0
            {
2209
0
            SCHECK_PARTIAL();
2210
0
            break;
2211
0
            }
2212
0
#ifdef SUPPORT_UNICODE
2213
0
          GETCHARLENTEST(fc, Feptr, len);
2214
#else
2215
          fc = *Feptr;
2216
#endif
2217
0
          if (!PRIV(xclass)(fc, Lxclass_data, utf)) break;
2218
0
          Feptr += len;
2219
0
          }
2220
2221
0
        if (reptype == REPTYPE_POS) continue;    /* No backtracking */
2222
2223
        /* After \C in UTF mode, Lstart_eptr might be in the middle of a
2224
        Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
2225
        go too far. */
2226
2227
0
        for(;;)
2228
0
          {
2229
0
          RMATCH(Fecode, RM101);
2230
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2231
0
          if (Feptr-- <= Lstart_eptr) break;  /* Tried at original position */
2232
0
#ifdef SUPPORT_UNICODE
2233
0
          if (utf) BACKCHAR(Feptr);
2234
0
#endif
2235
0
          }
2236
0
        RRETURN(MATCH_NOMATCH);
2237
0
        }
2238
2239
      /* Control never gets here */
2240
0
      }
2241
0
#endif  /* SUPPORT_WIDE_CHARS: end of XCLASS */
2242
2243
0
#undef Lstart_eptr
2244
0
#undef Lxclass_data
2245
0
#undef Lmin
2246
0
#undef Lmax
2247
2248
2249
    /* ===================================================================== */
2250
    /* Match various character types when PCRE2_UCP is not set. These opcodes
2251
    are not generated when PCRE2_UCP is set - instead appropriate property
2252
    tests are compiled. */
2253
2254
0
    case OP_NOT_DIGIT:
2255
0
    if (Feptr >= mb->end_subject)
2256
0
      {
2257
0
      SCHECK_PARTIAL();
2258
0
      RRETURN(MATCH_NOMATCH);
2259
0
      }
2260
0
    GETCHARINCTEST(fc, Feptr);
2261
0
    if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_digit) != 0)
2262
0
      RRETURN(MATCH_NOMATCH);
2263
0
    Fecode++;
2264
0
    break;
2265
2266
0
    case OP_DIGIT:
2267
0
    if (Feptr >= mb->end_subject)
2268
0
      {
2269
0
      SCHECK_PARTIAL();
2270
0
      RRETURN(MATCH_NOMATCH);
2271
0
      }
2272
0
    GETCHARINCTEST(fc, Feptr);
2273
0
    if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_digit) == 0)
2274
0
      RRETURN(MATCH_NOMATCH);
2275
0
    Fecode++;
2276
0
    break;
2277
2278
0
    case OP_NOT_WHITESPACE:
2279
0
    if (Feptr >= mb->end_subject)
2280
0
      {
2281
0
      SCHECK_PARTIAL();
2282
0
      RRETURN(MATCH_NOMATCH);
2283
0
      }
2284
0
    GETCHARINCTEST(fc, Feptr);
2285
0
    if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_space) != 0)
2286
0
      RRETURN(MATCH_NOMATCH);
2287
0
    Fecode++;
2288
0
    break;
2289
2290
0
    case OP_WHITESPACE:
2291
0
    if (Feptr >= mb->end_subject)
2292
0
      {
2293
0
      SCHECK_PARTIAL();
2294
0
      RRETURN(MATCH_NOMATCH);
2295
0
      }
2296
0
    GETCHARINCTEST(fc, Feptr);
2297
0
    if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_space) == 0)
2298
0
      RRETURN(MATCH_NOMATCH);
2299
0
    Fecode++;
2300
0
    break;
2301
2302
0
    case OP_NOT_WORDCHAR:
2303
0
    if (Feptr >= mb->end_subject)
2304
0
      {
2305
0
      SCHECK_PARTIAL();
2306
0
      RRETURN(MATCH_NOMATCH);
2307
0
      }
2308
0
    GETCHARINCTEST(fc, Feptr);
2309
0
    if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0)
2310
0
      RRETURN(MATCH_NOMATCH);
2311
0
    Fecode++;
2312
0
    break;
2313
2314
0
    case OP_WORDCHAR:
2315
0
    if (Feptr >= mb->end_subject)
2316
0
      {
2317
0
      SCHECK_PARTIAL();
2318
0
      RRETURN(MATCH_NOMATCH);
2319
0
      }
2320
0
    GETCHARINCTEST(fc, Feptr);
2321
0
    if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_word) == 0)
2322
0
      RRETURN(MATCH_NOMATCH);
2323
0
    Fecode++;
2324
0
    break;
2325
2326
0
    case OP_ANYNL:
2327
0
    if (Feptr >= mb->end_subject)
2328
0
      {
2329
0
      SCHECK_PARTIAL();
2330
0
      RRETURN(MATCH_NOMATCH);
2331
0
      }
2332
0
    GETCHARINCTEST(fc, Feptr);
2333
0
    switch(fc)
2334
0
      {
2335
0
      default: RRETURN(MATCH_NOMATCH);
2336
2337
0
      case CHAR_CR:
2338
0
      if (Feptr >= mb->end_subject)
2339
0
        {
2340
0
        SCHECK_PARTIAL();
2341
0
        }
2342
0
      else if (UCHAR21TEST(Feptr) == CHAR_LF) Feptr++;
2343
0
      break;
2344
2345
0
      case CHAR_LF:
2346
0
      break;
2347
2348
0
      case CHAR_VT:
2349
0
      case CHAR_FF:
2350
0
      case CHAR_NEL:
2351
0
#ifndef EBCDIC
2352
0
      case 0x2028:
2353
0
      case 0x2029:
2354
0
#endif  /* Not EBCDIC */
2355
0
      if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
2356
0
      break;
2357
0
      }
2358
0
    Fecode++;
2359
0
    break;
2360
2361
0
    case OP_NOT_HSPACE:
2362
0
    if (Feptr >= mb->end_subject)
2363
0
      {
2364
0
      SCHECK_PARTIAL();
2365
0
      RRETURN(MATCH_NOMATCH);
2366
0
      }
2367
0
    GETCHARINCTEST(fc, Feptr);
2368
0
    switch(fc)
2369
0
      {
2370
0
      HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
2371
0
      default: break;
2372
0
      }
2373
0
    Fecode++;
2374
0
    break;
2375
2376
0
    case OP_HSPACE:
2377
0
    if (Feptr >= mb->end_subject)
2378
0
      {
2379
0
      SCHECK_PARTIAL();
2380
0
      RRETURN(MATCH_NOMATCH);
2381
0
      }
2382
0
    GETCHARINCTEST(fc, Feptr);
2383
0
    switch(fc)
2384
0
      {
2385
0
      HSPACE_CASES: break;  /* Byte and multibyte cases */
2386
0
      default: RRETURN(MATCH_NOMATCH);
2387
0
      }
2388
0
    Fecode++;
2389
0
    break;
2390
2391
0
    case OP_NOT_VSPACE:
2392
0
    if (Feptr >= mb->end_subject)
2393
0
      {
2394
0
      SCHECK_PARTIAL();
2395
0
      RRETURN(MATCH_NOMATCH);
2396
0
      }
2397
0
    GETCHARINCTEST(fc, Feptr);
2398
0
    switch(fc)
2399
0
      {
2400
0
      VSPACE_CASES: RRETURN(MATCH_NOMATCH);
2401
0
      default: break;
2402
0
      }
2403
0
    Fecode++;
2404
0
    break;
2405
2406
0
    case OP_VSPACE:
2407
0
    if (Feptr >= mb->end_subject)
2408
0
      {
2409
0
      SCHECK_PARTIAL();
2410
0
      RRETURN(MATCH_NOMATCH);
2411
0
      }
2412
0
    GETCHARINCTEST(fc, Feptr);
2413
0
    switch(fc)
2414
0
      {
2415
0
      VSPACE_CASES: break;
2416
0
      default: RRETURN(MATCH_NOMATCH);
2417
0
      }
2418
0
    Fecode++;
2419
0
    break;
2420
2421
2422
0
#ifdef SUPPORT_UNICODE
2423
2424
    /* ===================================================================== */
2425
    /* Check the next character by Unicode property. We will get here only
2426
    if the support is in the binary; otherwise a compile-time error occurs. */
2427
2428
0
    case OP_PROP:
2429
0
    case OP_NOTPROP:
2430
0
    if (Feptr >= mb->end_subject)
2431
0
      {
2432
0
      SCHECK_PARTIAL();
2433
0
      RRETURN(MATCH_NOMATCH);
2434
0
      }
2435
0
    GETCHARINCTEST(fc, Feptr);
2436
0
      {
2437
0
      const uint32_t *cp;
2438
0
      const ucd_record *prop = GET_UCD(fc);
2439
0
      BOOL notmatch = Fop == OP_NOTPROP;
2440
2441
0
      switch(Fecode[1])
2442
0
        {
2443
0
        case PT_ANY:
2444
0
        if (notmatch) RRETURN(MATCH_NOMATCH);
2445
0
        break;
2446
2447
0
        case PT_LAMP:
2448
0
        if ((prop->chartype == ucp_Lu ||
2449
0
             prop->chartype == ucp_Ll ||
2450
0
             prop->chartype == ucp_Lt) == notmatch)
2451
0
          RRETURN(MATCH_NOMATCH);
2452
0
        break;
2453
2454
0
        case PT_GC:
2455
0
        if ((Fecode[2] == PRIV(ucp_gentype)[prop->chartype]) == notmatch)
2456
0
          RRETURN(MATCH_NOMATCH);
2457
0
        break;
2458
2459
0
        case PT_PC:
2460
0
        if ((Fecode[2] == prop->chartype) == notmatch)
2461
0
          RRETURN(MATCH_NOMATCH);
2462
0
        break;
2463
2464
0
        case PT_SC:
2465
0
        if ((Fecode[2] == prop->script) == notmatch)
2466
0
          RRETURN(MATCH_NOMATCH);
2467
0
        break;
2468
2469
0
        case PT_SCX:
2470
0
          {
2471
0
          BOOL ok = (Fecode[2] == prop->script ||
2472
0
                     MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Fecode[2]) != 0);
2473
0
          if (ok == notmatch) RRETURN(MATCH_NOMATCH);
2474
0
          }
2475
0
        break;
2476
2477
        /* These are specials */
2478
2479
0
        case PT_ALNUM:
2480
0
        if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2481
0
             PRIV(ucp_gentype)[prop->chartype] == ucp_N) == notmatch)
2482
0
          RRETURN(MATCH_NOMATCH);
2483
0
        break;
2484
2485
        /* Perl space used to exclude VT, but from Perl 5.18 it is included,
2486
        which means that Perl space and POSIX space are now identical. PCRE
2487
        was changed at release 8.34. */
2488
2489
0
        case PT_SPACE:    /* Perl space */
2490
0
        case PT_PXSPACE:  /* POSIX space */
2491
0
        switch(fc)
2492
0
          {
2493
0
          HSPACE_CASES:
2494
0
          VSPACE_CASES:
2495
0
          if (notmatch) RRETURN(MATCH_NOMATCH);
2496
0
          break;
2497
2498
0
          default:
2499
0
          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == notmatch)
2500
0
            RRETURN(MATCH_NOMATCH);
2501
0
          break;
2502
0
          }
2503
0
        break;
2504
2505
0
        case PT_WORD:
2506
0
        if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2507
0
             PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
2508
0
             fc == CHAR_UNDERSCORE) == notmatch)
2509
0
          RRETURN(MATCH_NOMATCH);
2510
0
        break;
2511
2512
0
        case PT_CLIST:
2513
0
        cp = PRIV(ucd_caseless_sets) + Fecode[2];
2514
0
        for (;;)
2515
0
          {
2516
0
          if (fc < *cp)
2517
0
            { if (notmatch) break; else { RRETURN(MATCH_NOMATCH); } }
2518
0
          if (fc == *cp++)
2519
0
            { if (notmatch) { RRETURN(MATCH_NOMATCH); } else break; }
2520
0
          }
2521
0
        break;
2522
2523
0
        case PT_UCNC:
2524
0
        if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
2525
0
             fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
2526
0
             fc >= 0xe000) == notmatch)
2527
0
          RRETURN(MATCH_NOMATCH);
2528
0
        break;
2529
2530
0
        case PT_BIDICL:
2531
0
        if ((UCD_BIDICLASS_PROP(prop) == Fecode[2]) == notmatch)
2532
0
          RRETURN(MATCH_NOMATCH);
2533
0
        break;
2534
2535
0
        case PT_BOOL:
2536
0
          {
2537
0
          BOOL ok = MAPBIT(PRIV(ucd_boolprop_sets) +
2538
0
            UCD_BPROPS_PROP(prop), Fecode[2]) != 0;
2539
0
          if (ok == notmatch) RRETURN(MATCH_NOMATCH);
2540
0
          }
2541
0
        break;
2542
2543
        /* This should never occur */
2544
2545
0
        default:
2546
0
        return PCRE2_ERROR_INTERNAL;
2547
0
        }
2548
2549
0
      Fecode += 3;
2550
0
      }
2551
0
    break;
2552
2553
2554
    /* ===================================================================== */
2555
    /* Match an extended Unicode sequence. We will get here only if the support
2556
    is in the binary; otherwise a compile-time error occurs. */
2557
2558
0
    case OP_EXTUNI:
2559
0
    if (Feptr >= mb->end_subject)
2560
0
      {
2561
0
      SCHECK_PARTIAL();
2562
0
      RRETURN(MATCH_NOMATCH);
2563
0
      }
2564
0
    else
2565
0
      {
2566
0
      GETCHARINCTEST(fc, Feptr);
2567
0
      Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject, utf,
2568
0
        NULL);
2569
0
      }
2570
0
    CHECK_PARTIAL();
2571
0
    Fecode++;
2572
0
    break;
2573
2574
0
#endif  /* SUPPORT_UNICODE */
2575
2576
2577
    /* ===================================================================== */
2578
    /* Match a single character type repeatedly. Note that the property type
2579
    does not need to be in a stack frame as it is not used within an RMATCH()
2580
    loop. */
2581
2582
0
#define Lstart_eptr  F->temp_sptr[0]
2583
0
#define Lmin         F->temp_32[0]
2584
0
#define Lmax         F->temp_32[1]
2585
0
#define Lctype       F->temp_32[2]
2586
0
#define Lpropvalue   F->temp_32[3]
2587
2588
0
    case OP_TYPEEXACT:
2589
0
    Lmin = Lmax = GET2(Fecode, 1);
2590
0
    Fecode += 1 + IMM2_SIZE;
2591
0
    goto REPEATTYPE;
2592
2593
0
    case OP_TYPEUPTO:
2594
0
    case OP_TYPEMINUPTO:
2595
0
    Lmin = 0;
2596
0
    Lmax = GET2(Fecode, 1);
2597
0
    reptype = (*Fecode == OP_TYPEMINUPTO)? REPTYPE_MIN : REPTYPE_MAX;
2598
0
    Fecode += 1 + IMM2_SIZE;
2599
0
    goto REPEATTYPE;
2600
2601
0
    case OP_TYPEPOSSTAR:
2602
0
    reptype = REPTYPE_POS;
2603
0
    Lmin = 0;
2604
0
    Lmax = UINT32_MAX;
2605
0
    Fecode++;
2606
0
    goto REPEATTYPE;
2607
2608
0
    case OP_TYPEPOSPLUS:
2609
0
    reptype = REPTYPE_POS;
2610
0
    Lmin = 1;
2611
0
    Lmax = UINT32_MAX;
2612
0
    Fecode++;
2613
0
    goto REPEATTYPE;
2614
2615
0
    case OP_TYPEPOSQUERY:
2616
0
    reptype = REPTYPE_POS;
2617
0
    Lmin = 0;
2618
0
    Lmax = 1;
2619
0
    Fecode++;
2620
0
    goto REPEATTYPE;
2621
2622
0
    case OP_TYPEPOSUPTO:
2623
0
    reptype = REPTYPE_POS;
2624
0
    Lmin = 0;
2625
0
    Lmax = GET2(Fecode, 1);
2626
0
    Fecode += 1 + IMM2_SIZE;
2627
0
    goto REPEATTYPE;
2628
2629
0
    case OP_TYPESTAR:
2630
0
    case OP_TYPEMINSTAR:
2631
0
    case OP_TYPEPLUS:
2632
0
    case OP_TYPEMINPLUS:
2633
0
    case OP_TYPEQUERY:
2634
0
    case OP_TYPEMINQUERY:
2635
0
    fc = *Fecode++ - OP_TYPESTAR;
2636
0
    Lmin = rep_min[fc];
2637
0
    Lmax = rep_max[fc];
2638
0
    reptype = rep_typ[fc];
2639
2640
    /* Common code for all repeated character type matches. */
2641
2642
0
    REPEATTYPE:
2643
0
    Lctype = *Fecode++;      /* Code for the character type */
2644
2645
0
#ifdef SUPPORT_UNICODE
2646
0
    if (Lctype == OP_PROP || Lctype == OP_NOTPROP)
2647
0
      {
2648
0
      proptype = *Fecode++;
2649
0
      Lpropvalue = *Fecode++;
2650
0
      }
2651
0
    else proptype = -1;
2652
0
#endif
2653
2654
    /* First, ensure the minimum number of matches are present. Use inline
2655
    code for maximizing the speed, and do the type test once at the start
2656
    (i.e. keep it out of the loops). As there are no calls to RMATCH in the
2657
    loops, we can use an ordinary variable for "notmatch". The code for UTF
2658
    mode is separated out for tidiness, except for Unicode property tests. */
2659
2660
0
    if (Lmin > 0)
2661
0
      {
2662
0
#ifdef SUPPORT_UNICODE
2663
0
      if (proptype >= 0)  /* Property tests in all modes */
2664
0
        {
2665
0
        BOOL notmatch = Lctype == OP_NOTPROP;
2666
0
        switch(proptype)
2667
0
          {
2668
0
          case PT_ANY:
2669
0
          if (notmatch) RRETURN(MATCH_NOMATCH);
2670
0
          for (i = 1; i <= Lmin; i++)
2671
0
            {
2672
0
            if (Feptr >= mb->end_subject)
2673
0
              {
2674
0
              SCHECK_PARTIAL();
2675
0
              RRETURN(MATCH_NOMATCH);
2676
0
              }
2677
0
            GETCHARINCTEST(fc, Feptr);
2678
0
            }
2679
0
          break;
2680
2681
0
          case PT_LAMP:
2682
0
          for (i = 1; i <= Lmin; i++)
2683
0
            {
2684
0
            int chartype;
2685
0
            if (Feptr >= mb->end_subject)
2686
0
              {
2687
0
              SCHECK_PARTIAL();
2688
0
              RRETURN(MATCH_NOMATCH);
2689
0
              }
2690
0
            GETCHARINCTEST(fc, Feptr);
2691
0
            chartype = UCD_CHARTYPE(fc);
2692
0
            if ((chartype == ucp_Lu ||
2693
0
                 chartype == ucp_Ll ||
2694
0
                 chartype == ucp_Lt) == notmatch)
2695
0
              RRETURN(MATCH_NOMATCH);
2696
0
            }
2697
0
          break;
2698
2699
0
          case PT_GC:
2700
0
          for (i = 1; i <= Lmin; i++)
2701
0
            {
2702
0
            if (Feptr >= mb->end_subject)
2703
0
              {
2704
0
              SCHECK_PARTIAL();
2705
0
              RRETURN(MATCH_NOMATCH);
2706
0
              }
2707
0
            GETCHARINCTEST(fc, Feptr);
2708
0
            if ((UCD_CATEGORY(fc) == Lpropvalue) == notmatch)
2709
0
              RRETURN(MATCH_NOMATCH);
2710
0
            }
2711
0
          break;
2712
2713
0
          case PT_PC:
2714
0
          for (i = 1; i <= Lmin; i++)
2715
0
            {
2716
0
            if (Feptr >= mb->end_subject)
2717
0
              {
2718
0
              SCHECK_PARTIAL();
2719
0
              RRETURN(MATCH_NOMATCH);
2720
0
              }
2721
0
            GETCHARINCTEST(fc, Feptr);
2722
0
            if ((UCD_CHARTYPE(fc) == Lpropvalue) == notmatch)
2723
0
              RRETURN(MATCH_NOMATCH);
2724
0
            }
2725
0
          break;
2726
2727
0
          case PT_SC:
2728
0
          for (i = 1; i <= Lmin; i++)
2729
0
            {
2730
0
            if (Feptr >= mb->end_subject)
2731
0
              {
2732
0
              SCHECK_PARTIAL();
2733
0
              RRETURN(MATCH_NOMATCH);
2734
0
              }
2735
0
            GETCHARINCTEST(fc, Feptr);
2736
0
            if ((UCD_SCRIPT(fc) == Lpropvalue) == notmatch)
2737
0
              RRETURN(MATCH_NOMATCH);
2738
0
            }
2739
0
          break;
2740
2741
0
          case PT_SCX:
2742
0
          for (i = 1; i <= Lmin; i++)
2743
0
            {
2744
0
            BOOL ok;
2745
0
            const ucd_record *prop;
2746
0
            if (Feptr >= mb->end_subject)
2747
0
              {
2748
0
              SCHECK_PARTIAL();
2749
0
              RRETURN(MATCH_NOMATCH);
2750
0
              }
2751
0
            GETCHARINCTEST(fc, Feptr);
2752
0
            prop = GET_UCD(fc);
2753
0
            ok = (prop->script == Lpropvalue ||
2754
0
                  MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0);
2755
0
            if (ok == notmatch)
2756
0
              RRETURN(MATCH_NOMATCH);
2757
0
            }
2758
0
          break;
2759
2760
0
          case PT_ALNUM:
2761
0
          for (i = 1; i <= Lmin; i++)
2762
0
            {
2763
0
            int category;
2764
0
            if (Feptr >= mb->end_subject)
2765
0
              {
2766
0
              SCHECK_PARTIAL();
2767
0
              RRETURN(MATCH_NOMATCH);
2768
0
              }
2769
0
            GETCHARINCTEST(fc, Feptr);
2770
0
            category = UCD_CATEGORY(fc);
2771
0
            if ((category == ucp_L || category == ucp_N) == notmatch)
2772
0
              RRETURN(MATCH_NOMATCH);
2773
0
            }
2774
0
          break;
2775
2776
          /* Perl space used to exclude VT, but from Perl 5.18 it is included,
2777
          which means that Perl space and POSIX space are now identical. PCRE
2778
          was changed at release 8.34. */
2779
2780
0
          case PT_SPACE:    /* Perl space */
2781
0
          case PT_PXSPACE:  /* POSIX space */
2782
0
          for (i = 1; i <= Lmin; i++)
2783
0
            {
2784
0
            if (Feptr >= mb->end_subject)
2785
0
              {
2786
0
              SCHECK_PARTIAL();
2787
0
              RRETURN(MATCH_NOMATCH);
2788
0
              }
2789
0
            GETCHARINCTEST(fc, Feptr);
2790
0
            switch(fc)
2791
0
              {
2792
0
              HSPACE_CASES:
2793
0
              VSPACE_CASES:
2794
0
              if (notmatch) RRETURN(MATCH_NOMATCH);
2795
0
              break;
2796
2797
0
              default:
2798
0
              if ((UCD_CATEGORY(fc) == ucp_Z) == notmatch)
2799
0
                RRETURN(MATCH_NOMATCH);
2800
0
              break;
2801
0
              }
2802
0
            }
2803
0
          break;
2804
2805
0
          case PT_WORD:
2806
0
          for (i = 1; i <= Lmin; i++)
2807
0
            {
2808
0
            int category;
2809
0
            if (Feptr >= mb->end_subject)
2810
0
              {
2811
0
              SCHECK_PARTIAL();
2812
0
              RRETURN(MATCH_NOMATCH);
2813
0
              }
2814
0
            GETCHARINCTEST(fc, Feptr);
2815
0
            category = UCD_CATEGORY(fc);
2816
0
            if ((category == ucp_L || category == ucp_N ||
2817
0
                fc == CHAR_UNDERSCORE) == notmatch)
2818
0
              RRETURN(MATCH_NOMATCH);
2819
0
            }
2820
0
          break;
2821
2822
0
          case PT_CLIST:
2823
0
          for (i = 1; i <= Lmin; i++)
2824
0
            {
2825
0
            const uint32_t *cp;
2826
0
            if (Feptr >= mb->end_subject)
2827
0
              {
2828
0
              SCHECK_PARTIAL();
2829
0
              RRETURN(MATCH_NOMATCH);
2830
0
              }
2831
0
            GETCHARINCTEST(fc, Feptr);
2832
0
            cp = PRIV(ucd_caseless_sets) + Lpropvalue;
2833
0
            for (;;)
2834
0
              {
2835
0
              if (fc < *cp)
2836
0
                {
2837
0
                if (notmatch) break;
2838
0
                RRETURN(MATCH_NOMATCH);
2839
0
                }
2840
0
              if (fc == *cp++)
2841
0
                {
2842
0
                if (notmatch) RRETURN(MATCH_NOMATCH);
2843
0
                break;
2844
0
                }
2845
0
              }
2846
0
            }
2847
0
          break;
2848
2849
0
          case PT_UCNC:
2850
0
          for (i = 1; i <= Lmin; i++)
2851
0
            {
2852
0
            if (Feptr >= mb->end_subject)
2853
0
              {
2854
0
              SCHECK_PARTIAL();
2855
0
              RRETURN(MATCH_NOMATCH);
2856
0
              }
2857
0
            GETCHARINCTEST(fc, Feptr);
2858
0
            if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
2859
0
                 fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
2860
0
                 fc >= 0xe000) == notmatch)
2861
0
              RRETURN(MATCH_NOMATCH);
2862
0
            }
2863
0
          break;
2864
2865
0
          case PT_BIDICL:
2866
0
          for (i = 1; i <= Lmin; i++)
2867
0
            {
2868
0
            if (Feptr >= mb->end_subject)
2869
0
              {
2870
0
              SCHECK_PARTIAL();
2871
0
              RRETURN(MATCH_NOMATCH);
2872
0
              }
2873
0
            GETCHARINCTEST(fc, Feptr);
2874
0
            if ((UCD_BIDICLASS(fc) == Lpropvalue) == notmatch)
2875
0
              RRETURN(MATCH_NOMATCH);
2876
0
            }
2877
0
          break;
2878
2879
0
          case PT_BOOL:
2880
0
          for (i = 1; i <= Lmin; i++)
2881
0
            {
2882
0
            BOOL ok;
2883
0
            const ucd_record *prop;
2884
0
            if (Feptr >= mb->end_subject)
2885
0
              {
2886
0
              SCHECK_PARTIAL();
2887
0
              RRETURN(MATCH_NOMATCH);
2888
0
              }
2889
0
            GETCHARINCTEST(fc, Feptr);
2890
0
            prop = GET_UCD(fc);
2891
0
            ok = MAPBIT(PRIV(ucd_boolprop_sets) +
2892
0
              UCD_BPROPS_PROP(prop), Lpropvalue) != 0;
2893
0
            if (ok == notmatch)
2894
0
              RRETURN(MATCH_NOMATCH);
2895
0
            }
2896
0
          break;
2897
2898
          /* This should not occur */
2899
2900
0
          default:
2901
0
          return PCRE2_ERROR_INTERNAL;
2902
0
          }
2903
0
        }
2904
2905
      /* Match extended Unicode sequences. We will get here only if the
2906
      support is in the binary; otherwise a compile-time error occurs. */
2907
2908
0
      else if (Lctype == OP_EXTUNI)
2909
0
        {
2910
0
        for (i = 1; i <= Lmin; i++)
2911
0
          {
2912
0
          if (Feptr >= mb->end_subject)
2913
0
            {
2914
0
            SCHECK_PARTIAL();
2915
0
            RRETURN(MATCH_NOMATCH);
2916
0
            }
2917
0
          else
2918
0
            {
2919
0
            GETCHARINCTEST(fc, Feptr);
2920
0
            Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject,
2921
0
              mb->end_subject, utf, NULL);
2922
0
            }
2923
0
          CHECK_PARTIAL();
2924
0
          }
2925
0
        }
2926
0
      else
2927
0
#endif     /* SUPPORT_UNICODE */
2928
2929
/* Handle all other cases in UTF mode */
2930
2931
0
#ifdef SUPPORT_UNICODE
2932
0
      if (utf) switch(Lctype)
2933
0
        {
2934
0
        case OP_ANY:
2935
0
        for (i = 1; i <= Lmin; i++)
2936
0
          {
2937
0
          if (Feptr >= mb->end_subject)
2938
0
            {
2939
0
            SCHECK_PARTIAL();
2940
0
            RRETURN(MATCH_NOMATCH);
2941
0
            }
2942
0
          if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
2943
0
          if (mb->partial != 0 &&
2944
0
              Feptr + 1 >= mb->end_subject &&
2945
0
              NLBLOCK->nltype == NLTYPE_FIXED &&
2946
0
              NLBLOCK->nllen == 2 &&
2947
0
              UCHAR21(Feptr) == NLBLOCK->nl[0])
2948
0
            {
2949
0
            mb->hitend = TRUE;
2950
0
            if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
2951
0
            }
2952
0
          Feptr++;
2953
0
          ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
2954
0
          }
2955
0
        break;
2956
2957
0
        case OP_ALLANY:
2958
0
        for (i = 1; i <= Lmin; i++)
2959
0
          {
2960
0
          if (Feptr >= mb->end_subject)
2961
0
            {
2962
0
            SCHECK_PARTIAL();
2963
0
            RRETURN(MATCH_NOMATCH);
2964
0
            }
2965
0
          Feptr++;
2966
0
          ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
2967
0
          }
2968
0
        break;
2969
2970
0
        case OP_ANYBYTE:
2971
0
        if (Feptr > mb->end_subject - Lmin) RRETURN(MATCH_NOMATCH);
2972
0
        Feptr += Lmin;
2973
0
        break;
2974
2975
0
        case OP_ANYNL:
2976
0
        for (i = 1; i <= Lmin; i++)
2977
0
          {
2978
0
          if (Feptr >= mb->end_subject)
2979
0
            {
2980
0
            SCHECK_PARTIAL();
2981
0
            RRETURN(MATCH_NOMATCH);
2982
0
            }
2983
0
          GETCHARINC(fc, Feptr);
2984
0
          switch(fc)
2985
0
            {
2986
0
            default: RRETURN(MATCH_NOMATCH);
2987
2988
0
            case CHAR_CR:
2989
0
            if (Feptr < mb->end_subject && UCHAR21(Feptr) == CHAR_LF) Feptr++;
2990
0
            break;
2991
2992
0
            case CHAR_LF:
2993
0
            break;
2994
2995
0
            case CHAR_VT:
2996
0
            case CHAR_FF:
2997
0
            case CHAR_NEL:
2998
0
#ifndef EBCDIC
2999
0
            case 0x2028:
3000
0
            case 0x2029:
3001
0
#endif  /* Not EBCDIC */
3002
0
            if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
3003
0
            break;
3004
0
            }
3005
0
          }
3006
0
        break;
3007
3008
0
        case OP_NOT_HSPACE:
3009
0
        for (i = 1; i <= Lmin; i++)
3010
0
          {
3011
0
          if (Feptr >= mb->end_subject)
3012
0
            {
3013
0
            SCHECK_PARTIAL();
3014
0
            RRETURN(MATCH_NOMATCH);
3015
0
            }
3016
0
          GETCHARINC(fc, Feptr);
3017
0
          switch(fc)
3018
0
            {
3019
0
            HSPACE_CASES: RRETURN(MATCH_NOMATCH);
3020
0
            default: break;
3021
0
            }
3022
0
          }
3023
0
        break;
3024
3025
0
        case OP_HSPACE:
3026
0
        for (i = 1; i <= Lmin; i++)
3027
0
          {
3028
0
          if (Feptr >= mb->end_subject)
3029
0
            {
3030
0
            SCHECK_PARTIAL();
3031
0
            RRETURN(MATCH_NOMATCH);
3032
0
            }
3033
0
          GETCHARINC(fc, Feptr);
3034
0
          switch(fc)
3035
0
            {
3036
0
            HSPACE_CASES: break;
3037
0
            default: RRETURN(MATCH_NOMATCH);
3038
0
            }
3039
0
          }
3040
0
        break;
3041
3042
0
        case OP_NOT_VSPACE:
3043
0
        for (i = 1; i <= Lmin; i++)
3044
0
          {
3045
0
          if (Feptr >= mb->end_subject)
3046
0
            {
3047
0
            SCHECK_PARTIAL();
3048
0
            RRETURN(MATCH_NOMATCH);
3049
0
            }
3050
0
          GETCHARINC(fc, Feptr);
3051
0
          switch(fc)
3052
0
            {
3053
0
            VSPACE_CASES: RRETURN(MATCH_NOMATCH);
3054
0
            default: break;
3055
0
            }
3056
0
          }
3057
0
        break;
3058
3059
0
        case OP_VSPACE:
3060
0
        for (i = 1; i <= Lmin; i++)
3061
0
          {
3062
0
          if (Feptr >= mb->end_subject)
3063
0
            {
3064
0
            SCHECK_PARTIAL();
3065
0
            RRETURN(MATCH_NOMATCH);
3066
0
            }
3067
0
          GETCHARINC(fc, Feptr);
3068
0
          switch(fc)
3069
0
            {
3070
0
            VSPACE_CASES: break;
3071
0
            default: RRETURN(MATCH_NOMATCH);
3072
0
            }
3073
0
          }
3074
0
        break;
3075
3076
0
        case OP_NOT_DIGIT:
3077
0
        for (i = 1; i <= Lmin; i++)
3078
0
          {
3079
0
          if (Feptr >= mb->end_subject)
3080
0
            {
3081
0
            SCHECK_PARTIAL();
3082
0
            RRETURN(MATCH_NOMATCH);
3083
0
            }
3084
0
          GETCHARINC(fc, Feptr);
3085
0
          if (fc < 128 && (mb->ctypes[fc] & ctype_digit) != 0)
3086
0
            RRETURN(MATCH_NOMATCH);
3087
0
          }
3088
0
        break;
3089
3090
0
        case OP_DIGIT:
3091
0
        for (i = 1; i <= Lmin; i++)
3092
0
          {
3093
0
          uint32_t cc;
3094
0
          if (Feptr >= mb->end_subject)
3095
0
            {
3096
0
            SCHECK_PARTIAL();
3097
0
            RRETURN(MATCH_NOMATCH);
3098
0
            }
3099
0
          cc = UCHAR21(Feptr);
3100
0
          if (cc >= 128 || (mb->ctypes[cc] & ctype_digit) == 0)
3101
0
            RRETURN(MATCH_NOMATCH);
3102
0
          Feptr++;
3103
          /* No need to skip more code units - we know it has only one. */
3104
0
          }
3105
0
        break;
3106
3107
0
        case OP_NOT_WHITESPACE:
3108
0
        for (i = 1; i <= Lmin; i++)
3109
0
          {
3110
0
          uint32_t cc;
3111
0
          if (Feptr >= mb->end_subject)
3112
0
            {
3113
0
            SCHECK_PARTIAL();
3114
0
            RRETURN(MATCH_NOMATCH);
3115
0
            }
3116
0
          cc = UCHAR21(Feptr);
3117
0
          if (cc < 128 && (mb->ctypes[cc] & ctype_space) != 0)
3118
0
            RRETURN(MATCH_NOMATCH);
3119
0
          Feptr++;
3120
0
          ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
3121
0
          }
3122
0
        break;
3123
3124
0
        case OP_WHITESPACE:
3125
0
        for (i = 1; i <= Lmin; i++)
3126
0
          {
3127
0
          uint32_t cc;
3128
0
          if (Feptr >= mb->end_subject)
3129
0
            {
3130
0
            SCHECK_PARTIAL();
3131
0
            RRETURN(MATCH_NOMATCH);
3132
0
            }
3133
0
          cc = UCHAR21(Feptr);
3134
0
          if (cc >= 128 || (mb->ctypes[cc] & ctype_space) == 0)
3135
0
            RRETURN(MATCH_NOMATCH);
3136
0
          Feptr++;
3137
          /* No need to skip more code units - we know it has only one. */
3138
0
          }
3139
0
        break;
3140
3141
0
        case OP_NOT_WORDCHAR:
3142
0
        for (i = 1; i <= Lmin; i++)
3143
0
          {
3144
0
          uint32_t cc;
3145
0
          if (Feptr >= mb->end_subject)
3146
0
            {
3147
0
            SCHECK_PARTIAL();
3148
0
            RRETURN(MATCH_NOMATCH);
3149
0
            }
3150
0
          cc = UCHAR21(Feptr);
3151
0
          if (cc < 128 && (mb->ctypes[cc] & ctype_word) != 0)
3152
0
            RRETURN(MATCH_NOMATCH);
3153
0
          Feptr++;
3154
0
          ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
3155
0
          }
3156
0
        break;
3157
3158
0
        case OP_WORDCHAR:
3159
0
        for (i = 1; i <= Lmin; i++)
3160
0
          {
3161
0
          uint32_t cc;
3162
0
          if (Feptr >= mb->end_subject)
3163
0
            {
3164
0
            SCHECK_PARTIAL();
3165
0
            RRETURN(MATCH_NOMATCH);
3166
0
            }
3167
0
          cc = UCHAR21(Feptr);
3168
0
          if (cc >= 128 || (mb->ctypes[cc] & ctype_word) == 0)
3169
0
            RRETURN(MATCH_NOMATCH);
3170
0
          Feptr++;
3171
          /* No need to skip more code units - we know it has only one. */
3172
0
          }
3173
0
        break;
3174
3175
0
        default:
3176
0
        return PCRE2_ERROR_INTERNAL;
3177
0
        }  /* End switch(Lctype) */
3178
3179
0
      else
3180
0
#endif     /* SUPPORT_UNICODE */
3181
3182
      /* Code for the non-UTF case for minimum matching of operators other
3183
      than OP_PROP and OP_NOTPROP. */
3184
3185
0
      switch(Lctype)
3186
0
        {
3187
0
        case OP_ANY:
3188
0
        for (i = 1; i <= Lmin; i++)
3189
0
          {
3190
0
          if (Feptr >= mb->end_subject)
3191
0
            {
3192
0
            SCHECK_PARTIAL();
3193
0
            RRETURN(MATCH_NOMATCH);
3194
0
            }
3195
0
          if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
3196
0
          if (mb->partial != 0 &&
3197
0
              Feptr + 1 >= mb->end_subject &&
3198
0
              NLBLOCK->nltype == NLTYPE_FIXED &&
3199
0
              NLBLOCK->nllen == 2 &&
3200
0
              *Feptr == NLBLOCK->nl[0])
3201
0
            {
3202
0
            mb->hitend = TRUE;
3203
0
            if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
3204
0
            }
3205
0
          Feptr++;
3206
0
          }
3207
0
        break;
3208
3209
0
        case OP_ALLANY:
3210
0
        if (Feptr > mb->end_subject - Lmin)
3211
0
          {
3212
0
          SCHECK_PARTIAL();
3213
0
          RRETURN(MATCH_NOMATCH);
3214
0
          }
3215
0
        Feptr += Lmin;
3216
0
        break;
3217
3218
        /* This OP_ANYBYTE case will never be reached because \C gets turned
3219
        into OP_ALLANY in non-UTF mode. Cut out the code so that coverage
3220
        reports don't complain about its never being used. */
3221
3222
/*        case OP_ANYBYTE:
3223
*        if (Feptr > mb->end_subject - Lmin)
3224
*          {
3225
*          SCHECK_PARTIAL();
3226
*          RRETURN(MATCH_NOMATCH);
3227
*          }
3228
*        Feptr += Lmin;
3229
*        break;
3230
*/
3231
0
        case OP_ANYNL:
3232
0
        for (i = 1; i <= Lmin; i++)
3233
0
          {
3234
0
          if (Feptr >= mb->end_subject)
3235
0
            {
3236
0
            SCHECK_PARTIAL();
3237
0
            RRETURN(MATCH_NOMATCH);
3238
0
            }
3239
0
          switch(*Feptr++)
3240
0
            {
3241
0
            default: RRETURN(MATCH_NOMATCH);
3242
3243
0
            case CHAR_CR:
3244
0
            if (Feptr < mb->end_subject && *Feptr == CHAR_LF) Feptr++;
3245
0
            break;
3246
3247
0
            case CHAR_LF:
3248
0
            break;
3249
3250
0
            case CHAR_VT:
3251
0
            case CHAR_FF:
3252
0
            case CHAR_NEL:
3253
#if PCRE2_CODE_UNIT_WIDTH != 8
3254
            case 0x2028:
3255
            case 0x2029:
3256
#endif
3257
0
            if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
3258
0
            break;
3259
0
            }
3260
0
          }
3261
0
        break;
3262
3263
0
        case OP_NOT_HSPACE:
3264
0
        for (i = 1; i <= Lmin; i++)
3265
0
          {
3266
0
          if (Feptr >= mb->end_subject)
3267
0
            {
3268
0
            SCHECK_PARTIAL();
3269
0
            RRETURN(MATCH_NOMATCH);
3270
0
            }
3271
0
          switch(*Feptr++)
3272
0
            {
3273
0
            default: break;
3274
0
            HSPACE_BYTE_CASES:
3275
#if PCRE2_CODE_UNIT_WIDTH != 8
3276
            HSPACE_MULTIBYTE_CASES:
3277
#endif
3278
0
            RRETURN(MATCH_NOMATCH);
3279
0
            }
3280
0
          }
3281
0
        break;
3282
3283
0
        case OP_HSPACE:
3284
0
        for (i = 1; i <= Lmin; i++)
3285
0
          {
3286
0
          if (Feptr >= mb->end_subject)
3287
0
            {
3288
0
            SCHECK_PARTIAL();
3289
0
            RRETURN(MATCH_NOMATCH);
3290
0
            }
3291
0
          switch(*Feptr++)
3292
0
            {
3293
0
            default: RRETURN(MATCH_NOMATCH);
3294
0
            HSPACE_BYTE_CASES:
3295
#if PCRE2_CODE_UNIT_WIDTH != 8
3296
            HSPACE_MULTIBYTE_CASES:
3297
#endif
3298
0
            break;
3299
0
            }
3300
0
          }
3301
0
        break;
3302
3303
0
        case OP_NOT_VSPACE:
3304
0
        for (i = 1; i <= Lmin; i++)
3305
0
          {
3306
0
          if (Feptr >= mb->end_subject)
3307
0
            {
3308
0
            SCHECK_PARTIAL();
3309
0
            RRETURN(MATCH_NOMATCH);
3310
0
            }
3311
0
          switch(*Feptr++)
3312
0
            {
3313
0
            VSPACE_BYTE_CASES:
3314
#if PCRE2_CODE_UNIT_WIDTH != 8
3315
            VSPACE_MULTIBYTE_CASES:
3316
#endif
3317
0
            RRETURN(MATCH_NOMATCH);
3318
0
            default: break;
3319
0
            }
3320
0
          }
3321
0
        break;
3322
3323
0
        case OP_VSPACE:
3324
0
        for (i = 1; i <= Lmin; i++)
3325
0
          {
3326
0
          if (Feptr >= mb->end_subject)
3327
0
            {
3328
0
            SCHECK_PARTIAL();
3329
0
            RRETURN(MATCH_NOMATCH);
3330
0
            }
3331
0
          switch(*Feptr++)
3332
0
            {
3333
0
            default: RRETURN(MATCH_NOMATCH);
3334
0
            VSPACE_BYTE_CASES:
3335
#if PCRE2_CODE_UNIT_WIDTH != 8
3336
            VSPACE_MULTIBYTE_CASES:
3337
#endif
3338
0
            break;
3339
0
            }
3340
0
          }
3341
0
        break;
3342
3343
0
        case OP_NOT_DIGIT:
3344
0
        for (i = 1; i <= Lmin; i++)
3345
0
          {
3346
0
          if (Feptr >= mb->end_subject)
3347
0
            {
3348
0
            SCHECK_PARTIAL();
3349
0
            RRETURN(MATCH_NOMATCH);
3350
0
            }
3351
0
          if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_digit) != 0)
3352
0
            RRETURN(MATCH_NOMATCH);
3353
0
          Feptr++;
3354
0
          }
3355
0
        break;
3356
3357
0
        case OP_DIGIT:
3358
0
        for (i = 1; i <= Lmin; i++)
3359
0
          {
3360
0
          if (Feptr >= mb->end_subject)
3361
0
            {
3362
0
            SCHECK_PARTIAL();
3363
0
            RRETURN(MATCH_NOMATCH);
3364
0
            }
3365
0
          if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_digit) == 0)
3366
0
            RRETURN(MATCH_NOMATCH);
3367
0
          Feptr++;
3368
0
          }
3369
0
        break;
3370
3371
0
        case OP_NOT_WHITESPACE:
3372
0
        for (i = 1; i <= Lmin; i++)
3373
0
          {
3374
0
          if (Feptr >= mb->end_subject)
3375
0
            {
3376
0
            SCHECK_PARTIAL();
3377
0
            RRETURN(MATCH_NOMATCH);
3378
0
            }
3379
0
          if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_space) != 0)
3380
0
            RRETURN(MATCH_NOMATCH);
3381
0
          Feptr++;
3382
0
          }
3383
0
        break;
3384
3385
0
        case OP_WHITESPACE:
3386
0
        for (i = 1; i <= Lmin; i++)
3387
0
          {
3388
0
          if (Feptr >= mb->end_subject)
3389
0
            {
3390
0
            SCHECK_PARTIAL();
3391
0
            RRETURN(MATCH_NOMATCH);
3392
0
            }
3393
0
          if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_space) == 0)
3394
0
            RRETURN(MATCH_NOMATCH);
3395
0
          Feptr++;
3396
0
          }
3397
0
        break;
3398
3399
0
        case OP_NOT_WORDCHAR:
3400
0
        for (i = 1; i <= Lmin; i++)
3401
0
          {
3402
0
          if (Feptr >= mb->end_subject)
3403
0
            {
3404
0
            SCHECK_PARTIAL();
3405
0
            RRETURN(MATCH_NOMATCH);
3406
0
            }
3407
0
          if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_word) != 0)
3408
0
            RRETURN(MATCH_NOMATCH);
3409
0
          Feptr++;
3410
0
          }
3411
0
        break;
3412
3413
0
        case OP_WORDCHAR:
3414
0
        for (i = 1; i <= Lmin; i++)
3415
0
          {
3416
0
          if (Feptr >= mb->end_subject)
3417
0
            {
3418
0
            SCHECK_PARTIAL();
3419
0
            RRETURN(MATCH_NOMATCH);
3420
0
            }
3421
0
          if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_word) == 0)
3422
0
            RRETURN(MATCH_NOMATCH);
3423
0
          Feptr++;
3424
0
          }
3425
0
        break;
3426
3427
0
        default:
3428
0
        return PCRE2_ERROR_INTERNAL;
3429
0
        }
3430
0
      }
3431
3432
    /* If Lmin = Lmax we are done. Continue with the main loop. */
3433
3434
0
    if (Lmin == Lmax) continue;
3435
3436
    /* If minimizing, we have to test the rest of the pattern before each
3437
    subsequent match. This means we cannot use a local "notmatch" variable as
3438
    in the other cases. As all 4 temporary 32-bit values in the frame are
3439
    already in use, just test the type each time. */
3440
3441
0
    if (reptype == REPTYPE_MIN)
3442
0
      {
3443
0
#ifdef SUPPORT_UNICODE
3444
0
      if (proptype >= 0)
3445
0
        {
3446
0
        switch(proptype)
3447
0
          {
3448
0
          case PT_ANY:
3449
0
          for (;;)
3450
0
            {
3451
0
            RMATCH(Fecode, RM208);
3452
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3453
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3454
0
            if (Feptr >= mb->end_subject)
3455
0
              {
3456
0
              SCHECK_PARTIAL();
3457
0
              RRETURN(MATCH_NOMATCH);
3458
0
              }
3459
0
            GETCHARINCTEST(fc, Feptr);
3460
0
            if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
3461
0
            }
3462
          /* Control never gets here */
3463
3464
0
          case PT_LAMP:
3465
0
          for (;;)
3466
0
            {
3467
0
            int chartype;
3468
0
            RMATCH(Fecode, RM209);
3469
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3470
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3471
0
            if (Feptr >= mb->end_subject)
3472
0
              {
3473
0
              SCHECK_PARTIAL();
3474
0
              RRETURN(MATCH_NOMATCH);
3475
0
              }
3476
0
            GETCHARINCTEST(fc, Feptr);
3477
0
            chartype = UCD_CHARTYPE(fc);
3478
0
            if ((chartype == ucp_Lu ||
3479
0
                 chartype == ucp_Ll ||
3480
0
                 chartype == ucp_Lt) == (Lctype == OP_NOTPROP))
3481
0
              RRETURN(MATCH_NOMATCH);
3482
0
            }
3483
          /* Control never gets here */
3484
3485
0
          case PT_GC:
3486
0
          for (;;)
3487
0
            {
3488
0
            RMATCH(Fecode, RM210);
3489
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3490
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3491
0
            if (Feptr >= mb->end_subject)
3492
0
              {
3493
0
              SCHECK_PARTIAL();
3494
0
              RRETURN(MATCH_NOMATCH);
3495
0
              }
3496
0
            GETCHARINCTEST(fc, Feptr);
3497
0
            if ((UCD_CATEGORY(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3498
0
              RRETURN(MATCH_NOMATCH);
3499
0
            }
3500
          /* Control never gets here */
3501
3502
0
          case PT_PC:
3503
0
          for (;;)
3504
0
            {
3505
0
            RMATCH(Fecode, RM211);
3506
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3507
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3508
0
            if (Feptr >= mb->end_subject)
3509
0
              {
3510
0
              SCHECK_PARTIAL();
3511
0
              RRETURN(MATCH_NOMATCH);
3512
0
              }
3513
0
            GETCHARINCTEST(fc, Feptr);
3514
0
            if ((UCD_CHARTYPE(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3515
0
              RRETURN(MATCH_NOMATCH);
3516
0
            }
3517
          /* Control never gets here */
3518
3519
0
          case PT_SC:
3520
0
          for (;;)
3521
0
            {
3522
0
            RMATCH(Fecode, RM212);
3523
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3524
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3525
0
            if (Feptr >= mb->end_subject)
3526
0
              {
3527
0
              SCHECK_PARTIAL();
3528
0
              RRETURN(MATCH_NOMATCH);
3529
0
              }
3530
0
            GETCHARINCTEST(fc, Feptr);
3531
0
            if ((UCD_SCRIPT(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3532
0
              RRETURN(MATCH_NOMATCH);
3533
0
            }
3534
          /* Control never gets here */
3535
3536
0
          case PT_SCX:
3537
0
          for (;;)
3538
0
            {
3539
0
            BOOL ok;
3540
0
            const ucd_record *prop;
3541
0
            RMATCH(Fecode, RM225);
3542
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3543
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3544
0
            if (Feptr >= mb->end_subject)
3545
0
              {
3546
0
              SCHECK_PARTIAL();
3547
0
              RRETURN(MATCH_NOMATCH);
3548
0
              }
3549
0
            GETCHARINCTEST(fc, Feptr);
3550
0
            prop = GET_UCD(fc);
3551
0
            ok = (prop->script == Lpropvalue
3552
0
                  || MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0);
3553
0
            if (ok == (Lctype == OP_NOTPROP))
3554
0
              RRETURN(MATCH_NOMATCH);
3555
0
            }
3556
          /* Control never gets here */
3557
3558
0
          case PT_ALNUM:
3559
0
          for (;;)
3560
0
            {
3561
0
            int category;
3562
0
            RMATCH(Fecode, RM213);
3563
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3564
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3565
0
            if (Feptr >= mb->end_subject)
3566
0
              {
3567
0
              SCHECK_PARTIAL();
3568
0
              RRETURN(MATCH_NOMATCH);
3569
0
              }
3570
0
            GETCHARINCTEST(fc, Feptr);
3571
0
            category = UCD_CATEGORY(fc);
3572
0
            if ((category == ucp_L || category == ucp_N) == (Lctype == OP_NOTPROP))
3573
0
              RRETURN(MATCH_NOMATCH);
3574
0
            }
3575
          /* Control never gets here */
3576
3577
          /* Perl space used to exclude VT, but from Perl 5.18 it is included,
3578
          which means that Perl space and POSIX space are now identical. PCRE
3579
          was changed at release 8.34. */
3580
3581
0
          case PT_SPACE:    /* Perl space */
3582
0
          case PT_PXSPACE:  /* POSIX space */
3583
0
          for (;;)
3584
0
            {
3585
0
            RMATCH(Fecode, RM214);
3586
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3587
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3588
0
            if (Feptr >= mb->end_subject)
3589
0
              {
3590
0
              SCHECK_PARTIAL();
3591
0
              RRETURN(MATCH_NOMATCH);
3592
0
              }
3593
0
            GETCHARINCTEST(fc, Feptr);
3594
0
            switch(fc)
3595
0
              {
3596
0
              HSPACE_CASES:
3597
0
              VSPACE_CASES:
3598
0
              if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
3599
0
              break;
3600
3601
0
              default:
3602
0
              if ((UCD_CATEGORY(fc) == ucp_Z) == (Lctype == OP_NOTPROP))
3603
0
                RRETURN(MATCH_NOMATCH);
3604
0
              break;
3605
0
              }
3606
0
            }
3607
          /* Control never gets here */
3608
3609
0
          case PT_WORD:
3610
0
          for (;;)
3611
0
            {
3612
0
            int category;
3613
0
            RMATCH(Fecode, RM215);
3614
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3615
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3616
0
            if (Feptr >= mb->end_subject)
3617
0
              {
3618
0
              SCHECK_PARTIAL();
3619
0
              RRETURN(MATCH_NOMATCH);
3620
0
              }
3621
0
            GETCHARINCTEST(fc, Feptr);
3622
0
            category = UCD_CATEGORY(fc);
3623
0
            if ((category == ucp_L ||
3624
0
                 category == ucp_N ||
3625
0
                 fc == CHAR_UNDERSCORE) == (Lctype == OP_NOTPROP))
3626
0
              RRETURN(MATCH_NOMATCH);
3627
0
            }
3628
          /* Control never gets here */
3629
3630
0
          case PT_CLIST:
3631
0
          for (;;)
3632
0
            {
3633
0
            const uint32_t *cp;
3634
0
            RMATCH(Fecode, RM216);
3635
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3636
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3637
0
            if (Feptr >= mb->end_subject)
3638
0
              {
3639
0
              SCHECK_PARTIAL();
3640
0
              RRETURN(MATCH_NOMATCH);
3641
0
              }
3642
0
            GETCHARINCTEST(fc, Feptr);
3643
0
            cp = PRIV(ucd_caseless_sets) + Lpropvalue;
3644
0
            for (;;)
3645
0
              {
3646
0
              if (fc < *cp)
3647
0
                {
3648
0
                if (Lctype == OP_NOTPROP) break;
3649
0
                RRETURN(MATCH_NOMATCH);
3650
0
                }
3651
0
              if (fc == *cp++)
3652
0
                {
3653
0
                if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
3654
0
                break;
3655
0
                }
3656
0
              }
3657
0
            }
3658
          /* Control never gets here */
3659
3660
0
          case PT_UCNC:
3661
0
          for (;;)
3662
0
            {
3663
0
            RMATCH(Fecode, RM217);
3664
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3665
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3666
0
            if (Feptr >= mb->end_subject)
3667
0
              {
3668
0
              SCHECK_PARTIAL();
3669
0
              RRETURN(MATCH_NOMATCH);
3670
0
              }
3671
0
            GETCHARINCTEST(fc, Feptr);
3672
0
            if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
3673
0
                 fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
3674
0
                 fc >= 0xe000) == (Lctype == OP_NOTPROP))
3675
0
              RRETURN(MATCH_NOMATCH);
3676
0
            }
3677
          /* Control never gets here */
3678
3679
0
          case PT_BIDICL:
3680
0
          for (;;)
3681
0
            {
3682
0
            RMATCH(Fecode, RM224);
3683
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3684
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3685
0
            if (Feptr >= mb->end_subject)
3686
0
              {
3687
0
              SCHECK_PARTIAL();
3688
0
              RRETURN(MATCH_NOMATCH);
3689
0
              }
3690
0
            GETCHARINCTEST(fc, Feptr);
3691
0
            if ((UCD_BIDICLASS(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3692
0
              RRETURN(MATCH_NOMATCH);
3693
0
            }
3694
          /* Control never gets here */
3695
3696
0
          case PT_BOOL:
3697
0
          for (;;)
3698
0
            {
3699
0
            BOOL ok;
3700
0
            const ucd_record *prop;
3701
0
            RMATCH(Fecode, RM223);
3702
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3703
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3704
0
            if (Feptr >= mb->end_subject)
3705
0
              {
3706
0
              SCHECK_PARTIAL();
3707
0
              RRETURN(MATCH_NOMATCH);
3708
0
              }
3709
0
            GETCHARINCTEST(fc, Feptr);
3710
0
            prop = GET_UCD(fc);
3711
0
            ok = MAPBIT(PRIV(ucd_boolprop_sets) +
3712
0
              UCD_BPROPS_PROP(prop), Lpropvalue) != 0;
3713
0
            if (ok == (Lctype == OP_NOTPROP))
3714
0
              RRETURN(MATCH_NOMATCH);
3715
0
            }
3716
          /* Control never gets here */
3717
3718
          /* This should never occur */
3719
0
          default:
3720
0
          return PCRE2_ERROR_INTERNAL;
3721
0
          }
3722
0
        }
3723
3724
      /* Match extended Unicode sequences. We will get here only if the
3725
      support is in the binary; otherwise a compile-time error occurs. */
3726
3727
0
      else if (Lctype == OP_EXTUNI)
3728
0
        {
3729
0
        for (;;)
3730
0
          {
3731
0
          RMATCH(Fecode, RM218);
3732
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3733
0
          if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3734
0
          if (Feptr >= mb->end_subject)
3735
0
            {
3736
0
            SCHECK_PARTIAL();
3737
0
            RRETURN(MATCH_NOMATCH);
3738
0
            }
3739
0
          else
3740
0
            {
3741
0
            GETCHARINCTEST(fc, Feptr);
3742
0
            Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject,
3743
0
              utf, NULL);
3744
0
            }
3745
0
          CHECK_PARTIAL();
3746
0
          }
3747
0
        }
3748
0
      else
3749
0
#endif     /* SUPPORT_UNICODE */
3750
3751
      /* UTF mode for non-property testing character types. */
3752
3753
0
#ifdef SUPPORT_UNICODE
3754
0
      if (utf)
3755
0
        {
3756
0
        for (;;)
3757
0
          {
3758
0
          RMATCH(Fecode, RM219);
3759
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3760
0
          if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3761
0
          if (Feptr >= mb->end_subject)
3762
0
            {
3763
0
            SCHECK_PARTIAL();
3764
0
            RRETURN(MATCH_NOMATCH);
3765
0
            }
3766
0
          if (Lctype == OP_ANY && IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
3767
0
          GETCHARINC(fc, Feptr);
3768
0
          switch(Lctype)
3769
0
            {
3770
0
            case OP_ANY:               /* This is the non-NL case */
3771
0
            if (mb->partial != 0 &&    /* Take care with CRLF partial */
3772
0
                Feptr >= mb->end_subject &&
3773
0
                NLBLOCK->nltype == NLTYPE_FIXED &&
3774
0
                NLBLOCK->nllen == 2 &&
3775
0
                fc == NLBLOCK->nl[0])
3776
0
              {
3777
0
              mb->hitend = TRUE;
3778
0
              if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
3779
0
              }
3780
0
            break;
3781
3782
0
            case OP_ALLANY:
3783
0
            case OP_ANYBYTE:
3784
0
            break;
3785
3786
0
            case OP_ANYNL:
3787
0
            switch(fc)
3788
0
              {
3789
0
              default: RRETURN(MATCH_NOMATCH);
3790
3791
0
              case CHAR_CR:
3792
0
              if (Feptr < mb->end_subject && UCHAR21(Feptr) == CHAR_LF) Feptr++;
3793
0
              break;
3794
3795
0
              case CHAR_LF:
3796
0
              break;
3797
3798
0
              case CHAR_VT:
3799
0
              case CHAR_FF:
3800
0
              case CHAR_NEL:
3801
0
#ifndef EBCDIC
3802
0
              case 0x2028:
3803
0
              case 0x2029:
3804
0
#endif  /* Not EBCDIC */
3805
0
              if (mb->bsr_convention == PCRE2_BSR_ANYCRLF)
3806
0
                RRETURN(MATCH_NOMATCH);
3807
0
              break;
3808
0
              }
3809
0
            break;
3810
3811
0
            case OP_NOT_HSPACE:
3812
0
            switch(fc)
3813
0
              {
3814
0
              HSPACE_CASES: RRETURN(MATCH_NOMATCH);
3815
0
              default: break;
3816
0
              }
3817
0
            break;
3818
3819
0
            case OP_HSPACE:
3820
0
            switch(fc)
3821
0
              {
3822
0
              HSPACE_CASES: break;
3823
0
              default: RRETURN(MATCH_NOMATCH);
3824
0
              }
3825
0
            break;
3826
3827
0
            case OP_NOT_VSPACE:
3828
0
            switch(fc)
3829
0
              {
3830
0
              VSPACE_CASES: RRETURN(MATCH_NOMATCH);
3831
0
              default: break;
3832
0
              }
3833
0
            break;
3834
3835
0
            case OP_VSPACE:
3836
0
            switch(fc)
3837
0
              {
3838
0
              VSPACE_CASES: break;
3839
0
              default: RRETURN(MATCH_NOMATCH);
3840
0
              }
3841
0
            break;
3842
3843
0
            case OP_NOT_DIGIT:
3844
0
            if (fc < 256 && (mb->ctypes[fc] & ctype_digit) != 0)
3845
0
              RRETURN(MATCH_NOMATCH);
3846
0
            break;
3847
3848
0
            case OP_DIGIT:
3849
0
            if (fc >= 256 || (mb->ctypes[fc] & ctype_digit) == 0)
3850
0
              RRETURN(MATCH_NOMATCH);
3851
0
            break;
3852
3853
0
            case OP_NOT_WHITESPACE:
3854
0
            if (fc < 256 && (mb->ctypes[fc] & ctype_space) != 0)
3855
0
              RRETURN(MATCH_NOMATCH);
3856
0
            break;
3857
3858
0
            case OP_WHITESPACE:
3859
0
            if (fc >= 256 || (mb->ctypes[fc] & ctype_space) == 0)
3860
0
              RRETURN(MATCH_NOMATCH);
3861
0
            break;
3862
3863
0
            case OP_NOT_WORDCHAR:
3864
0
            if (fc < 256 && (mb->ctypes[fc] & ctype_word) != 0)
3865
0
              RRETURN(MATCH_NOMATCH);
3866
0
            break;
3867
3868
0
            case OP_WORDCHAR:
3869
0
            if (fc >= 256 || (mb->ctypes[fc] & ctype_word) == 0)
3870
0
              RRETURN(MATCH_NOMATCH);
3871
0
            break;
3872
3873
0
            default:
3874
0
            return PCRE2_ERROR_INTERNAL;
3875
0
            }
3876
0
          }
3877
0
        }
3878
0
      else
3879
0
#endif  /* SUPPORT_UNICODE */
3880
3881
      /* Not UTF mode */
3882
0
        {
3883
0
        for (;;)
3884
0
          {
3885
0
          RMATCH(Fecode, RM33);
3886
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3887
0
          if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3888
0
          if (Feptr >= mb->end_subject)
3889
0
            {
3890
0
            SCHECK_PARTIAL();
3891
0
            RRETURN(MATCH_NOMATCH);
3892
0
            }
3893
0
          if (Lctype == OP_ANY && IS_NEWLINE(Feptr))
3894
0
            RRETURN(MATCH_NOMATCH);
3895
0
          fc = *Feptr++;
3896
0
          switch(Lctype)
3897
0
            {
3898
0
            case OP_ANY:               /* This is the non-NL case */
3899
0
            if (mb->partial != 0 &&    /* Take care with CRLF partial */
3900
0
                Feptr >= mb->end_subject &&
3901
0
                NLBLOCK->nltype == NLTYPE_FIXED &&
3902
0
                NLBLOCK->nllen == 2 &&
3903
0
                fc == NLBLOCK->nl[0])
3904
0
              {
3905
0
              mb->hitend = TRUE;
3906
0
              if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
3907
0
              }
3908
0
            break;
3909
3910
0
            case OP_ALLANY:
3911
0
            case OP_ANYBYTE:
3912
0
            break;
3913
3914
0
            case OP_ANYNL:
3915
0
            switch(fc)
3916
0
              {
3917
0
              default: RRETURN(MATCH_NOMATCH);
3918
3919
0
              case CHAR_CR:
3920
0
              if (Feptr < mb->end_subject && *Feptr == CHAR_LF) Feptr++;
3921
0
              break;
3922
3923
0
              case CHAR_LF:
3924
0
              break;
3925
3926
0
              case CHAR_VT:
3927
0
              case CHAR_FF:
3928
0
              case CHAR_NEL:
3929
#if PCRE2_CODE_UNIT_WIDTH != 8
3930
              case 0x2028:
3931
              case 0x2029:
3932
#endif
3933
0
              if (mb->bsr_convention == PCRE2_BSR_ANYCRLF)
3934
0
                RRETURN(MATCH_NOMATCH);
3935
0
              break;
3936
0
              }
3937
0
            break;
3938
3939
0
            case OP_NOT_HSPACE:
3940
0
            switch(fc)
3941
0
              {
3942
0
              default: break;
3943
0
              HSPACE_BYTE_CASES:
3944
#if PCRE2_CODE_UNIT_WIDTH != 8
3945
              HSPACE_MULTIBYTE_CASES:
3946
#endif
3947
0
              RRETURN(MATCH_NOMATCH);
3948
0
              }
3949
0
            break;
3950
3951
0
            case OP_HSPACE:
3952
0
            switch(fc)
3953
0
              {
3954
0
              default: RRETURN(MATCH_NOMATCH);
3955
0
              HSPACE_BYTE_CASES:
3956
#if PCRE2_CODE_UNIT_WIDTH != 8
3957
              HSPACE_MULTIBYTE_CASES:
3958
#endif
3959
0
              break;
3960
0
              }
3961
0
            break;
3962
3963
0
            case OP_NOT_VSPACE:
3964
0
            switch(fc)
3965
0
              {
3966
0
              default: break;
3967
0
              VSPACE_BYTE_CASES:
3968
#if PCRE2_CODE_UNIT_WIDTH != 8
3969
              VSPACE_MULTIBYTE_CASES:
3970
#endif
3971
0
              RRETURN(MATCH_NOMATCH);
3972
0
              }
3973
0
            break;
3974
3975
0
            case OP_VSPACE:
3976
0
            switch(fc)
3977
0
              {
3978
0
              default: RRETURN(MATCH_NOMATCH);
3979
0
              VSPACE_BYTE_CASES:
3980
#if PCRE2_CODE_UNIT_WIDTH != 8
3981
              VSPACE_MULTIBYTE_CASES:
3982
#endif
3983
0
              break;
3984
0
              }
3985
0
            break;
3986
3987
0
            case OP_NOT_DIGIT:
3988
0
            if (MAX_255(fc) && (mb->ctypes[fc] & ctype_digit) != 0)
3989
0
              RRETURN(MATCH_NOMATCH);
3990
0
            break;
3991
3992
0
            case OP_DIGIT:
3993
0
            if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_digit) == 0)
3994
0
              RRETURN(MATCH_NOMATCH);
3995
0
            break;
3996
3997
0
            case OP_NOT_WHITESPACE:
3998
0
            if (MAX_255(fc) && (mb->ctypes[fc] & ctype_space) != 0)
3999
0
              RRETURN(MATCH_NOMATCH);
4000
0
            break;
4001
4002
0
            case OP_WHITESPACE:
4003
0
            if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_space) == 0)
4004
0
              RRETURN(MATCH_NOMATCH);
4005
0
            break;
4006
4007
0
            case OP_NOT_WORDCHAR:
4008
0
            if (MAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0)
4009
0
              RRETURN(MATCH_NOMATCH);
4010
0
            break;
4011
4012
0
            case OP_WORDCHAR:
4013
0
            if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_word) == 0)
4014
0
              RRETURN(MATCH_NOMATCH);
4015
0
            break;
4016
4017
0
            default:
4018
0
            return PCRE2_ERROR_INTERNAL;
4019
0
            }
4020
0
          }
4021
0
        }
4022
      /* Control never gets here */
4023
0
      }
4024
4025
    /* If maximizing, it is worth using inline code for speed, doing the type
4026
    test once at the start (i.e. keep it out of the loops). Once again,
4027
    "notmatch" can be an ordinary local variable because the loops do not call
4028
    RMATCH. */
4029
4030
0
    else
4031
0
      {
4032
0
      Lstart_eptr = Feptr;  /* Remember where we started */
4033
4034
0
#ifdef SUPPORT_UNICODE
4035
0
      if (proptype >= 0)
4036
0
        {
4037
0
        BOOL notmatch = Lctype == OP_NOTPROP;
4038
0
        switch(proptype)
4039
0
          {
4040
0
          case PT_ANY:
4041
0
          for (i = Lmin; i < Lmax; i++)
4042
0
            {
4043
0
            int len = 1;
4044
0
            if (Feptr >= mb->end_subject)
4045
0
              {
4046
0
              SCHECK_PARTIAL();
4047
0
              break;
4048
0
              }
4049
0
            GETCHARLENTEST(fc, Feptr, len);
4050
0
            if (notmatch) break;
4051
0
            Feptr+= len;
4052
0
            }
4053
0
          break;
4054
4055
0
          case PT_LAMP:
4056
0
          for (i = Lmin; i < Lmax; i++)
4057
0
            {
4058
0
            int chartype;
4059
0
            int len = 1;
4060
0
            if (Feptr >= mb->end_subject)
4061
0
              {
4062
0
              SCHECK_PARTIAL();
4063
0
              break;
4064
0
              }
4065
0
            GETCHARLENTEST(fc, Feptr, len);
4066
0
            chartype = UCD_CHARTYPE(fc);
4067
0
            if ((chartype == ucp_Lu ||
4068
0
                 chartype == ucp_Ll ||
4069
0
                 chartype == ucp_Lt) == notmatch)
4070
0
              break;
4071
0
            Feptr+= len;
4072
0
            }
4073
0
          break;
4074
4075
0
          case PT_GC:
4076
0
          for (i = Lmin; i < Lmax; i++)
4077
0
            {
4078
0
            int len = 1;
4079
0
            if (Feptr >= mb->end_subject)
4080
0
              {
4081
0
              SCHECK_PARTIAL();
4082
0
              break;
4083
0
              }
4084
0
            GETCHARLENTEST(fc, Feptr, len);
4085
0
            if ((UCD_CATEGORY(fc) == Lpropvalue) == notmatch) break;
4086
0
            Feptr+= len;
4087
0
            }
4088
0
          break;
4089
4090
0
          case PT_PC:
4091
0
          for (i = Lmin; i < Lmax; i++)
4092
0
            {
4093
0
            int len = 1;
4094
0
            if (Feptr >= mb->end_subject)
4095
0
              {
4096
0
              SCHECK_PARTIAL();
4097
0
              break;
4098
0
              }
4099
0
            GETCHARLENTEST(fc, Feptr, len);
4100
0
            if ((UCD_CHARTYPE(fc) == Lpropvalue) == notmatch) break;
4101
0
            Feptr+= len;
4102
0
            }
4103
0
          break;
4104
4105
0
          case PT_SC:
4106
0
          for (i = Lmin; i < Lmax; i++)
4107
0
            {
4108
0
            int len = 1;
4109
0
            if (Feptr >= mb->end_subject)
4110
0
              {
4111
0
              SCHECK_PARTIAL();
4112
0
              break;
4113
0
              }
4114
0
            GETCHARLENTEST(fc, Feptr, len);
4115
0
            if ((UCD_SCRIPT(fc) == Lpropvalue) == notmatch) break;
4116
0
            Feptr+= len;
4117
0
            }
4118
0
          break;
4119
4120
0
          case PT_SCX:
4121
0
          for (i = Lmin; i < Lmax; i++)
4122
0
            {
4123
0
            BOOL ok;
4124
0
            const ucd_record *prop;
4125
0
            int len = 1;
4126
0
            if (Feptr >= mb->end_subject)
4127
0
              {
4128
0
              SCHECK_PARTIAL();
4129
0
              break;
4130
0
              }
4131
0
            GETCHARLENTEST(fc, Feptr, len);
4132
0
            prop = GET_UCD(fc);
4133
0
            ok = (prop->script == Lpropvalue ||
4134
0
                  MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0);
4135
0
            if (ok == notmatch) break;
4136
0
            Feptr+= len;
4137
0
            }
4138
0
          break;
4139
4140
0
          case PT_ALNUM:
4141
0
          for (i = Lmin; i < Lmax; i++)
4142
0
            {
4143
0
            int category;
4144
0
            int len = 1;
4145
0
            if (Feptr >= mb->end_subject)
4146
0
              {
4147
0
              SCHECK_PARTIAL();
4148
0
              break;
4149
0
              }
4150
0
            GETCHARLENTEST(fc, Feptr, len);
4151
0
            category = UCD_CATEGORY(fc);
4152
0
            if ((category == ucp_L || category == ucp_N) == notmatch)
4153
0
              break;
4154
0
            Feptr+= len;
4155
0
            }
4156
0
          break;
4157
4158
          /* Perl space used to exclude VT, but from Perl 5.18 it is included,
4159
          which means that Perl space and POSIX space are now identical. PCRE
4160
          was changed at release 8.34. */
4161
4162
0
          case PT_SPACE:    /* Perl space */
4163
0
          case PT_PXSPACE:  /* POSIX space */
4164
0
          for (i = Lmin; i < Lmax; i++)
4165
0
            {
4166
0
            int len = 1;
4167
0
            if (Feptr >= mb->end_subject)
4168
0
              {
4169
0
              SCHECK_PARTIAL();
4170
0
              break;
4171
0
              }
4172
0
            GETCHARLENTEST(fc, Feptr, len);
4173
0
            switch(fc)
4174
0
              {
4175
0
              HSPACE_CASES:
4176
0
              VSPACE_CASES:
4177
0
              if (notmatch) goto ENDLOOP99;  /* Break the loop */
4178
0
              break;
4179
4180
0
              default:
4181
0
              if ((UCD_CATEGORY(fc) == ucp_Z) == notmatch)
4182
0
                goto ENDLOOP99;   /* Break the loop */
4183
0
              break;
4184
0
              }
4185
0
            Feptr+= len;
4186
0
            }
4187
0
          ENDLOOP99:
4188
0
          break;
4189
4190
0
          case PT_WORD:
4191
0
          for (i = Lmin; i < Lmax; i++)
4192
0
            {
4193
0
            int category;
4194
0
            int len = 1;
4195
0
            if (Feptr >= mb->end_subject)
4196
0
              {
4197
0
              SCHECK_PARTIAL();
4198
0
              break;
4199
0
              }
4200
0
            GETCHARLENTEST(fc, Feptr, len);
4201
0
            category = UCD_CATEGORY(fc);
4202
0
            if ((category == ucp_L || category == ucp_N ||
4203
0
                 fc == CHAR_UNDERSCORE) == notmatch)
4204
0
              break;
4205
0
            Feptr+= len;
4206
0
            }
4207
0
          break;
4208
4209
0
          case PT_CLIST:
4210
0
          for (i = Lmin; i < Lmax; i++)
4211
0
            {
4212
0
            const uint32_t *cp;
4213
0
            int len = 1;
4214
0
            if (Feptr >= mb->end_subject)
4215
0
              {
4216
0
              SCHECK_PARTIAL();
4217
0
              break;
4218
0
              }
4219
0
            GETCHARLENTEST(fc, Feptr, len);
4220
0
            cp = PRIV(ucd_caseless_sets) + Lpropvalue;
4221
0
            for (;;)
4222
0
              {
4223
0
              if (fc < *cp)
4224
0
                { if (notmatch) break; else goto GOT_MAX; }
4225
0
              if (fc == *cp++)
4226
0
                { if (notmatch) goto GOT_MAX; else break; }
4227
0
              }
4228
0
            Feptr += len;
4229
0
            }
4230
0
          GOT_MAX:
4231
0
          break;
4232
4233
0
          case PT_UCNC:
4234
0
          for (i = Lmin; i < Lmax; i++)
4235
0
            {
4236
0
            int len = 1;
4237
0
            if (Feptr >= mb->end_subject)
4238
0
              {
4239
0
              SCHECK_PARTIAL();
4240
0
              break;
4241
0
              }
4242
0
            GETCHARLENTEST(fc, Feptr, len);
4243
0
            if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
4244
0
                 fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
4245
0
                 fc >= 0xe000) == notmatch)
4246
0
              break;
4247
0
            Feptr += len;
4248
0
            }
4249
0
          break;
4250
4251
0
          case PT_BIDICL:
4252
0
          for (i = Lmin; i < Lmax; i++)
4253
0
            {
4254
0
            int len = 1;
4255
0
            if (Feptr >= mb->end_subject)
4256
0
              {
4257
0
              SCHECK_PARTIAL();
4258
0
              break;
4259
0
              }
4260
0
            GETCHARLENTEST(fc, Feptr, len);
4261
0
            if ((UCD_BIDICLASS(fc) == Lpropvalue) == notmatch) break;
4262
0
            Feptr+= len;
4263
0
            }
4264
0
          break;
4265
4266
0
          case PT_BOOL:
4267
0
          for (i = Lmin; i < Lmax; i++)
4268
0
            {
4269
0
            BOOL ok;
4270
0
            const ucd_record *prop;
4271
0
            int len = 1;
4272
0
            if (Feptr >= mb->end_subject)
4273
0
              {
4274
0
              SCHECK_PARTIAL();
4275
0
              break;
4276
0
              }
4277
0
            GETCHARLENTEST(fc, Feptr, len);
4278
0
            prop = GET_UCD(fc);
4279
0
            ok = MAPBIT(PRIV(ucd_boolprop_sets) +
4280
0
              UCD_BPROPS_PROP(prop), Lpropvalue) != 0;
4281
0
            if (ok == notmatch) break;
4282
0
            Feptr+= len;
4283
0
            }
4284
0
          break;
4285
4286
0
          default:
4287
0
          return PCRE2_ERROR_INTERNAL;
4288
0
          }
4289
4290
        /* Feptr is now past the end of the maximum run */
4291
4292
0
        if (reptype == REPTYPE_POS) continue;    /* No backtracking */
4293
4294
        /* After \C in UTF mode, Lstart_eptr might be in the middle of a
4295
        Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
4296
        go too far. */
4297
4298
0
        for(;;)
4299
0
          {
4300
0
          if (Feptr <= Lstart_eptr) break;
4301
0
          RMATCH(Fecode, RM222);
4302
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4303
0
          Feptr--;
4304
0
          if (utf) BACKCHAR(Feptr);
4305
0
          }
4306
0
        }
4307
4308
      /* Match extended Unicode grapheme clusters. We will get here only if the
4309
      support is in the binary; otherwise a compile-time error occurs. */
4310
4311
0
      else if (Lctype == OP_EXTUNI)
4312
0
        {
4313
0
        for (i = Lmin; i < Lmax; i++)
4314
0
          {
4315
0
          if (Feptr >= mb->end_subject)
4316
0
            {
4317
0
            SCHECK_PARTIAL();
4318
0
            break;
4319
0
            }
4320
0
          else
4321
0
            {
4322
0
            GETCHARINCTEST(fc, Feptr);
4323
0
            Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject,
4324
0
              utf, NULL);
4325
0
            }
4326
0
          CHECK_PARTIAL();
4327
0
          }
4328
4329
        /* Feptr is now past the end of the maximum run */
4330
4331
0
        if (reptype == REPTYPE_POS) continue;    /* No backtracking */
4332
4333
        /* We use <= Lstart_eptr rather than == Lstart_eptr to detect the start
4334
        of the run while backtracking because the use of \C in UTF mode can
4335
        cause BACKCHAR to move back past Lstart_eptr. This is just palliative;
4336
        the use of \C in UTF mode is fraught with danger. */
4337
4338
0
        for(;;)
4339
0
          {
4340
0
          int lgb, rgb;
4341
0
          PCRE2_SPTR fptr;
4342
4343
0
          if (Feptr <= Lstart_eptr) break;   /* At start of char run */
4344
0
          RMATCH(Fecode, RM220);
4345
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4346
4347
          /* Backtracking over an extended grapheme cluster involves inspecting
4348
          the previous two characters (if present) to see if a break is
4349
          permitted between them. */
4350
4351
0
          Feptr--;
4352
0
          if (!utf) fc = *Feptr; else
4353
0
            {
4354
0
            BACKCHAR(Feptr);
4355
0
            GETCHAR(fc, Feptr);
4356
0
            }
4357
0
          rgb = UCD_GRAPHBREAK(fc);
4358
4359
0
          for (;;)
4360
0
            {
4361
0
            if (Feptr <= Lstart_eptr) break;   /* At start of char run */
4362
0
            fptr = Feptr - 1;
4363
0
            if (!utf) fc = *fptr; else
4364
0
              {
4365
0
              BACKCHAR(fptr);
4366
0
              GETCHAR(fc, fptr);
4367
0
              }
4368
0
            lgb = UCD_GRAPHBREAK(fc);
4369
0
            if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
4370
0
            Feptr = fptr;
4371
0
            rgb = lgb;
4372
0
            }
4373
0
          }
4374
0
        }
4375
4376
0
      else
4377
0
#endif   /* SUPPORT_UNICODE */
4378
4379
0
#ifdef SUPPORT_UNICODE
4380
0
      if (utf)
4381
0
        {
4382
0
        switch(Lctype)
4383
0
          {
4384
0
          case OP_ANY:
4385
0
          for (i = Lmin; i < Lmax; i++)
4386
0
            {
4387
0
            if (Feptr >= mb->end_subject)
4388
0
              {
4389
0
              SCHECK_PARTIAL();
4390
0
              break;
4391
0
              }
4392
0
            if (IS_NEWLINE(Feptr)) break;
4393
0
            if (mb->partial != 0 &&    /* Take care with CRLF partial */
4394
0
                Feptr + 1 >= mb->end_subject &&
4395
0
                NLBLOCK->nltype == NLTYPE_FIXED &&
4396
0
                NLBLOCK->nllen == 2 &&
4397
0
                UCHAR21(Feptr) == NLBLOCK->nl[0])
4398
0
              {
4399
0
              mb->hitend = TRUE;
4400
0
              if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
4401
0
              }
4402
0
            Feptr++;
4403
0
            ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
4404
0
            }
4405
0
          break;
4406
4407
0
          case OP_ALLANY:
4408
0
          if (Lmax < UINT32_MAX)
4409
0
            {
4410
0
            for (i = Lmin; i < Lmax; i++)
4411
0
              {
4412
0
              if (Feptr >= mb->end_subject)
4413
0
                {
4414
0
                SCHECK_PARTIAL();
4415
0
                break;
4416
0
                }
4417
0
              Feptr++;
4418
0
              ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
4419
0
              }
4420
0
            }
4421
0
          else
4422
0
            {
4423
0
            Feptr = mb->end_subject;   /* Unlimited UTF-8 repeat */
4424
0
            SCHECK_PARTIAL();
4425
0
            }
4426
0
          break;
4427
4428
          /* The "byte" (i.e. "code unit") case is the same as non-UTF */
4429
4430
0
          case OP_ANYBYTE:
4431
0
          fc = Lmax - Lmin;
4432
0
          if (fc > (uint32_t)(mb->end_subject - Feptr))
4433
0
            {
4434
0
            Feptr = mb->end_subject;
4435
0
            SCHECK_PARTIAL();
4436
0
            }
4437
0
          else Feptr += fc;
4438
0
          break;
4439
4440
0
          case OP_ANYNL:
4441
0
          for (i = Lmin; i < Lmax; i++)
4442
0
            {
4443
0
            int len = 1;
4444
0
            if (Feptr >= mb->end_subject)
4445
0
              {
4446
0
              SCHECK_PARTIAL();
4447
0
              break;
4448
0
              }
4449
0
            GETCHARLEN(fc, Feptr, len);
4450
0
            if (fc == CHAR_CR)
4451
0
              {
4452
0
              if (++Feptr >= mb->end_subject) break;
4453
0
              if (UCHAR21(Feptr) == CHAR_LF) Feptr++;
4454
0
              }
4455
0
            else
4456
0
              {
4457
0
              if (fc != CHAR_LF &&
4458
0
                  (mb->bsr_convention == PCRE2_BSR_ANYCRLF ||
4459
0
                   (fc != CHAR_VT && fc != CHAR_FF && fc != CHAR_NEL
4460
0
#ifndef EBCDIC
4461
0
                    && fc != 0x2028 && fc != 0x2029
4462
0
#endif  /* Not EBCDIC */
4463
0
                    )))
4464
0
                break;
4465
0
              Feptr += len;
4466
0
              }
4467
0
            }
4468
0
          break;
4469
4470
0
          case OP_NOT_HSPACE:
4471
0
          case OP_HSPACE:
4472
0
          for (i = Lmin; i < Lmax; i++)
4473
0
            {
4474
0
            BOOL gotspace;
4475
0
            int len = 1;
4476
0
            if (Feptr >= mb->end_subject)
4477
0
              {
4478
0
              SCHECK_PARTIAL();
4479
0
              break;
4480
0
              }
4481
0
            GETCHARLEN(fc, Feptr, len);
4482
0
            switch(fc)
4483
0
              {
4484
0
              HSPACE_CASES: gotspace = TRUE; break;
4485
0
              default: gotspace = FALSE; break;
4486
0
              }
4487
0
            if (gotspace == (Lctype == OP_NOT_HSPACE)) break;
4488
0
            Feptr += len;
4489
0
            }
4490
0
          break;
4491
4492
0
          case OP_NOT_VSPACE:
4493
0
          case OP_VSPACE:
4494
0
          for (i = Lmin; i < Lmax; i++)
4495
0
            {
4496
0
            BOOL gotspace;
4497
0
            int len = 1;
4498
0
            if (Feptr >= mb->end_subject)
4499
0
              {
4500
0
              SCHECK_PARTIAL();
4501
0
              break;
4502
0
              }
4503
0
            GETCHARLEN(fc, Feptr, len);
4504
0
            switch(fc)
4505
0
              {
4506
0
              VSPACE_CASES: gotspace = TRUE; break;
4507
0
              default: gotspace = FALSE; break;
4508
0
              }
4509
0
            if (gotspace == (Lctype == OP_NOT_VSPACE)) break;
4510
0
            Feptr += len;
4511
0
            }
4512
0
          break;
4513
4514
0
          case OP_NOT_DIGIT:
4515
0
          for (i = Lmin; i < Lmax; i++)
4516
0
            {
4517
0
            int len = 1;
4518
0
            if (Feptr >= mb->end_subject)
4519
0
              {
4520
0
              SCHECK_PARTIAL();
4521
0
              break;
4522
0
              }
4523
0
            GETCHARLEN(fc, Feptr, len);
4524
0
            if (fc < 256 && (mb->ctypes[fc] & ctype_digit) != 0) break;
4525
0
            Feptr+= len;
4526
0
            }
4527
0
          break;
4528
4529
0
          case OP_DIGIT:
4530
0
          for (i = Lmin; i < Lmax; i++)
4531
0
            {
4532
0
            int len = 1;
4533
0
            if (Feptr >= mb->end_subject)
4534
0
              {
4535
0
              SCHECK_PARTIAL();
4536
0
              break;
4537
0
              }
4538
0
            GETCHARLEN(fc, Feptr, len);
4539
0
            if (fc >= 256 ||(mb->ctypes[fc] & ctype_digit) == 0) break;
4540
0
            Feptr+= len;
4541
0
            }
4542
0
          break;
4543
4544
0
          case OP_NOT_WHITESPACE:
4545
0
          for (i = Lmin; i < Lmax; i++)
4546
0
            {
4547
0
            int len = 1;
4548
0
            if (Feptr >= mb->end_subject)
4549
0
              {
4550
0
              SCHECK_PARTIAL();
4551
0
              break;
4552
0
              }
4553
0
            GETCHARLEN(fc, Feptr, len);
4554
0
            if (fc < 256 && (mb->ctypes[fc] & ctype_space) != 0) break;
4555
0
            Feptr+= len;
4556
0
            }
4557
0
          break;
4558
4559
0
          case OP_WHITESPACE:
4560
0
          for (i = Lmin; i < Lmax; i++)
4561
0
            {
4562
0
            int len = 1;
4563
0
            if (Feptr >= mb->end_subject)
4564
0
              {
4565
0
              SCHECK_PARTIAL();
4566
0
              break;
4567
0
              }
4568
0
            GETCHARLEN(fc, Feptr, len);
4569
0
            if (fc >= 256 ||(mb->ctypes[fc] & ctype_space) == 0) break;
4570
0
            Feptr+= len;
4571
0
            }
4572
0
          break;
4573
4574
0
          case OP_NOT_WORDCHAR:
4575
0
          for (i = Lmin; i < Lmax; i++)
4576
0
            {
4577
0
            int len = 1;
4578
0
            if (Feptr >= mb->end_subject)
4579
0
              {
4580
0
              SCHECK_PARTIAL();
4581
0
              break;
4582
0
              }
4583
0
            GETCHARLEN(fc, Feptr, len);
4584
0
            if (fc < 256 && (mb->ctypes[fc] & ctype_word) != 0) break;
4585
0
            Feptr+= len;
4586
0
            }
4587
0
          break;
4588
4589
0
          case OP_WORDCHAR:
4590
0
          for (i = Lmin; i < Lmax; i++)
4591
0
            {
4592
0
            int len = 1;
4593
0
            if (Feptr >= mb->end_subject)
4594
0
              {
4595
0
              SCHECK_PARTIAL();
4596
0
              break;
4597
0
              }
4598
0
            GETCHARLEN(fc, Feptr, len);
4599
0
            if (fc >= 256 || (mb->ctypes[fc] & ctype_word) == 0) break;
4600
0
            Feptr+= len;
4601
0
            }
4602
0
          break;
4603
4604
0
          default:
4605
0
          return PCRE2_ERROR_INTERNAL;
4606
0
          }
4607
4608
0
        if (reptype == REPTYPE_POS) continue;    /* No backtracking */
4609
4610
        /* After \C in UTF mode, Lstart_eptr might be in the middle of a
4611
        Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't go
4612
        too far. */
4613
4614
0
        for(;;)
4615
0
          {
4616
0
          if (Feptr <= Lstart_eptr) break;
4617
0
          RMATCH(Fecode, RM221);
4618
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4619
0
          Feptr--;
4620
0
          BACKCHAR(Feptr);
4621
0
          if (Lctype == OP_ANYNL && Feptr > Lstart_eptr &&
4622
0
              UCHAR21(Feptr) == CHAR_NL && UCHAR21(Feptr - 1) == CHAR_CR)
4623
0
            Feptr--;
4624
0
          }
4625
0
        }
4626
0
      else
4627
0
#endif  /* SUPPORT_UNICODE */
4628
4629
      /* Not UTF mode */
4630
0
        {
4631
0
        switch(Lctype)
4632
0
          {
4633
0
          case OP_ANY:
4634
0
          for (i = Lmin; i < Lmax; i++)
4635
0
            {
4636
0
            if (Feptr >= mb->end_subject)
4637
0
              {
4638
0
              SCHECK_PARTIAL();
4639
0
              break;
4640
0
              }
4641
0
            if (IS_NEWLINE(Feptr)) break;
4642
0
            if (mb->partial != 0 &&    /* Take care with CRLF partial */
4643
0
                Feptr + 1 >= mb->end_subject &&
4644
0
                NLBLOCK->nltype == NLTYPE_FIXED &&
4645
0
                NLBLOCK->nllen == 2 &&
4646
0
                *Feptr == NLBLOCK->nl[0])
4647
0
              {
4648
0
              mb->hitend = TRUE;
4649
0
              if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
4650
0
              }
4651
0
            Feptr++;
4652
0
            }
4653
0
          break;
4654
4655
0
          case OP_ALLANY:
4656
0
          case OP_ANYBYTE:
4657
0
          fc = Lmax - Lmin;
4658
0
          if (fc > (uint32_t)(mb->end_subject - Feptr))
4659
0
            {
4660
0
            Feptr = mb->end_subject;
4661
0
            SCHECK_PARTIAL();
4662
0
            }
4663
0
          else Feptr += fc;
4664
0
          break;
4665
4666
0
          case OP_ANYNL:
4667
0
          for (i = Lmin; i < Lmax; i++)
4668
0
            {
4669
0
            if (Feptr >= mb->end_subject)
4670
0
              {
4671
0
              SCHECK_PARTIAL();
4672
0
              break;
4673
0
              }
4674
0
            fc = *Feptr;
4675
0
            if (fc == CHAR_CR)
4676
0
              {
4677
0
              if (++Feptr >= mb->end_subject) break;
4678
0
              if (*Feptr == CHAR_LF) Feptr++;
4679
0
              }
4680
0
            else
4681
0
              {
4682
0
              if (fc != CHAR_LF && (mb->bsr_convention == PCRE2_BSR_ANYCRLF ||
4683
0
                 (fc != CHAR_VT && fc != CHAR_FF && fc != CHAR_NEL
4684
#if PCRE2_CODE_UNIT_WIDTH != 8
4685
                 && fc != 0x2028 && fc != 0x2029
4686
#endif
4687
0
                 ))) break;
4688
0
              Feptr++;
4689
0
              }
4690
0
            }
4691
0
          break;
4692
4693
0
          case OP_NOT_HSPACE:
4694
0
          for (i = Lmin; i < Lmax; i++)
4695
0
            {
4696
0
            if (Feptr >= mb->end_subject)
4697
0
              {
4698
0
              SCHECK_PARTIAL();
4699
0
              break;
4700
0
              }
4701
0
            switch(*Feptr)
4702
0
              {
4703
0
              default: Feptr++; break;
4704
0
              HSPACE_BYTE_CASES:
4705
#if PCRE2_CODE_UNIT_WIDTH != 8
4706
              HSPACE_MULTIBYTE_CASES:
4707
#endif
4708
0
              goto ENDLOOP00;
4709
0
              }
4710
0
            }
4711
0
          ENDLOOP00:
4712
0
          break;
4713
4714
0
          case OP_HSPACE:
4715
0
          for (i = Lmin; i < Lmax; i++)
4716
0
            {
4717
0
            if (Feptr >= mb->end_subject)
4718
0
              {
4719
0
              SCHECK_PARTIAL();
4720
0
              break;
4721
0
              }
4722
0
            switch(*Feptr)
4723
0
              {
4724
0
              default: goto ENDLOOP01;
4725
0
              HSPACE_BYTE_CASES:
4726
#if PCRE2_CODE_UNIT_WIDTH != 8
4727
              HSPACE_MULTIBYTE_CASES:
4728
#endif
4729
0
              Feptr++; break;
4730
0
              }
4731
0
            }
4732
0
          ENDLOOP01:
4733
0
          break;
4734
4735
0
          case OP_NOT_VSPACE:
4736
0
          for (i = Lmin; i < Lmax; i++)
4737
0
            {
4738
0
            if (Feptr >= mb->end_subject)
4739
0
              {
4740
0
              SCHECK_PARTIAL();
4741
0
              break;
4742
0
              }
4743
0
            switch(*Feptr)
4744
0
              {
4745
0
              default: Feptr++; break;
4746
0
              VSPACE_BYTE_CASES:
4747
#if PCRE2_CODE_UNIT_WIDTH != 8
4748
              VSPACE_MULTIBYTE_CASES:
4749
#endif
4750
0
              goto ENDLOOP02;
4751
0
              }
4752
0
            }
4753
0
          ENDLOOP02:
4754
0
          break;
4755
4756
0
          case OP_VSPACE:
4757
0
          for (i = Lmin; i < Lmax; i++)
4758
0
            {
4759
0
            if (Feptr >= mb->end_subject)
4760
0
              {
4761
0
              SCHECK_PARTIAL();
4762
0
              break;
4763
0
              }
4764
0
            switch(*Feptr)
4765
0
              {
4766
0
              default: goto ENDLOOP03;
4767
0
              VSPACE_BYTE_CASES:
4768
#if PCRE2_CODE_UNIT_WIDTH != 8
4769
              VSPACE_MULTIBYTE_CASES:
4770
#endif
4771
0
              Feptr++; break;
4772
0
              }
4773
0
            }
4774
0
          ENDLOOP03:
4775
0
          break;
4776
4777
0
          case OP_NOT_DIGIT:
4778
0
          for (i = Lmin; i < Lmax; i++)
4779
0
            {
4780
0
            if (Feptr >= mb->end_subject)
4781
0
              {
4782
0
              SCHECK_PARTIAL();
4783
0
              break;
4784
0
              }
4785
0
            if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_digit) != 0)
4786
0
              break;
4787
0
            Feptr++;
4788
0
            }
4789
0
          break;
4790
4791
0
          case OP_DIGIT:
4792
0
          for (i = Lmin; i < Lmax; i++)
4793
0
            {
4794
0
            if (Feptr >= mb->end_subject)
4795
0
              {
4796
0
              SCHECK_PARTIAL();
4797
0
              break;
4798
0
              }
4799
0
            if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_digit) == 0)
4800
0
              break;
4801
0
            Feptr++;
4802
0
            }
4803
0
          break;
4804
4805
0
          case OP_NOT_WHITESPACE:
4806
0
          for (i = Lmin; i < Lmax; i++)
4807
0
            {
4808
0
            if (Feptr >= mb->end_subject)
4809
0
              {
4810
0
              SCHECK_PARTIAL();
4811
0
              break;
4812
0
              }
4813
0
            if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_space) != 0)
4814
0
              break;
4815
0
            Feptr++;
4816
0
            }
4817
0
          break;
4818
4819
0
          case OP_WHITESPACE:
4820
0
          for (i = Lmin; i < Lmax; i++)
4821
0
            {
4822
0
            if (Feptr >= mb->end_subject)
4823
0
              {
4824
0
              SCHECK_PARTIAL();
4825
0
              break;
4826
0
              }
4827
0
            if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_space) == 0)
4828
0
              break;
4829
0
            Feptr++;
4830
0
            }
4831
0
          break;
4832
4833
0
          case OP_NOT_WORDCHAR:
4834
0
          for (i = Lmin; i < Lmax; i++)
4835
0
            {
4836
0
            if (Feptr >= mb->end_subject)
4837
0
              {
4838
0
              SCHECK_PARTIAL();
4839
0
              break;
4840
0
              }
4841
0
            if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_word) != 0)
4842
0
              break;
4843
0
            Feptr++;
4844
0
            }
4845
0
          break;
4846
4847
0
          case OP_WORDCHAR:
4848
0
          for (i = Lmin; i < Lmax; i++)
4849
0
            {
4850
0
            if (Feptr >= mb->end_subject)
4851
0
              {
4852
0
              SCHECK_PARTIAL();
4853
0
              break;
4854
0
              }
4855
0
            if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_word) == 0)
4856
0
              break;
4857
0
            Feptr++;
4858
0
            }
4859
0
          break;
4860
4861
0
          default:
4862
0
          return PCRE2_ERROR_INTERNAL;
4863
0
          }
4864
4865
0
        if (reptype == REPTYPE_POS) continue;    /* No backtracking */
4866
4867
0
        for (;;)
4868
0
          {
4869
0
          if (Feptr == Lstart_eptr) break;
4870
0
          RMATCH(Fecode, RM34);
4871
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4872
0
          Feptr--;
4873
0
          if (Lctype == OP_ANYNL && Feptr > Lstart_eptr && *Feptr == CHAR_LF &&
4874
0
              Feptr[-1] == CHAR_CR) Feptr--;
4875
0
          }
4876
0
        }
4877
0
      }
4878
0
    break;  /* End of repeat character type processing */
4879
4880
0
#undef Lstart_eptr
4881
0
#undef Lmin
4882
0
#undef Lmax
4883
0
#undef Lctype
4884
0
#undef Lpropvalue
4885
4886
4887
    /* ===================================================================== */
4888
    /* Match a back reference, possibly repeatedly. Look past the end of the
4889
    item to see if there is repeat information following. The OP_REF and
4890
    OP_REFI opcodes are used for a reference to a numbered group or to a
4891
    non-duplicated named group. For a duplicated named group, OP_DNREF and
4892
    OP_DNREFI are used. In this case we must scan the list of groups to which
4893
    the name refers, and use the first one that is set. */
4894
4895
0
#define Lmin      F->temp_32[0]
4896
0
#define Lmax      F->temp_32[1]
4897
0
#define Lcaseless F->temp_32[2]
4898
0
#define Lstart    F->temp_sptr[0]
4899
0
#define Loffset   F->temp_size
4900
4901
0
    case OP_DNREF:
4902
0
    case OP_DNREFI:
4903
0
    Lcaseless = (Fop == OP_DNREFI);
4904
0
      {
4905
0
      int count = GET2(Fecode, 1+IMM2_SIZE);
4906
0
      PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size;
4907
0
      Fecode += 1 + 2*IMM2_SIZE;
4908
4909
0
      while (count-- > 0)
4910
0
        {
4911
0
        Loffset = (GET2(slot, 0) << 1) - 2;
4912
0
        if (Loffset < Foffset_top && Fovector[Loffset] != PCRE2_UNSET) break;
4913
0
        slot += mb->name_entry_size;
4914
0
        }
4915
0
      }
4916
0
    goto REF_REPEAT;
4917
4918
0
    case OP_REF:
4919
0
    case OP_REFI:
4920
0
    Lcaseless = (Fop == OP_REFI);
4921
0
    Loffset = (GET2(Fecode, 1) << 1) - 2;
4922
0
    Fecode += 1 + IMM2_SIZE;
4923
4924
    /* Set up for repetition, or handle the non-repeated case. The maximum and
4925
    minimum must be in the heap frame, but as they are short-term values, we
4926
    use temporary fields. */
4927
4928
0
    REF_REPEAT:
4929
0
    switch (*Fecode)
4930
0
      {
4931
0
      case OP_CRSTAR:
4932
0
      case OP_CRMINSTAR:
4933
0
      case OP_CRPLUS:
4934
0
      case OP_CRMINPLUS:
4935
0
      case OP_CRQUERY:
4936
0
      case OP_CRMINQUERY:
4937
0
      fc = *Fecode++ - OP_CRSTAR;
4938
0
      Lmin = rep_min[fc];
4939
0
      Lmax = rep_max[fc];
4940
0
      reptype = rep_typ[fc];
4941
0
      break;
4942
4943
0
      case OP_CRRANGE:
4944
0
      case OP_CRMINRANGE:
4945
0
      Lmin = GET2(Fecode, 1);
4946
0
      Lmax = GET2(Fecode, 1 + IMM2_SIZE);
4947
0
      reptype = rep_typ[*Fecode - OP_CRSTAR];
4948
0
      if (Lmax == 0) Lmax = UINT32_MAX;  /* Max 0 => infinity */
4949
0
      Fecode += 1 + 2 * IMM2_SIZE;
4950
0
      break;
4951
4952
0
      default:                  /* No repeat follows */
4953
0
        {
4954
0
        rrc = match_ref(Loffset, Lcaseless, F, mb, &length);
4955
0
        if (rrc != 0)
4956
0
          {
4957
0
          if (rrc > 0) Feptr = mb->end_subject;   /* Partial match */
4958
0
          CHECK_PARTIAL();
4959
0
          RRETURN(MATCH_NOMATCH);
4960
0
          }
4961
0
        }
4962
0
      Feptr += length;
4963
0
      continue;              /* With the main loop */
4964
0
      }
4965
4966
    /* Handle repeated back references. If a set group has length zero, just
4967
    continue with the main loop, because it matches however many times. For an
4968
    unset reference, if the minimum is zero, we can also just continue. We can
4969
    also continue if PCRE2_MATCH_UNSET_BACKREF is set, because this makes unset
4970
    groups behave as zero-length groups. For any other unset cases, carrying
4971
    on will result in NOMATCH. */
4972
4973
0
    if (Loffset < Foffset_top && Fovector[Loffset] != PCRE2_UNSET)
4974
0
      {
4975
0
      if (Fovector[Loffset] == Fovector[Loffset + 1]) continue;
4976
0
      }
4977
0
    else  /* Group is not set */
4978
0
      {
4979
0
      if (Lmin == 0 || (mb->poptions & PCRE2_MATCH_UNSET_BACKREF) != 0)
4980
0
        continue;
4981
0
      }
4982
4983
    /* First, ensure the minimum number of matches are present. */
4984
4985
0
    for (i = 1; i <= Lmin; i++)
4986
0
      {
4987
0
      PCRE2_SIZE slength;
4988
0
      rrc = match_ref(Loffset, Lcaseless, F, mb, &slength);
4989
0
      if (rrc != 0)
4990
0
        {
4991
0
        if (rrc > 0) Feptr = mb->end_subject;   /* Partial match */
4992
0
        CHECK_PARTIAL();
4993
0
        RRETURN(MATCH_NOMATCH);
4994
0
        }
4995
0
      Feptr += slength;
4996
0
      }
4997
4998
    /* If min = max, we are done. They are not both allowed to be zero. */
4999
5000
0
    if (Lmin == Lmax) continue;
5001
5002
    /* If minimizing, keep trying and advancing the pointer. */
5003
5004
0
    if (reptype == REPTYPE_MIN)
5005
0
      {
5006
0
      for (;;)
5007
0
        {
5008
0
        PCRE2_SIZE slength;
5009
0
        RMATCH(Fecode, RM20);
5010
0
        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5011
0
        if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
5012
0
        rrc = match_ref(Loffset, Lcaseless, F, mb, &slength);
5013
0
        if (rrc != 0)
5014
0
          {
5015
0
          if (rrc > 0) Feptr = mb->end_subject;   /* Partial match */
5016
0
          CHECK_PARTIAL();
5017
0
          RRETURN(MATCH_NOMATCH);
5018
0
          }
5019
0
        Feptr += slength;
5020
0
        }
5021
      /* Control never gets here */
5022
0
      }
5023
5024
    /* If maximizing, find the longest string and work backwards, as long as
5025
    the matched lengths for each iteration are the same. */
5026
5027
0
    else
5028
0
      {
5029
0
      BOOL samelengths = TRUE;
5030
0
      Lstart = Feptr;     /* Starting position */
5031
0
      Flength = Fovector[Loffset+1] - Fovector[Loffset];
5032
5033
0
      for (i = Lmin; i < Lmax; i++)
5034
0
        {
5035
0
        PCRE2_SIZE slength;
5036
0
        rrc = match_ref(Loffset, Lcaseless, F, mb, &slength);
5037
0
        if (rrc != 0)
5038
0
          {
5039
          /* Can't use CHECK_PARTIAL because we don't want to update Feptr in
5040
          the soft partial matching case. */
5041
5042
0
          if (rrc > 0 && mb->partial != 0 &&
5043
0
              mb->end_subject > mb->start_used_ptr)
5044
0
            {
5045
0
            mb->hitend = TRUE;
5046
0
            if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
5047
0
            }
5048
0
          break;
5049
0
          }
5050
5051
0
        if (slength != Flength) samelengths = FALSE;
5052
0
        Feptr += slength;
5053
0
        }
5054
5055
      /* If the length matched for each repetition is the same as the length of
5056
      the captured group, we can easily work backwards. This is the normal
5057
      case. But in caseless UTF-8 mode there are pairs of case-equivalent
5058
      characters whose lengths (in terms of code units) differ. However, this
5059
      is very rare, so we handle it by re-matching fewer and fewer times. */
5060
5061
0
      if (samelengths)
5062
0
        {
5063
0
        while (Feptr >= Lstart)
5064
0
          {
5065
0
          RMATCH(Fecode, RM21);
5066
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5067
0
          Feptr -= Flength;
5068
0
          }
5069
0
        }
5070
5071
      /* The rare case of non-matching lengths. Re-scan the repetition for each
5072
      iteration. We know that match_ref() will succeed every time. */
5073
5074
0
      else
5075
0
        {
5076
0
        Lmax = i;
5077
0
        for (;;)
5078
0
          {
5079
0
          RMATCH(Fecode, RM22);
5080
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5081
0
          if (Feptr == Lstart) break; /* Failed after minimal repetition */
5082
0
          Feptr = Lstart;
5083
0
          Lmax--;
5084
0
          for (i = Lmin; i < Lmax; i++)
5085
0
            {
5086
0
            PCRE2_SIZE slength;
5087
0
            (void)match_ref(Loffset, Lcaseless, F, mb, &slength);
5088
0
            Feptr += slength;
5089
0
            }
5090
0
          }
5091
0
        }
5092
5093
0
      RRETURN(MATCH_NOMATCH);
5094
0
      }
5095
    /* Control never gets here */
5096
5097
0
#undef Lcaseless
5098
0
#undef Lmin
5099
0
#undef Lmax
5100
0
#undef Lstart
5101
0
#undef Loffset
5102
5103
5104
5105
/* ========================================================================= */
5106
/*           Opcodes for the start of various parenthesized items            */
5107
/* ========================================================================= */
5108
5109
    /* In all cases, if the result of RMATCH() is MATCH_THEN, check whether the
5110
    (*THEN) is within the current branch by comparing the address of OP_THEN
5111
    that is passed back with the end of the branch. If (*THEN) is within the
5112
    current branch, and the branch is one of two or more alternatives (it
5113
    either starts or ends with OP_ALT), we have reached the limit of THEN's
5114
    action, so convert the return code to NOMATCH, which will cause normal
5115
    backtracking to happen from now on. Otherwise, THEN is passed back to an
5116
    outer alternative. This implements Perl's treatment of parenthesized
5117
    groups, where a group not containing | does not affect the current
5118
    alternative, that is, (X) is NOT the same as (X|(*F)). */
5119
5120
5121
    /* ===================================================================== */
5122
    /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a non-possessive
5123
    bracket group, indicating that it may occur zero times. It may repeat
5124
    infinitely, or not at all - i.e. it could be ()* or ()? or even (){0} in
5125
    the pattern. Brackets with fixed upper repeat limits are compiled as a
5126
    number of copies, with the optional ones preceded by BRAZERO or BRAMINZERO.
5127
    Possessive groups with possible zero repeats are preceded by BRAPOSZERO. */
5128
5129
0
#define Lnext_ecode F->temp_sptr[0]
5130
5131
0
    case OP_BRAZERO:
5132
0
    Lnext_ecode = Fecode + 1;
5133
0
    RMATCH(Lnext_ecode, RM9);
5134
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5135
0
    do Lnext_ecode += GET(Lnext_ecode, 1); while (*Lnext_ecode == OP_ALT);
5136
0
    Fecode = Lnext_ecode + 1 + LINK_SIZE;
5137
0
    break;
5138
5139
0
    case OP_BRAMINZERO:
5140
0
    Lnext_ecode = Fecode + 1;
5141
0
    do Lnext_ecode += GET(Lnext_ecode, 1); while (*Lnext_ecode == OP_ALT);
5142
0
    RMATCH(Lnext_ecode + 1 + LINK_SIZE, RM10);
5143
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5144
0
    Fecode++;
5145
0
    break;
5146
5147
0
#undef Lnext_ecode
5148
5149
0
    case OP_SKIPZERO:
5150
0
    Fecode++;
5151
0
    do Fecode += GET(Fecode,1); while (*Fecode == OP_ALT);
5152
0
    Fecode += 1 + LINK_SIZE;
5153
0
    break;
5154
5155
5156
    /* ===================================================================== */
5157
    /* Handle possessive brackets with an unlimited repeat. The end of these
5158
    brackets will always be OP_KETRPOS, which returns MATCH_KETRPOS without
5159
    going further in the pattern. */
5160
5161
0
#define Lframe_type    F->temp_32[0]
5162
0
#define Lmatched_once  F->temp_32[1]
5163
0
#define Lzero_allowed  F->temp_32[2]
5164
0
#define Lstart_eptr    F->temp_sptr[0]
5165
0
#define Lstart_group   F->temp_sptr[1]
5166
5167
0
    case OP_BRAPOSZERO:
5168
0
    Lzero_allowed = TRUE;                /* Zero repeat is allowed */
5169
0
    Fecode += 1;
5170
0
    if (*Fecode == OP_CBRAPOS || *Fecode == OP_SCBRAPOS)
5171
0
      goto POSSESSIVE_CAPTURE;
5172
0
    goto POSSESSIVE_NON_CAPTURE;
5173
5174
0
    case OP_BRAPOS:
5175
0
    case OP_SBRAPOS:
5176
0
    Lzero_allowed = FALSE;               /* Zero repeat not allowed */
5177
5178
0
    POSSESSIVE_NON_CAPTURE:
5179
0
    Lframe_type = GF_NOCAPTURE;          /* Remembered frame type */
5180
0
    goto POSSESSIVE_GROUP;
5181
5182
0
    case OP_CBRAPOS:
5183
0
    case OP_SCBRAPOS:
5184
0
    Lzero_allowed = FALSE;               /* Zero repeat not allowed */
5185
5186
0
    POSSESSIVE_CAPTURE:
5187
0
    number = GET2(Fecode, 1+LINK_SIZE);
5188
0
    Lframe_type = GF_CAPTURE | number;   /* Remembered frame type */
5189
5190
0
    POSSESSIVE_GROUP:
5191
0
    Lmatched_once = FALSE;               /* Never matched */
5192
0
    Lstart_group = Fecode;               /* Start of this group */
5193
5194
0
    for (;;)
5195
0
      {
5196
0
      Lstart_eptr = Feptr;               /* Position at group start */
5197
0
      group_frame_type = Lframe_type;
5198
0
      RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM8);
5199
0
      if (rrc == MATCH_KETRPOS)
5200
0
        {
5201
0
        Lmatched_once = TRUE;            /* Matched at least once */
5202
0
        if (Feptr == Lstart_eptr)        /* Empty match; skip to end */
5203
0
          {
5204
0
          do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
5205
0
          break;
5206
0
          }
5207
5208
0
        Fecode = Lstart_group;
5209
0
        continue;
5210
0
        }
5211
5212
      /* See comment above about handling THEN. */
5213
5214
0
      if (rrc == MATCH_THEN)
5215
0
        {
5216
0
        PCRE2_SPTR next_ecode = Fecode + GET(Fecode,1);
5217
0
        if (mb->verb_ecode_ptr < next_ecode &&
5218
0
            (*Fecode == OP_ALT || *next_ecode == OP_ALT))
5219
0
          rrc = MATCH_NOMATCH;
5220
0
        }
5221
5222
0
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5223
0
      Fecode += GET(Fecode, 1);
5224
0
      if (*Fecode != OP_ALT) break;
5225
0
      }
5226
5227
    /* Success if matched something or zero repeat allowed */
5228
5229
0
    if (Lmatched_once || Lzero_allowed)
5230
0
      {
5231
0
      Fecode += 1 + LINK_SIZE;
5232
0
      break;
5233
0
      }
5234
5235
0
    RRETURN(MATCH_NOMATCH);
5236
5237
0
#undef Lmatched_once
5238
0
#undef Lzero_allowed
5239
0
#undef Lframe_type
5240
0
#undef Lstart_eptr
5241
0
#undef Lstart_group
5242
5243
5244
    /* ===================================================================== */
5245
    /* Handle non-capturing brackets that cannot match an empty string. When we
5246
    get to the final alternative within the brackets, as long as there are no
5247
    THEN's in the pattern, we can optimize by not recording a new backtracking
5248
    point. (Ideally we should test for a THEN within this group, but we don't
5249
    have that information.) Don't do this if we are at the very top level,
5250
    however, because that would make handling assertions and once-only brackets
5251
    messier when there is nothing to go back to. */
5252
5253
0
#define Lframe_type F->temp_32[0]     /* Set for all that use GROUPLOOP */
5254
0
#define Lnext_branch F->temp_sptr[0]  /* Used only in OP_BRA handling */
5255
5256
0
    case OP_BRA:
5257
0
    if (mb->hasthen || Frdepth == 0)
5258
0
      {
5259
0
      Lframe_type = 0;
5260
0
      goto GROUPLOOP;
5261
0
      }
5262
5263
0
    for (;;)
5264
0
      {
5265
0
      Lnext_branch = Fecode + GET(Fecode, 1);
5266
0
      if (*Lnext_branch != OP_ALT) break;
5267
5268
      /* This is never the final branch. We do not need to test for MATCH_THEN
5269
      here because this code is not used when there is a THEN in the pattern. */
5270
5271
0
      RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM1);
5272
0
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5273
0
      Fecode = Lnext_branch;
5274
0
      }
5275
5276
    /* Hit the start of the final branch. Continue at this level. */
5277
5278
0
    Fecode += PRIV(OP_lengths)[*Fecode];
5279
0
    break;
5280
5281
0
#undef Lnext_branch
5282
5283
5284
    /* ===================================================================== */
5285
    /* Handle a capturing bracket, other than those that are possessive with an
5286
    unlimited repeat. */
5287
5288
0
    case OP_CBRA:
5289
0
    case OP_SCBRA:
5290
0
    Lframe_type = GF_CAPTURE | GET2(Fecode, 1+LINK_SIZE);
5291
0
    goto GROUPLOOP;
5292
5293
5294
    /* ===================================================================== */
5295
    /* Atomic groups and non-capturing brackets that can match an empty string
5296
    must record a backtracking point and also set up a chained frame. */
5297
5298
0
    case OP_ONCE:
5299
0
    case OP_SCRIPT_RUN:
5300
0
    case OP_SBRA:
5301
0
    Lframe_type = GF_NOCAPTURE | Fop;
5302
5303
0
    GROUPLOOP:
5304
0
    for (;;)
5305
0
      {
5306
0
      group_frame_type = Lframe_type;
5307
0
      RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM2);
5308
0
      if (rrc == MATCH_THEN)
5309
0
        {
5310
0
        PCRE2_SPTR next_ecode = Fecode + GET(Fecode,1);
5311
0
        if (mb->verb_ecode_ptr < next_ecode &&
5312
0
            (*Fecode == OP_ALT || *next_ecode == OP_ALT))
5313
0
          rrc = MATCH_NOMATCH;
5314
0
        }
5315
0
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5316
0
      Fecode += GET(Fecode, 1);
5317
0
      if (*Fecode != OP_ALT) RRETURN(MATCH_NOMATCH);
5318
0
      }
5319
    /* Control never reaches here. */
5320
5321
0
#undef Lframe_type
5322
5323
5324
    /* ===================================================================== */
5325
    /* Recursion either matches the current regex, or some subexpression. The
5326
    offset data is the offset to the starting bracket from the start of the
5327
    whole pattern. (This is so that it works from duplicated subpatterns.) */
5328
5329
0
#define Lframe_type F->temp_32[0]
5330
0
#define Lstart_branch F->temp_sptr[0]
5331
5332
0
    case OP_RECURSE:
5333
0
    bracode = mb->start_code + GET(Fecode, 1);
5334
0
    number = (bracode == mb->start_code)? 0 : GET2(bracode, 1 + LINK_SIZE);
5335
5336
    /* If we are already in a recursion, check for repeating the same one
5337
    without advancing the subject pointer. This should catch convoluted mutual
5338
    recursions. (Some simple cases are caught at compile time.) */
5339
5340
0
    if (Fcurrent_recurse != RECURSE_UNSET)
5341
0
      {
5342
0
      offset = Flast_group_offset;
5343
0
      while (offset != PCRE2_UNSET)
5344
0
        {
5345
0
        N = (heapframe *)((char *)match_data->heapframes + offset);
5346
0
        P = (heapframe *)((char *)N - frame_size);
5347
0
        if (N->group_frame_type == (GF_RECURSE | number))
5348
0
          {
5349
0
          if (Feptr == P->eptr) return PCRE2_ERROR_RECURSELOOP;
5350
0
          break;
5351
0
          }
5352
0
        offset = P->last_group_offset;
5353
0
        }
5354
0
      }
5355
5356
    /* Now run the recursion, branch by branch. */
5357
5358
0
    Lstart_branch = bracode;
5359
0
    Lframe_type = GF_RECURSE | number;
5360
5361
0
    for (;;)
5362
0
      {
5363
0
      PCRE2_SPTR next_ecode;
5364
5365
0
      group_frame_type = Lframe_type;
5366
0
      RMATCH(Lstart_branch + PRIV(OP_lengths)[*Lstart_branch], RM11);
5367
0
      next_ecode = Lstart_branch + GET(Lstart_branch,1);
5368
5369
      /* Handle backtracking verbs, which are defined in a range that can
5370
      easily be tested for. PCRE does not allow THEN, SKIP, PRUNE or COMMIT to
5371
      escape beyond a recursion; they cause a NOMATCH for the entire recursion.
5372
5373
      When one of these verbs triggers, the current recursion group number is
5374
      recorded. If it matches the recursion we are processing, the verb
5375
      happened within the recursion and we must deal with it. Otherwise it must
5376
      have happened after the recursion completed, and so has to be passed
5377
      back. See comment above about handling THEN. */
5378
5379
0
      if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX &&
5380
0
          mb->verb_current_recurse == (Lframe_type ^ GF_RECURSE))
5381
0
        {
5382
0
        if (rrc == MATCH_THEN && mb->verb_ecode_ptr < next_ecode &&
5383
0
            (*Lstart_branch == OP_ALT || *next_ecode == OP_ALT))
5384
0
          rrc = MATCH_NOMATCH;
5385
0
        else RRETURN(MATCH_NOMATCH);
5386
0
        }
5387
5388
      /* Note that carrying on after (*ACCEPT) in a recursion is handled in the
5389
      OP_ACCEPT code. Nothing needs to be done here. */
5390
5391
0
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5392
0
      Lstart_branch = next_ecode;
5393
0
      if (*Lstart_branch != OP_ALT) RRETURN(MATCH_NOMATCH);
5394
0
      }
5395
    /* Control never reaches here. */
5396
5397
0
#undef Lframe_type
5398
0
#undef Lstart_branch
5399
5400
5401
    /* ===================================================================== */
5402
    /* Positive assertions are like other groups except that PCRE doesn't allow
5403
    the effect of (*THEN) to escape beyond an assertion; it is therefore
5404
    treated as NOMATCH. (*ACCEPT) is treated as successful assertion, with its
5405
    captures and mark retained. Any other return is an error. */
5406
5407
0
#define Lframe_type  F->temp_32[0]
5408
5409
0
    case OP_ASSERT:
5410
0
    case OP_ASSERTBACK:
5411
0
    case OP_ASSERT_NA:
5412
0
    case OP_ASSERTBACK_NA:
5413
0
    Lframe_type = GF_NOCAPTURE | Fop;
5414
0
    for (;;)
5415
0
      {
5416
0
      group_frame_type = Lframe_type;
5417
0
      RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM3);
5418
0
      if (rrc == MATCH_ACCEPT)
5419
0
        {
5420
0
        memcpy(Fovector,
5421
0
              (char *)assert_accept_frame + offsetof(heapframe, ovector),
5422
0
              assert_accept_frame->offset_top * sizeof(PCRE2_SIZE));
5423
0
        Foffset_top = assert_accept_frame->offset_top;
5424
0
        Fmark = assert_accept_frame->mark;
5425
0
        break;
5426
0
        }
5427
0
      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
5428
0
      Fecode += GET(Fecode, 1);
5429
0
      if (*Fecode != OP_ALT) RRETURN(MATCH_NOMATCH);
5430
0
      }
5431
5432
0
    do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
5433
0
    Fecode += 1 + LINK_SIZE;
5434
0
    break;
5435
5436
0
#undef Lframe_type
5437
5438
5439
    /* ===================================================================== */
5440
    /* Handle negative assertions. Loop for each non-matching branch as for
5441
    positive assertions. */
5442
5443
0
#define Lframe_type  F->temp_32[0]
5444
5445
0
    case OP_ASSERT_NOT:
5446
0
    case OP_ASSERTBACK_NOT:
5447
0
    Lframe_type  = GF_NOCAPTURE | Fop;
5448
5449
0
    for (;;)
5450
0
      {
5451
0
      group_frame_type = Lframe_type;
5452
0
      RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM4);
5453
0
      switch(rrc)
5454
0
        {
5455
0
        case MATCH_ACCEPT:   /* Assertion matched, therefore it fails. */
5456
0
        case MATCH_MATCH:
5457
0
        RRETURN (MATCH_NOMATCH);
5458
5459
0
        case MATCH_NOMATCH:  /* Branch failed, try next if present. */
5460
0
        case MATCH_THEN:
5461
0
        Fecode += GET(Fecode, 1);
5462
0
        if (*Fecode != OP_ALT) goto ASSERT_NOT_FAILED;
5463
0
        break;
5464
5465
0
        case MATCH_COMMIT:   /* Assertion forced to fail, therefore continue. */
5466
0
        case MATCH_SKIP:
5467
0
        case MATCH_PRUNE:
5468
0
        do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
5469
0
        goto ASSERT_NOT_FAILED;
5470
5471
0
        default:             /* Pass back any other return */
5472
0
        RRETURN(rrc);
5473
0
        }
5474
0
      }
5475
5476
    /* None of the branches have matched or there was a backtrack to (*COMMIT),
5477
    (*SKIP), (*PRUNE), or (*THEN) in the last branch. This is success for a
5478
    negative assertion, so carry on. */
5479
5480
0
    ASSERT_NOT_FAILED:
5481
0
    Fecode += 1 + LINK_SIZE;
5482
0
    break;
5483
5484
0
#undef Lframe_type
5485
5486
5487
    /* ===================================================================== */
5488
    /* The callout item calls an external function, if one is provided, passing
5489
    details of the match so far. This is mainly for debugging, though the
5490
    function is able to force a failure. */
5491
5492
0
    case OP_CALLOUT:
5493
0
    case OP_CALLOUT_STR:
5494
0
    rrc = do_callout(F, mb, &length);
5495
0
    if (rrc > 0) RRETURN(MATCH_NOMATCH);
5496
0
    if (rrc < 0) RRETURN(rrc);
5497
0
    Fecode += length;
5498
0
    break;
5499
5500
5501
    /* ===================================================================== */
5502
    /* Conditional group: compilation checked that there are no more than two
5503
    branches. If the condition is false, skipping the first branch takes us
5504
    past the end of the item if there is only one branch, but that's exactly
5505
    what we want. */
5506
5507
0
    case OP_COND:
5508
0
    case OP_SCOND:
5509
5510
    /* The variable Flength will be added to Fecode when the condition is
5511
    false, to get to the second branch. Setting it to the offset to the ALT or
5512
    KET, then incrementing Fecode achieves this effect. However, if the second
5513
    branch is non-existent, we must point to the KET so that the end of the
5514
    group is correctly processed. We now have Fecode pointing to the condition
5515
    or callout. */
5516
5517
0
    Flength = GET(Fecode, 1);    /* Offset to the second branch */
5518
0
    if (Fecode[Flength] != OP_ALT) Flength -= 1 + LINK_SIZE;
5519
0
    Fecode += 1 + LINK_SIZE;     /* From this opcode */
5520
5521
    /* Because of the way auto-callout works during compile, a callout item is
5522
    inserted between OP_COND and an assertion condition. Such a callout can
5523
    also be inserted manually. */
5524
5525
0
    if (*Fecode == OP_CALLOUT || *Fecode == OP_CALLOUT_STR)
5526
0
      {
5527
0
      rrc = do_callout(F, mb, &length);
5528
0
      if (rrc > 0) RRETURN(MATCH_NOMATCH);
5529
0
      if (rrc < 0) RRETURN(rrc);
5530
5531
      /* Advance Fecode past the callout, so it now points to the condition. We
5532
      must adjust Flength so that the value of Fecode+Flength is unchanged. */
5533
5534
0
      Fecode += length;
5535
0
      Flength -= length;
5536
0
      }
5537
5538
    /* Test the various possible conditions */
5539
5540
0
    condition = FALSE;
5541
0
    switch(*Fecode)
5542
0
      {
5543
0
      case OP_RREF:                  /* Group recursion test */
5544
0
      if (Fcurrent_recurse != RECURSE_UNSET)
5545
0
        {
5546
0
        number = GET2(Fecode, 1);
5547
0
        condition = (number == RREF_ANY || number == Fcurrent_recurse);
5548
0
        }
5549
0
      break;
5550
5551
0
      case OP_DNRREF:       /* Duplicate named group recursion test */
5552
0
      if (Fcurrent_recurse != RECURSE_UNSET)
5553
0
        {
5554
0
        int count = GET2(Fecode, 1 + IMM2_SIZE);
5555
0
        PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size;
5556
0
        while (count-- > 0)
5557
0
          {
5558
0
          number = GET2(slot, 0);
5559
0
          condition = number == Fcurrent_recurse;
5560
0
          if (condition) break;
5561
0
          slot += mb->name_entry_size;
5562
0
          }
5563
0
        }
5564
0
      break;
5565
5566
0
      case OP_CREF:                         /* Numbered group used test */
5567
0
      offset = (GET2(Fecode, 1) << 1) - 2;  /* Doubled ref number */
5568
0
      condition = offset < Foffset_top && Fovector[offset] != PCRE2_UNSET;
5569
0
      break;
5570
5571
0
      case OP_DNCREF:      /* Duplicate named group used test */
5572
0
        {
5573
0
        int count = GET2(Fecode, 1 + IMM2_SIZE);
5574
0
        PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size;
5575
0
        while (count-- > 0)
5576
0
          {
5577
0
          offset = (GET2(slot, 0) << 1) - 2;
5578
0
          condition = offset < Foffset_top && Fovector[offset] != PCRE2_UNSET;
5579
0
          if (condition) break;
5580
0
          slot += mb->name_entry_size;
5581
0
          }
5582
0
        }
5583
0
      break;
5584
5585
0
      case OP_FALSE:
5586
0
      case OP_FAIL:   /* The assertion (?!) becomes OP_FAIL */
5587
0
      break;
5588
5589
0
      case OP_TRUE:
5590
0
      condition = TRUE;
5591
0
      break;
5592
5593
      /* The condition is an assertion. Run code similar to the assertion code
5594
      above. */
5595
5596
0
#define Lpositive      F->temp_32[0]
5597
0
#define Lstart_branch  F->temp_sptr[0]
5598
5599
0
      default:
5600
0
      Lpositive = (*Fecode == OP_ASSERT || *Fecode == OP_ASSERTBACK);
5601
0
      Lstart_branch = Fecode;
5602
5603
0
      for (;;)
5604
0
        {
5605
0
        group_frame_type = GF_CONDASSERT | *Fecode;
5606
0
        RMATCH(Lstart_branch + PRIV(OP_lengths)[*Lstart_branch], RM5);
5607
5608
0
        switch(rrc)
5609
0
          {
5610
0
          case MATCH_ACCEPT:  /* Save captures */
5611
0
          memcpy(Fovector,
5612
0
                (char *)assert_accept_frame + offsetof(heapframe, ovector),
5613
0
                assert_accept_frame->offset_top * sizeof(PCRE2_SIZE));
5614
0
          Foffset_top = assert_accept_frame->offset_top;
5615
5616
          /* Fall through */
5617
          /* In the case of a match, the captures have already been put into
5618
          the current frame. */
5619
5620
0
          case MATCH_MATCH:
5621
0
          condition = Lpositive;   /* TRUE for positive assertion */
5622
0
          break;
5623
5624
          /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
5625
          assertion; it is therefore always treated as NOMATCH. */
5626
5627
0
          case MATCH_NOMATCH:
5628
0
          case MATCH_THEN:
5629
0
          Lstart_branch += GET(Lstart_branch, 1);
5630
0
          if (*Lstart_branch == OP_ALT) continue;  /* Try next branch */
5631
0
          condition = !Lpositive;  /* TRUE for negative assertion */
5632
0
          break;
5633
5634
          /* These force no match without checking other branches. */
5635
5636
0
          case MATCH_COMMIT:
5637
0
          case MATCH_SKIP:
5638
0
          case MATCH_PRUNE:
5639
0
          condition = !Lpositive;
5640
0
          break;
5641
5642
0
          default:
5643
0
          RRETURN(rrc);
5644
0
          }
5645
0
        break;  /* Out of the branch loop */
5646
0
        }
5647
5648
      /* If the condition is true, find the end of the assertion so that
5649
      advancing past it gets us to the start of the first branch. */
5650
5651
0
      if (condition)
5652
0
        {
5653
0
        do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
5654
0
        }
5655
0
      break;  /* End of assertion condition */
5656
0
      }
5657
5658
0
#undef Lpositive
5659
0
#undef Lstart_branch
5660
5661
    /* Choose branch according to the condition. */
5662
5663
0
    Fecode += condition? PRIV(OP_lengths)[*Fecode] : Flength;
5664
5665
    /* If the opcode is OP_SCOND it means we are at a repeated conditional
5666
    group that might match an empty string. We must therefore descend a level
5667
    so that the start is remembered for checking. For OP_COND we can just
5668
    continue at this level. */
5669
5670
0
    if (Fop == OP_SCOND)
5671
0
      {
5672
0
      group_frame_type  = GF_NOCAPTURE | Fop;
5673
0
      RMATCH(Fecode, RM35);
5674
0
      RRETURN(rrc);
5675
0
      }
5676
0
    break;
5677
5678
5679
5680
/* ========================================================================= */
5681
/*                  End of start of parenthesis opcodes                      */
5682
/* ========================================================================= */
5683
5684
5685
    /* ===================================================================== */
5686
    /* Move the subject pointer back. This occurs only at the start of each
5687
    branch of a lookbehind assertion. If we are too close to the start to move
5688
    back, fail. When working with UTF-8 we move back a number of characters,
5689
    not bytes. */
5690
5691
0
    case OP_REVERSE:
5692
0
    number = GET(Fecode, 1);
5693
0
#ifdef SUPPORT_UNICODE
5694
0
    if (utf)
5695
0
      {
5696
0
      while (number-- > 0)
5697
0
        {
5698
0
        if (Feptr <= mb->check_subject) RRETURN(MATCH_NOMATCH);
5699
0
        Feptr--;
5700
0
        BACKCHAR(Feptr);
5701
0
        }
5702
0
      }
5703
0
    else
5704
0
#endif
5705
5706
    /* No UTF-8 support, or not in UTF-8 mode: count is code unit count */
5707
5708
0
      {
5709
0
      if ((ptrdiff_t)number > Feptr - mb->start_subject) RRETURN(MATCH_NOMATCH);
5710
0
      Feptr -= number;
5711
0
      }
5712
5713
    /* Save the earliest consulted character, then skip to next opcode */
5714
5715
0
    if (Feptr < mb->start_used_ptr) mb->start_used_ptr = Feptr;
5716
0
    Fecode += 1 + LINK_SIZE;
5717
0
    break;
5718
5719
5720
    /* ===================================================================== */
5721
    /* An alternation is the end of a branch; scan along to find the end of the
5722
    bracketed group. */
5723
5724
0
    case OP_ALT:
5725
0
    do Fecode += GET(Fecode,1); while (*Fecode == OP_ALT);
5726
0
    break;
5727
5728
5729
    /* ===================================================================== */
5730
    /* The end of a parenthesized group. For all but OP_BRA and OP_COND, the
5731
    starting frame was added to the chained frames in order to remember the
5732
    starting subject position for the group. */
5733
5734
0
    case OP_KET:
5735
0
    case OP_KETRMIN:
5736
0
    case OP_KETRMAX:
5737
0
    case OP_KETRPOS:
5738
5739
0
    bracode = Fecode - GET(Fecode, 1);
5740
5741
    /* Point N to the frame at the start of the most recent group.
5742
    Remember the subject pointer at the start of the group. */
5743
5744
0
    if (*bracode != OP_BRA && *bracode != OP_COND)
5745
0
      {
5746
0
      N = (heapframe *)((char *)match_data->heapframes + Flast_group_offset);
5747
0
      P = (heapframe *)((char *)N - frame_size);
5748
0
      Flast_group_offset = P->last_group_offset;
5749
5750
#ifdef DEBUG_SHOW_RMATCH
5751
      fprintf(stderr, "++ KET for frame=%d type=%x prev char offset=%lu\n",
5752
        N->rdepth, N->group_frame_type,
5753
        (char *)P->eptr - (char *)mb->start_subject);
5754
#endif
5755
5756
      /* If we are at the end of an assertion that is a condition, return a
5757
      match, discarding any intermediate backtracking points. Copy back the
5758
      mark setting and the captures into the frame before N so that they are
5759
      set on return. Doing this for all assertions, both positive and negative,
5760
      seems to match what Perl does. */
5761
5762
0
      if (GF_IDMASK(N->group_frame_type) == GF_CONDASSERT)
5763
0
        {
5764
0
        memcpy((char *)P + offsetof(heapframe, ovector), Fovector,
5765
0
          Foffset_top * sizeof(PCRE2_SIZE));
5766
0
        P->offset_top = Foffset_top;
5767
0
        P->mark = Fmark;
5768
0
        Fback_frame = (char *)F - (char *)P;
5769
0
        RRETURN(MATCH_MATCH);
5770
0
        }
5771
0
      }
5772
0
    else P = NULL;   /* Indicates starting frame not recorded */
5773
5774
    /* The group was not a conditional assertion. */
5775
5776
0
    switch (*bracode)
5777
0
      {
5778
0
      case OP_BRA:    /* No need to do anything for these */
5779
0
      case OP_COND:
5780
0
      case OP_SCOND:
5781
0
      break;
5782
5783
      /* Non-atomic positive assertions are like OP_BRA, except that the
5784
      subject pointer must be put back to where it was at the start of the
5785
      assertion. */
5786
5787
0
      case OP_ASSERT_NA:
5788
0
      case OP_ASSERTBACK_NA:
5789
0
      if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
5790
0
      Feptr = P->eptr;
5791
0
      break;
5792
5793
      /* Atomic positive assertions are like OP_ONCE, except that in addition
5794
      the subject pointer must be put back to where it was at the start of the
5795
      assertion. */
5796
5797
0
      case OP_ASSERT:
5798
0
      case OP_ASSERTBACK:
5799
0
      if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
5800
0
      Feptr = P->eptr;
5801
      /* Fall through */
5802
5803
      /* For an atomic group, discard internal backtracking points. We must
5804
      also ensure that any remaining branches within the top-level of the group
5805
      are not tried. Do this by adjusting the code pointer within the backtrack
5806
      frame so that it points to the final branch. */
5807
5808
0
      case OP_ONCE:
5809
0
      Fback_frame = ((char *)F - (char *)P);
5810
0
      for (;;)
5811
0
        {
5812
0
        uint32_t y = GET(P->ecode,1);
5813
0
        if ((P->ecode)[y] != OP_ALT) break;
5814
0
        P->ecode += y;
5815
0
        }
5816
0
      break;
5817
5818
      /* A matching negative assertion returns MATCH, which is turned into
5819
      NOMATCH at the assertion level. */
5820
5821
0
      case OP_ASSERT_NOT:
5822
0
      case OP_ASSERTBACK_NOT:
5823
0
      RRETURN(MATCH_MATCH);
5824
5825
      /* At the end of a script run, apply the script-checking rules. This code
5826
      will never be exercised if Unicode support is not compiled, because in
5827
      that environment script runs cause an error at compile time. */
5828
5829
0
      case OP_SCRIPT_RUN:
5830
0
      if (!PRIV(script_run)(P->eptr, Feptr, utf)) RRETURN(MATCH_NOMATCH);
5831
0
      break;
5832
5833
      /* Whole-pattern recursion is coded as a recurse into group 0, so it
5834
      won't be picked up here. Instead, we catch it when the OP_END is reached.
5835
      Other recursion is handled here. */
5836
5837
0
      case OP_CBRA:
5838
0
      case OP_CBRAPOS:
5839
0
      case OP_SCBRA:
5840
0
      case OP_SCBRAPOS:
5841
0
      number = GET2(bracode, 1+LINK_SIZE);
5842
5843
      /* Handle a recursively called group. We reinstate the previous set of
5844
      captures and then carry on after the recursion call. */
5845
5846
0
      if (Fcurrent_recurse == number)
5847
0
        {
5848
0
        P = (heapframe *)((char *)N - frame_size);
5849
0
        memcpy((char *)F + offsetof(heapframe, ovector), P->ovector,
5850
0
          P->offset_top * sizeof(PCRE2_SIZE));
5851
0
        Foffset_top = P->offset_top;
5852
0
        Fcapture_last = P->capture_last;
5853
0
        Fcurrent_recurse = P->current_recurse;
5854
0
        Fecode = P->ecode + 1 + LINK_SIZE;
5855
0
        continue;  /* With next opcode */
5856
0
        }
5857
5858
      /* Deal with actual capturing. */
5859
5860
0
      offset = (number << 1) - 2;
5861
0
      Fcapture_last = number;
5862
0
      Fovector[offset] = P->eptr - mb->start_subject;
5863
0
      Fovector[offset+1] = Feptr - mb->start_subject;
5864
0
      if (offset >= Foffset_top) Foffset_top = offset + 2;
5865
0
      break;
5866
0
      }  /* End actions relating to the starting opcode */
5867
5868
    /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
5869
    and return the MATCH_KETRPOS. This makes it possible to do the repeats one
5870
    at a time from the outer level. This must precede the empty string test -
5871
    in this case that test is done at the outer level. */
5872
5873
0
    if (*Fecode == OP_KETRPOS)
5874
0
      {
5875
0
      memcpy((char *)P + offsetof(heapframe, eptr),
5876
0
             (char *)F + offsetof(heapframe, eptr),
5877
0
             frame_copy_size);
5878
0
      RRETURN(MATCH_KETRPOS);
5879
0
      }
5880
5881
    /* Handle the different kinds of closing brackets. A non-repeating ket
5882
    needs no special action, just continuing at this level. This also happens
5883
    for the repeating kets if the group matched no characters, in order to
5884
    forcibly break infinite loops. Otherwise, the repeating kets try the rest
5885
    of the pattern or restart from the preceding bracket, in the appropriate
5886
    order. */
5887
5888
0
    if (Fop != OP_KET && (P == NULL || Feptr != P->eptr))
5889
0
      {
5890
0
      if (Fop == OP_KETRMIN)
5891
0
        {
5892
0
        RMATCH(Fecode + 1 + LINK_SIZE, RM6);
5893
0
        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5894
0
        Fecode -= GET(Fecode, 1);
5895
0
        break;   /* End of ket processing */
5896
0
        }
5897
5898
      /* Repeat the maximum number of times (KETRMAX) */
5899
5900
0
      RMATCH(bracode, RM7);
5901
0
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5902
0
      }
5903
5904
    /* Carry on at this level for a non-repeating ket, or after matching an
5905
    empty string, or after repeating for a maximum number of times. */
5906
5907
0
    Fecode += 1 + LINK_SIZE;
5908
0
    break;
5909
5910
5911
    /* ===================================================================== */
5912
    /* Start and end of line assertions, not multiline mode. */
5913
5914
0
    case OP_CIRC:   /* Start of line, unless PCRE2_NOTBOL is set. */
5915
0
    if (Feptr != mb->start_subject || (mb->moptions & PCRE2_NOTBOL) != 0)
5916
0
      RRETURN(MATCH_NOMATCH);
5917
0
    Fecode++;
5918
0
    break;
5919
5920
0
    case OP_SOD:    /* Unconditional start of subject */
5921
0
    if (Feptr != mb->start_subject) RRETURN(MATCH_NOMATCH);
5922
0
    Fecode++;
5923
0
    break;
5924
5925
    /* When PCRE2_NOTEOL is unset, assert before the subject end, or a
5926
    terminating newline unless PCRE2_DOLLAR_ENDONLY is set. */
5927
5928
0
    case OP_DOLL:
5929
0
    if ((mb->moptions & PCRE2_NOTEOL) != 0) RRETURN(MATCH_NOMATCH);
5930
0
    if ((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0) goto ASSERT_NL_OR_EOS;
5931
5932
    /* Fall through */
5933
    /* Unconditional end of subject assertion (\z) */
5934
5935
0
    case OP_EOD:
5936
0
    if (Feptr < mb->end_subject) RRETURN(MATCH_NOMATCH);
5937
0
    if (mb->partial != 0)
5938
0
      {
5939
0
      mb->hitend = TRUE;
5940
0
      if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
5941
0
      }
5942
0
    Fecode++;
5943
0
    break;
5944
5945
    /* End of subject or ending \n assertion (\Z) */
5946
5947
0
    case OP_EODN:
5948
0
    ASSERT_NL_OR_EOS:
5949
0
    if (Feptr < mb->end_subject &&
5950
0
        (!IS_NEWLINE(Feptr) || Feptr != mb->end_subject - mb->nllen))
5951
0
      {
5952
0
      if (mb->partial != 0 &&
5953
0
          Feptr + 1 >= mb->end_subject &&
5954
0
          NLBLOCK->nltype == NLTYPE_FIXED &&
5955
0
          NLBLOCK->nllen == 2 &&
5956
0
          UCHAR21TEST(Feptr) == NLBLOCK->nl[0])
5957
0
        {
5958
0
        mb->hitend = TRUE;
5959
0
        if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
5960
0
        }
5961
0
      RRETURN(MATCH_NOMATCH);
5962
0
      }
5963
5964
    /* Either at end of string or \n before end. */
5965
5966
0
    if (mb->partial != 0)
5967
0
      {
5968
0
      mb->hitend = TRUE;
5969
0
      if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
5970
0
      }
5971
0
    Fecode++;
5972
0
    break;
5973
5974
5975
    /* ===================================================================== */
5976
    /* Start and end of line assertions, multiline mode. */
5977
5978
    /* Start of subject unless notbol, or after any newline except for one at
5979
    the very end, unless PCRE2_ALT_CIRCUMFLEX is set. */
5980
5981
0
    case OP_CIRCM:
5982
0
    if ((mb->moptions & PCRE2_NOTBOL) != 0 && Feptr == mb->start_subject)
5983
0
      RRETURN(MATCH_NOMATCH);
5984
0
    if (Feptr != mb->start_subject &&
5985
0
        ((Feptr == mb->end_subject &&
5986
0
           (mb->poptions & PCRE2_ALT_CIRCUMFLEX) == 0) ||
5987
0
         !WAS_NEWLINE(Feptr)))
5988
0
      RRETURN(MATCH_NOMATCH);
5989
0
    Fecode++;
5990
0
    break;
5991
5992
    /* Assert before any newline, or before end of subject unless noteol is
5993
    set. */
5994
5995
0
    case OP_DOLLM:
5996
0
    if (Feptr < mb->end_subject)
5997
0
      {
5998
0
      if (!IS_NEWLINE(Feptr))
5999
0
        {
6000
0
        if (mb->partial != 0 &&
6001
0
            Feptr + 1 >= mb->end_subject &&
6002
0
            NLBLOCK->nltype == NLTYPE_FIXED &&
6003
0
            NLBLOCK->nllen == 2 &&
6004
0
            UCHAR21TEST(Feptr) == NLBLOCK->nl[0])
6005
0
          {
6006
0
          mb->hitend = TRUE;
6007
0
          if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
6008
0
          }
6009
0
        RRETURN(MATCH_NOMATCH);
6010
0
        }
6011
0
      }
6012
0
    else
6013
0
      {
6014
0
      if ((mb->moptions & PCRE2_NOTEOL) != 0) RRETURN(MATCH_NOMATCH);
6015
0
      SCHECK_PARTIAL();
6016
0
      }
6017
0
    Fecode++;
6018
0
    break;
6019
6020
6021
    /* ===================================================================== */
6022
    /* Start of match assertion */
6023
6024
0
    case OP_SOM:
6025
0
    if (Feptr != mb->start_subject + mb->start_offset) RRETURN(MATCH_NOMATCH);
6026
0
    Fecode++;
6027
0
    break;
6028
6029
6030
    /* ===================================================================== */
6031
    /* Reset the start of match point */
6032
6033
0
    case OP_SET_SOM:
6034
0
    Fstart_match = Feptr;
6035
0
    Fecode++;
6036
0
    break;
6037
6038
6039
    /* ===================================================================== */
6040
    /* Word boundary assertions. Find out if the previous and current
6041
    characters are "word" characters. It takes a bit more work in UTF mode.
6042
    Characters > 255 are assumed to be "non-word" characters when PCRE2_UCP is
6043
    not set. When it is set, use Unicode properties if available, even when not
6044
    in UTF mode. Remember the earliest and latest consulted characters. */
6045
6046
0
    case OP_NOT_WORD_BOUNDARY:
6047
0
    case OP_WORD_BOUNDARY:
6048
0
    if (Feptr == mb->check_subject) prev_is_word = FALSE; else
6049
0
      {
6050
0
      PCRE2_SPTR lastptr = Feptr - 1;
6051
0
#ifdef SUPPORT_UNICODE
6052
0
      if (utf)
6053
0
        {
6054
0
        BACKCHAR(lastptr);
6055
0
        GETCHAR(fc, lastptr);
6056
0
        }
6057
0
      else
6058
0
#endif  /* SUPPORT_UNICODE */
6059
0
      fc = *lastptr;
6060
0
      if (lastptr < mb->start_used_ptr) mb->start_used_ptr = lastptr;
6061
0
#ifdef SUPPORT_UNICODE
6062
0
      if ((mb->poptions & PCRE2_UCP) != 0)
6063
0
        {
6064
0
        if (fc == '_') prev_is_word = TRUE; else
6065
0
          {
6066
0
          int cat = UCD_CATEGORY(fc);
6067
0
          prev_is_word = (cat == ucp_L || cat == ucp_N);
6068
0
          }
6069
0
        }
6070
0
      else
6071
0
#endif  /* SUPPORT_UNICODE */
6072
0
      prev_is_word = CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0;
6073
0
      }
6074
6075
    /* Get status of next character */
6076
6077
0
    if (Feptr >= mb->end_subject)
6078
0
      {
6079
0
      SCHECK_PARTIAL();
6080
0
      cur_is_word = FALSE;
6081
0
      }
6082
0
    else
6083
0
      {
6084
0
      PCRE2_SPTR nextptr = Feptr + 1;
6085
0
#ifdef SUPPORT_UNICODE
6086
0
      if (utf)
6087
0
        {
6088
0
        FORWARDCHARTEST(nextptr, mb->end_subject);
6089
0
        GETCHAR(fc, Feptr);
6090
0
        }
6091
0
      else
6092
0
#endif  /* SUPPORT_UNICODE */
6093
0
      fc = *Feptr;
6094
0
      if (nextptr > mb->last_used_ptr) mb->last_used_ptr = nextptr;
6095
0
#ifdef SUPPORT_UNICODE
6096
0
      if ((mb->poptions & PCRE2_UCP) != 0)
6097
0
        {
6098
0
        if (fc == '_') cur_is_word = TRUE; else
6099
0
          {
6100
0
          int cat = UCD_CATEGORY(fc);
6101
0
          cur_is_word = (cat == ucp_L || cat == ucp_N);
6102
0
          }
6103
0
        }
6104
0
      else
6105
0
#endif  /* SUPPORT_UNICODE */
6106
0
      cur_is_word = CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0;
6107
0
      }
6108
6109
    /* Now see if the situation is what we want */
6110
6111
0
    if ((*Fecode++ == OP_WORD_BOUNDARY)?
6112
0
         cur_is_word == prev_is_word : cur_is_word != prev_is_word)
6113
0
      RRETURN(MATCH_NOMATCH);
6114
0
    break;
6115
6116
6117
    /* ===================================================================== */
6118
    /* Backtracking (*VERB)s, with and without arguments. Note that if the
6119
    pattern is successfully matched, we do not come back from RMATCH. */
6120
6121
0
    case OP_MARK:
6122
0
    Fmark = mb->nomatch_mark = Fecode + 2;
6123
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM12);
6124
6125
    /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
6126
    argument, and we must check whether that argument matches this MARK's
6127
    argument. It is passed back in mb->verb_skip_ptr. If it does match, we
6128
    return MATCH_SKIP with mb->verb_skip_ptr now pointing to the subject
6129
    position that corresponds to this mark. Otherwise, pass back the return
6130
    code unaltered. */
6131
6132
0
    if (rrc == MATCH_SKIP_ARG &&
6133
0
             PRIV(strcmp)(Fecode + 2, mb->verb_skip_ptr) == 0)
6134
0
      {
6135
0
      mb->verb_skip_ptr = Feptr;   /* Pass back current position */
6136
0
      RRETURN(MATCH_SKIP);
6137
0
      }
6138
0
    RRETURN(rrc);
6139
6140
0
    case OP_FAIL:
6141
0
    RRETURN(MATCH_NOMATCH);
6142
6143
    /* Record the current recursing group number in mb->verb_current_recurse
6144
    when a backtracking return such as MATCH_COMMIT is given. This enables the
6145
    recurse processing to catch verbs from within the recursion. */
6146
6147
0
    case OP_COMMIT:
6148
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM13);
6149
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6150
0
    mb->verb_current_recurse = Fcurrent_recurse;
6151
0
    RRETURN(MATCH_COMMIT);
6152
6153
0
    case OP_COMMIT_ARG:
6154
0
    Fmark = mb->nomatch_mark = Fecode + 2;
6155
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM36);
6156
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6157
0
    mb->verb_current_recurse = Fcurrent_recurse;
6158
0
    RRETURN(MATCH_COMMIT);
6159
6160
0
    case OP_PRUNE:
6161
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM14);
6162
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6163
0
    mb->verb_current_recurse = Fcurrent_recurse;
6164
0
    RRETURN(MATCH_PRUNE);
6165
6166
0
    case OP_PRUNE_ARG:
6167
0
    Fmark = mb->nomatch_mark = Fecode + 2;
6168
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM15);
6169
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6170
0
    mb->verb_current_recurse = Fcurrent_recurse;
6171
0
    RRETURN(MATCH_PRUNE);
6172
6173
0
    case OP_SKIP:
6174
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM16);
6175
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6176
0
    mb->verb_skip_ptr = Feptr;   /* Pass back current position */
6177
0
    mb->verb_current_recurse = Fcurrent_recurse;
6178
0
    RRETURN(MATCH_SKIP);
6179
6180
    /* Note that, for Perl compatibility, SKIP with an argument does NOT set
6181
    nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was
6182
    not a matching mark, we have to re-run the match, ignoring the SKIP_ARG
6183
    that failed and any that precede it (either they also failed, or were not
6184
    triggered). To do this, we maintain a count of executed SKIP_ARGs. If a
6185
    SKIP_ARG gets to top level, the match is re-run with mb->ignore_skip_arg
6186
    set to the count of the one that failed. */
6187
6188
0
    case OP_SKIP_ARG:
6189
0
    mb->skip_arg_count++;
6190
0
    if (mb->skip_arg_count <= mb->ignore_skip_arg)
6191
0
      {
6192
0
      Fecode += PRIV(OP_lengths)[*Fecode] + Fecode[1];
6193
0
      break;
6194
0
      }
6195
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM17);
6196
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6197
6198
    /* Pass back the current skip name and return the special MATCH_SKIP_ARG
6199
    return code. This will either be caught by a matching MARK, or get to the
6200
    top, where it causes a rematch with mb->ignore_skip_arg set to the value of
6201
    mb->skip_arg_count. */
6202
6203
0
    mb->verb_skip_ptr = Fecode + 2;
6204
0
    mb->verb_current_recurse = Fcurrent_recurse;
6205
0
    RRETURN(MATCH_SKIP_ARG);
6206
6207
    /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
6208
    the branch in which it occurs can be determined. */
6209
6210
0
    case OP_THEN:
6211
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM18);
6212
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6213
0
    mb->verb_ecode_ptr = Fecode;
6214
0
    mb->verb_current_recurse = Fcurrent_recurse;
6215
0
    RRETURN(MATCH_THEN);
6216
6217
0
    case OP_THEN_ARG:
6218
0
    Fmark = mb->nomatch_mark = Fecode + 2;
6219
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM19);
6220
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6221
0
    mb->verb_ecode_ptr = Fecode;
6222
0
    mb->verb_current_recurse = Fcurrent_recurse;
6223
0
    RRETURN(MATCH_THEN);
6224
6225
6226
    /* ===================================================================== */
6227
    /* There's been some horrible disaster. Arrival here can only mean there is
6228
    something seriously wrong in the code above or the OP_xxx definitions. */
6229
6230
0
    default:
6231
0
    return PCRE2_ERROR_INTERNAL;
6232
0
    }
6233
6234
  /* Do not insert any code in here without much thought; it is assumed
6235
  that "continue" in the code above comes out to here to repeat the main
6236
  loop. */
6237
6238
0
  }  /* End of main loop */
6239
/* Control never reaches here */
6240
6241
6242
/* ========================================================================= */
6243
/* The RRETURN() macro jumps here. The number that is saved in Freturn_id
6244
indicates which label we actually want to return to. The value in Frdepth is
6245
the index number of the frame in the vector. The return value has been placed
6246
in rrc. */
6247
6248
0
#define LBL(val) case val: goto L_RM##val;
6249
6250
0
RETURN_SWITCH:
6251
0
if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
6252
0
if (Frdepth == 0) return rrc;                     /* Exit from the top level */
6253
0
F = (heapframe *)((char *)F - Fback_frame);       /* Backtrack */
6254
0
mb->cb->callout_flags |= PCRE2_CALLOUT_BACKTRACK; /* Note for callouts */
6255
6256
#ifdef DEBUG_SHOW_RMATCH
6257
fprintf(stderr, "++ RETURN %d to %d\n", rrc, Freturn_id);
6258
#endif
6259
6260
0
switch (Freturn_id)
6261
0
  {
6262
0
  LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
6263
0
  LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)
6264
0
  LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)
6265
0
  LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)
6266
0
  LBL(33) LBL(34) LBL(35) LBL(36)
6267
6268
0
#ifdef SUPPORT_WIDE_CHARS
6269
0
  LBL(100) LBL(101)
6270
0
#endif
6271
6272
0
#ifdef SUPPORT_UNICODE
6273
0
  LBL(200) LBL(201) LBL(202) LBL(203) LBL(204) LBL(205) LBL(206)
6274
0
  LBL(207) LBL(208) LBL(209) LBL(210) LBL(211) LBL(212) LBL(213)
6275
0
  LBL(214) LBL(215) LBL(216) LBL(217) LBL(218) LBL(219) LBL(220)
6276
0
  LBL(221) LBL(222) LBL(223) LBL(224) LBL(225)
6277
0
#endif
6278
6279
0
  default:
6280
0
  return PCRE2_ERROR_INTERNAL;
6281
0
  }
6282
0
#undef LBL
6283
0
}
6284
6285
6286
/*************************************************
6287
*           Match a Regular Expression           *
6288
*************************************************/
6289
6290
/* This function applies a compiled pattern to a subject string and picks out
6291
portions of the string if it matches. Two elements in the vector are set for
6292
each substring: the offsets to the start and end of the substring.
6293
6294
Arguments:
6295
  code            points to the compiled expression
6296
  subject         points to the subject string
6297
  length          length of subject string (may contain binary zeros)
6298
  start_offset    where to start in the subject string
6299
  options         option bits
6300
  match_data      points to a match_data block
6301
  mcontext        points to a PCRE2 match context
6302
6303
Returns:          > 0 => success; value is the number of ovector pairs filled
6304
                  = 0 => success, but ovector is not big enough
6305
                  = -1 => failed to match (PCRE2_ERROR_NOMATCH)
6306
                  = -2 => partial match (PCRE2_ERROR_PARTIAL)
6307
                  < -2 => some kind of unexpected problem
6308
*/
6309
6310
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
6311
pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
6312
  PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
6313
  pcre2_match_context *mcontext)
6314
0
{
6315
0
int rc;
6316
0
int was_zero_terminated = 0;
6317
0
const uint8_t *start_bits = NULL;
6318
0
const pcre2_real_code *re = (const pcre2_real_code *)code;
6319
6320
0
BOOL anchored;
6321
0
BOOL firstline;
6322
0
BOOL has_first_cu = FALSE;
6323
0
BOOL has_req_cu = FALSE;
6324
0
BOOL startline;
6325
6326
0
#if PCRE2_CODE_UNIT_WIDTH == 8
6327
0
PCRE2_SPTR memchr_found_first_cu;
6328
0
PCRE2_SPTR memchr_found_first_cu2;
6329
0
#endif
6330
6331
0
PCRE2_UCHAR first_cu = 0;
6332
0
PCRE2_UCHAR first_cu2 = 0;
6333
0
PCRE2_UCHAR req_cu = 0;
6334
0
PCRE2_UCHAR req_cu2 = 0;
6335
6336
0
PCRE2_SPTR bumpalong_limit;
6337
0
PCRE2_SPTR end_subject;
6338
0
PCRE2_SPTR true_end_subject;
6339
0
PCRE2_SPTR start_match;
6340
0
PCRE2_SPTR req_cu_ptr;
6341
0
PCRE2_SPTR start_partial;
6342
0
PCRE2_SPTR match_partial;
6343
6344
#ifdef SUPPORT_JIT
6345
BOOL use_jit;
6346
#endif
6347
6348
/* This flag is needed even when Unicode is not supported for convenience
6349
(it is used by the IS_NEWLINE macro). */
6350
6351
0
BOOL utf = FALSE;
6352
6353
0
#ifdef SUPPORT_UNICODE
6354
0
BOOL ucp = FALSE;
6355
0
BOOL allow_invalid;
6356
0
uint32_t fragment_options = 0;
6357
#ifdef SUPPORT_JIT
6358
BOOL jit_checked_utf = FALSE;
6359
#endif
6360
0
#endif  /* SUPPORT_UNICODE */
6361
6362
0
PCRE2_SIZE frame_size;
6363
0
PCRE2_SIZE heapframes_size;
6364
6365
/* We need to have mb as a pointer to a match block, because the IS_NEWLINE
6366
macro is used below, and it expects NLBLOCK to be defined as a pointer. */
6367
6368
0
pcre2_callout_block cb;
6369
0
match_block actual_match_block;
6370
0
match_block *mb = &actual_match_block;
6371
6372
/* Recognize NULL, length 0 as an empty string. */
6373
6374
0
if (subject == NULL && length == 0) subject = (PCRE2_SPTR)"";
6375
6376
/* Plausibility checks */
6377
6378
0
if ((options & ~PUBLIC_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION;
6379
0
if (code == NULL || subject == NULL || match_data == NULL)
6380
0
  return PCRE2_ERROR_NULL;
6381
6382
0
start_match = subject + start_offset;
6383
0
req_cu_ptr = start_match - 1;
6384
0
if (length == PCRE2_ZERO_TERMINATED)
6385
0
  {
6386
0
  length = PRIV(strlen)(subject);
6387
0
  was_zero_terminated = 1;
6388
0
  }
6389
0
true_end_subject = end_subject = subject + length;
6390
6391
0
if (start_offset > length) return PCRE2_ERROR_BADOFFSET;
6392
6393
/* Check that the first field in the block is the magic number. */
6394
6395
0
if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
6396
6397
/* Check the code unit width. */
6398
6399
0
if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8)
6400
0
  return PCRE2_ERROR_BADMODE;
6401
6402
/* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the
6403
options variable for this function. Users of PCRE2 who are not calling the
6404
function directly would like to have a way of setting these flags, in the same
6405
way that they can set pcre2_compile() flags like PCRE2_NO_AUTOPOSSESS with
6406
constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and
6407
(*NOTEMPTY_ATSTART) set bits in the pattern's "flags" field which we now
6408
transfer to the options for this function. The bits are guaranteed to be
6409
adjacent, but do not have the same values. This bit of Boolean trickery assumes
6410
that the match-time bits are not more significant than the flag bits. If by
6411
accident this is not the case, a compile-time division by zero error will
6412
occur. */
6413
6414
0
#define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
6415
0
#define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART)
6416
0
options |= (re->flags & FF) / ((FF & (~FF+1)) / (OO & (~OO+1)));
6417
0
#undef FF
6418
0
#undef OO
6419
6420
/* If the pattern was successfully studied with JIT support, we will run the
6421
JIT executable instead of the rest of this function. Most options must be set
6422
at compile time for the JIT code to be usable. */
6423
6424
#ifdef SUPPORT_JIT
6425
use_jit = (re->executable_jit != NULL &&
6426
          (options & ~PUBLIC_JIT_MATCH_OPTIONS) == 0);
6427
#endif
6428
6429
/* Initialize UTF/UCP parameters. */
6430
6431
0
#ifdef SUPPORT_UNICODE
6432
0
utf = (re->overall_options & PCRE2_UTF) != 0;
6433
0
allow_invalid = (re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0;
6434
0
ucp = (re->overall_options & PCRE2_UCP) != 0;
6435
0
#endif  /* SUPPORT_UNICODE */
6436
6437
/* Convert the partial matching flags into an integer. */
6438
6439
0
mb->partial = ((options & PCRE2_PARTIAL_HARD) != 0)? 2 :
6440
0
              ((options & PCRE2_PARTIAL_SOFT) != 0)? 1 : 0;
6441
6442
/* Partial matching and PCRE2_ENDANCHORED are currently not allowed at the same
6443
time. */
6444
6445
0
if (mb->partial != 0 &&
6446
0
   ((re->overall_options | options) & PCRE2_ENDANCHORED) != 0)
6447
0
  return PCRE2_ERROR_BADOPTION;
6448
6449
/* It is an error to set an offset limit without setting the flag at compile
6450
time. */
6451
6452
0
if (mcontext != NULL && mcontext->offset_limit != PCRE2_UNSET &&
6453
0
     (re->overall_options & PCRE2_USE_OFFSET_LIMIT) == 0)
6454
0
  return PCRE2_ERROR_BADOFFSETLIMIT;
6455
6456
/* If the match data block was previously used with PCRE2_COPY_MATCHED_SUBJECT,
6457
free the memory that was obtained. Set the field to NULL for no match cases. */
6458
6459
0
if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)
6460
0
  {
6461
0
  match_data->memctl.free((void *)match_data->subject,
6462
0
    match_data->memctl.memory_data);
6463
0
  match_data->flags &= ~PCRE2_MD_COPIED_SUBJECT;
6464
0
  }
6465
0
match_data->subject = NULL;
6466
6467
/* Zero the error offset in case the first code unit is invalid UTF. */
6468
6469
0
match_data->startchar = 0;
6470
6471
6472
/* ============================= JIT matching ============================== */
6473
6474
/* Prepare for JIT matching. Check a UTF string for validity unless no check is
6475
requested or invalid UTF can be handled. We check only the portion of the
6476
subject that might be inspected during matching - from the offset minus the
6477
maximum lookbehind to the given length. This saves time when a small part of a
6478
large subject is being matched by the use of a starting offset. Note that the
6479
maximum lookbehind is a number of characters, not code units. */
6480
6481
#ifdef SUPPORT_JIT
6482
if (use_jit)
6483
  {
6484
#ifdef SUPPORT_UNICODE
6485
  if (utf && (options & PCRE2_NO_UTF_CHECK) == 0 && !allow_invalid)
6486
    {
6487
#if PCRE2_CODE_UNIT_WIDTH != 32
6488
    unsigned int i;
6489
#endif
6490
6491
    /* For 8-bit and 16-bit UTF, check that the first code unit is a valid
6492
    character start. */
6493
6494
#if PCRE2_CODE_UNIT_WIDTH != 32
6495
    if (start_match < end_subject && NOT_FIRSTCU(*start_match))
6496
      {
6497
      if (start_offset > 0) return PCRE2_ERROR_BADUTFOFFSET;
6498
#if PCRE2_CODE_UNIT_WIDTH == 8
6499
      return PCRE2_ERROR_UTF8_ERR20;  /* Isolated 0x80 byte */
6500
#else
6501
      return PCRE2_ERROR_UTF16_ERR3;  /* Isolated low surrogate */
6502
#endif
6503
      }
6504
#endif  /* WIDTH != 32 */
6505
6506
    /* Move back by the maximum lookbehind, just in case it happens at the very
6507
    start of matching. */
6508
6509
#if PCRE2_CODE_UNIT_WIDTH != 32
6510
    for (i = re->max_lookbehind; i > 0 && start_match > subject; i--)
6511
      {
6512
      start_match--;
6513
      while (start_match > subject &&
6514
#if PCRE2_CODE_UNIT_WIDTH == 8
6515
      (*start_match & 0xc0) == 0x80)
6516
#else  /* 16-bit */
6517
      (*start_match & 0xfc00) == 0xdc00)
6518
#endif
6519
        start_match--;
6520
      }
6521
#else  /* PCRE2_CODE_UNIT_WIDTH != 32 */
6522
6523
    /* In the 32-bit library, one code unit equals one character. However,
6524
    we cannot just subtract the lookbehind and then compare pointers, because
6525
    a very large lookbehind could create an invalid pointer. */
6526
6527
    if (start_offset >= re->max_lookbehind)
6528
      start_match -= re->max_lookbehind;
6529
    else
6530
      start_match = subject;
6531
#endif  /* PCRE2_CODE_UNIT_WIDTH != 32 */
6532
6533
    /* Validate the relevant portion of the subject. Adjust the offset of an
6534
    invalid code point to be an absolute offset in the whole string. */
6535
6536
    match_data->rc = PRIV(valid_utf)(start_match,
6537
      length - (start_match - subject), &(match_data->startchar));
6538
    if (match_data->rc != 0)
6539
      {
6540
      match_data->startchar += start_match - subject;
6541
      return match_data->rc;
6542
      }
6543
    jit_checked_utf = TRUE;
6544
    }
6545
#endif  /* SUPPORT_UNICODE */
6546
6547
  /* If JIT returns BADOPTION, which means that the selected complete or
6548
  partial matching mode was not compiled, fall through to the interpreter. */
6549
6550
  rc = pcre2_jit_match(code, subject, length, start_offset, options,
6551
    match_data, mcontext);
6552
  if (rc != PCRE2_ERROR_JIT_BADOPTION)
6553
    {
6554
    if (rc >= 0 && (options & PCRE2_COPY_MATCHED_SUBJECT) != 0)
6555
      {
6556
      length = CU2BYTES(length + was_zero_terminated);
6557
      match_data->subject = match_data->memctl.malloc(length,
6558
        match_data->memctl.memory_data);
6559
      if (match_data->subject == NULL) return PCRE2_ERROR_NOMEMORY;
6560
      memcpy((void *)match_data->subject, subject, length);
6561
      match_data->flags |= PCRE2_MD_COPIED_SUBJECT;
6562
      }
6563
    return rc;
6564
    }
6565
  }
6566
#endif  /* SUPPORT_JIT */
6567
6568
/* ========================= End of JIT matching ========================== */
6569
6570
6571
/* Proceed with non-JIT matching. The default is to allow lookbehinds to the
6572
start of the subject. A UTF check when there is a non-zero offset may change
6573
this. */
6574
6575
0
mb->check_subject = subject;
6576
6577
/* If a UTF subject string was not checked for validity in the JIT code above,
6578
check it here, and handle support for invalid UTF strings. The check above
6579
happens only when invalid UTF is not supported and PCRE2_NO_UTF_CHECK is unset.
6580
If we get here in those circumstances, it means the subject string is valid,
6581
but for some reason JIT matching was not successful. There is no need to check
6582
the subject again.
6583
6584
We check only the portion of the subject that might be inspected during
6585
matching - from the offset minus the maximum lookbehind to the given length.
6586
This saves time when a small part of a large subject is being matched by the
6587
use of a starting offset. Note that the maximum lookbehind is a number of
6588
characters, not code units.
6589
6590
Note also that support for invalid UTF forces a check, overriding the setting
6591
of PCRE2_NO_UTF_CHECK. */
6592
6593
0
#ifdef SUPPORT_UNICODE
6594
0
if (utf &&
6595
#ifdef SUPPORT_JIT
6596
    !jit_checked_utf &&
6597
#endif
6598
0
    ((options & PCRE2_NO_UTF_CHECK) == 0 || allow_invalid))
6599
0
  {
6600
0
#if PCRE2_CODE_UNIT_WIDTH != 32
6601
0
  BOOL skipped_bad_start = FALSE;
6602
0
#endif
6603
6604
  /* For 8-bit and 16-bit UTF, check that the first code unit is a valid
6605
  character start. If we are handling invalid UTF, just skip over such code
6606
  units. Otherwise, give an appropriate error. */
6607
6608
0
#if PCRE2_CODE_UNIT_WIDTH != 32
6609
0
  if (allow_invalid)
6610
0
    {
6611
0
    while (start_match < end_subject && NOT_FIRSTCU(*start_match))
6612
0
      {
6613
0
      start_match++;
6614
0
      skipped_bad_start = TRUE;
6615
0
      }
6616
0
    }
6617
0
  else if (start_match < end_subject && NOT_FIRSTCU(*start_match))
6618
0
    {
6619
0
    if (start_offset > 0) return PCRE2_ERROR_BADUTFOFFSET;
6620
0
#if PCRE2_CODE_UNIT_WIDTH == 8
6621
0
    return PCRE2_ERROR_UTF8_ERR20;  /* Isolated 0x80 byte */
6622
#else
6623
    return PCRE2_ERROR_UTF16_ERR3;  /* Isolated low surrogate */
6624
#endif
6625
0
    }
6626
0
#endif  /* WIDTH != 32 */
6627
6628
  /* The mb->check_subject field points to the start of UTF checking;
6629
  lookbehinds can go back no further than this. */
6630
6631
0
  mb->check_subject = start_match;
6632
6633
  /* Move back by the maximum lookbehind, just in case it happens at the very
6634
  start of matching, but don't do this if we skipped bad 8-bit or 16-bit code
6635
  units above. */
6636
6637
0
#if PCRE2_CODE_UNIT_WIDTH != 32
6638
0
  if (!skipped_bad_start)
6639
0
    {
6640
0
    unsigned int i;
6641
0
    for (i = re->max_lookbehind; i > 0 && mb->check_subject > subject; i--)
6642
0
      {
6643
0
      mb->check_subject--;
6644
0
      while (mb->check_subject > subject &&
6645
0
#if PCRE2_CODE_UNIT_WIDTH == 8
6646
0
      (*mb->check_subject & 0xc0) == 0x80)
6647
#else  /* 16-bit */
6648
      (*mb->check_subject & 0xfc00) == 0xdc00)
6649
#endif
6650
0
        mb->check_subject--;
6651
0
      }
6652
0
    }
6653
#else  /* PCRE2_CODE_UNIT_WIDTH != 32 */
6654
6655
  /* In the 32-bit library, one code unit equals one character. However,
6656
  we cannot just subtract the lookbehind and then compare pointers, because
6657
  a very large lookbehind could create an invalid pointer. */
6658
6659
  if (start_offset >= re->max_lookbehind)
6660
    mb->check_subject -= re->max_lookbehind;
6661
  else
6662
    mb->check_subject = subject;
6663
#endif  /* PCRE2_CODE_UNIT_WIDTH != 32 */
6664
6665
  /* Validate the relevant portion of the subject. There's a loop in case we
6666
  encounter bad UTF in the characters preceding start_match which we are
6667
  scanning because of a lookbehind. */
6668
6669
0
  for (;;)
6670
0
    {
6671
0
    match_data->rc = PRIV(valid_utf)(mb->check_subject,
6672
0
      length - (mb->check_subject - subject), &(match_data->startchar));
6673
6674
0
    if (match_data->rc == 0) break;   /* Valid UTF string */
6675
6676
    /* Invalid UTF string. Adjust the offset to be an absolute offset in the
6677
    whole string. If we are handling invalid UTF strings, set end_subject to
6678
    stop before the bad code unit, and set the options to "not end of line".
6679
    Otherwise return the error. */
6680
6681
0
    match_data->startchar += mb->check_subject - subject;
6682
0
    if (!allow_invalid || match_data->rc > 0) return match_data->rc;
6683
0
    end_subject = subject + match_data->startchar;
6684
6685
    /* If the end precedes start_match, it means there is invalid UTF in the
6686
    extra code units we reversed over because of a lookbehind. Advance past the
6687
    first bad code unit, and then skip invalid character starting code units in
6688
    8-bit and 16-bit modes, and try again with the original end point. */
6689
6690
0
    if (end_subject < start_match)
6691
0
      {
6692
0
      mb->check_subject = end_subject + 1;
6693
0
#if PCRE2_CODE_UNIT_WIDTH != 32
6694
0
      while (mb->check_subject < start_match && NOT_FIRSTCU(*mb->check_subject))
6695
0
        mb->check_subject++;
6696
0
#endif
6697
0
      end_subject = true_end_subject;
6698
0
      }
6699
6700
    /* Otherwise, set the not end of line option, and do the match. */
6701
6702
0
    else
6703
0
      {
6704
0
      fragment_options = PCRE2_NOTEOL;
6705
0
      break;
6706
0
      }
6707
0
    }
6708
0
  }
6709
0
#endif  /* SUPPORT_UNICODE */
6710
6711
/* A NULL match context means "use a default context", but we take the memory
6712
control functions from the pattern. */
6713
6714
0
if (mcontext == NULL)
6715
0
  {
6716
0
  mcontext = (pcre2_match_context *)(&PRIV(default_match_context));
6717
0
  mb->memctl = re->memctl;
6718
0
  }
6719
0
else mb->memctl = mcontext->memctl;
6720
6721
0
anchored = ((re->overall_options | options) & PCRE2_ANCHORED) != 0;
6722
0
firstline = (re->overall_options & PCRE2_FIRSTLINE) != 0;
6723
0
startline = (re->flags & PCRE2_STARTLINE) != 0;
6724
0
bumpalong_limit = (mcontext->offset_limit == PCRE2_UNSET)?
6725
0
  true_end_subject : subject + mcontext->offset_limit;
6726
6727
/* Initialize and set up the fixed fields in the callout block, with a pointer
6728
in the match block. */
6729
6730
0
mb->cb = &cb;
6731
0
cb.version = 2;
6732
0
cb.subject = subject;
6733
0
cb.subject_length = (PCRE2_SIZE)(end_subject - subject);
6734
0
cb.callout_flags = 0;
6735
6736
/* Fill in the remaining fields in the match block, except for moptions, which
6737
gets set later. */
6738
6739
0
mb->callout = mcontext->callout;
6740
0
mb->callout_data = mcontext->callout_data;
6741
6742
0
mb->start_subject = subject;
6743
0
mb->start_offset = start_offset;
6744
0
mb->end_subject = end_subject;
6745
0
mb->hasthen = (re->flags & PCRE2_HASTHEN) != 0;
6746
0
mb->allowemptypartial = (re->max_lookbehind > 0) ||
6747
0
    (re->flags & PCRE2_MATCH_EMPTY) != 0;
6748
0
mb->poptions = re->overall_options;          /* Pattern options */
6749
0
mb->ignore_skip_arg = 0;
6750
0
mb->mark = mb->nomatch_mark = NULL;          /* In case never set */
6751
6752
/* The name table is needed for finding all the numbers associated with a
6753
given name, for condition testing. The code follows the name table. */
6754
6755
0
mb->name_table = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code));
6756
0
mb->name_count = re->name_count;
6757
0
mb->name_entry_size = re->name_entry_size;
6758
0
mb->start_code = mb->name_table + re->name_count * re->name_entry_size;
6759
6760
/* Process the \R and newline settings. */
6761
6762
0
mb->bsr_convention = re->bsr_convention;
6763
0
mb->nltype = NLTYPE_FIXED;
6764
0
switch(re->newline_convention)
6765
0
  {
6766
0
  case PCRE2_NEWLINE_CR:
6767
0
  mb->nllen = 1;
6768
0
  mb->nl[0] = CHAR_CR;
6769
0
  break;
6770
6771
0
  case PCRE2_NEWLINE_LF:
6772
0
  mb->nllen = 1;
6773
0
  mb->nl[0] = CHAR_NL;
6774
0
  break;
6775
6776
0
  case PCRE2_NEWLINE_NUL:
6777
0
  mb->nllen = 1;
6778
0
  mb->nl[0] = CHAR_NUL;
6779
0
  break;
6780
6781
0
  case PCRE2_NEWLINE_CRLF:
6782
0
  mb->nllen = 2;
6783
0
  mb->nl[0] = CHAR_CR;
6784
0
  mb->nl[1] = CHAR_NL;
6785
0
  break;
6786
6787
0
  case PCRE2_NEWLINE_ANY:
6788
0
  mb->nltype = NLTYPE_ANY;
6789
0
  break;
6790
6791
0
  case PCRE2_NEWLINE_ANYCRLF:
6792
0
  mb->nltype = NLTYPE_ANYCRLF;
6793
0
  break;
6794
6795
0
  default: return PCRE2_ERROR_INTERNAL;
6796
0
  }
6797
6798
/* The backtracking frames have fixed data at the front, and a PCRE2_SIZE
6799
vector at the end, whose size depends on the number of capturing parentheses in
6800
the pattern. It is not used at all if there are no capturing parentheses.
6801
6802
  frame_size                   is the total size of each frame
6803
  match_data->heapframes       is the pointer to the frames vector
6804
  match_data->heapframes_size  is the total size of the vector
6805
6806
We must pad the frame_size for alignment to ensure subsequent frames are as
6807
aligned as heapframe. Whilst ovector is word-aligned due to being a PCRE2_SIZE
6808
array, that does not guarantee it is suitably aligned for pointers, as some
6809
architectures have pointers that are larger than a size_t. */
6810
6811
0
frame_size = (offsetof(heapframe, ovector) +
6812
0
  re->top_bracket * 2 * sizeof(PCRE2_SIZE) + HEAPFRAME_ALIGNMENT - 1) &
6813
0
  ~(HEAPFRAME_ALIGNMENT - 1);
6814
6815
/* Limits set in the pattern override the match context only if they are
6816
smaller. */
6817
6818
0
mb->heap_limit = ((mcontext->heap_limit < re->limit_heap)?
6819
0
  mcontext->heap_limit : re->limit_heap) * 1024;
6820
6821
0
mb->match_limit = (mcontext->match_limit < re->limit_match)?
6822
0
  mcontext->match_limit : re->limit_match;
6823
6824
0
mb->match_limit_depth = (mcontext->depth_limit < re->limit_depth)?
6825
0
  mcontext->depth_limit : re->limit_depth;
6826
6827
/* If a pattern has very many capturing parentheses, the frame size may be very
6828
large. Set the initial frame vector size to ensure that there are at least 10
6829
available frames, but enforce a minimum of START_FRAMES_SIZE. If this is
6830
greater than the heap limit, get as large a vector as possible. Always round
6831
the size to a multiple of the frame size. */
6832
6833
0
heapframes_size = frame_size * 10;
6834
0
if (heapframes_size < START_FRAMES_SIZE) heapframes_size = START_FRAMES_SIZE;
6835
0
if (heapframes_size > mb->heap_limit)
6836
0
  {
6837
0
  if (frame_size > mb->heap_limit ) return PCRE2_ERROR_HEAPLIMIT;
6838
0
  heapframes_size = mb->heap_limit;
6839
0
  }
6840
6841
/* If an existing frame vector in the match_data block is large enough, we can
6842
use it. Otherwise, free any pre-existing vector and get a new one. */
6843
6844
0
if (match_data->heapframes_size < heapframes_size)
6845
0
  {
6846
0
  match_data->memctl.free(match_data->heapframes,
6847
0
    match_data->memctl.memory_data);
6848
0
  match_data->heapframes = match_data->memctl.malloc(heapframes_size,
6849
0
    match_data->memctl.memory_data);
6850
0
  if (match_data->heapframes == NULL)
6851
0
    {
6852
0
    match_data->heapframes_size = 0;
6853
0
    return PCRE2_ERROR_NOMEMORY;
6854
0
    }
6855
0
  match_data->heapframes_size = heapframes_size;
6856
0
  }
6857
6858
/* Write to the ovector within the first frame to mark every capture unset and
6859
to avoid uninitialized memory read errors when it is copied to a new frame. */
6860
6861
0
memset((char *)(match_data->heapframes) + offsetof(heapframe, ovector), 0xff,
6862
0
  frame_size - offsetof(heapframe, ovector));
6863
6864
/* Pointers to the individual character tables */
6865
6866
0
mb->lcc = re->tables + lcc_offset;
6867
0
mb->fcc = re->tables + fcc_offset;
6868
0
mb->ctypes = re->tables + ctypes_offset;
6869
6870
/* Set up the first code unit to match, if available. If there's no first code
6871
unit there may be a bitmap of possible first characters. */
6872
6873
0
if ((re->flags & PCRE2_FIRSTSET) != 0)
6874
0
  {
6875
0
  has_first_cu = TRUE;
6876
0
  first_cu = first_cu2 = (PCRE2_UCHAR)(re->first_codeunit);
6877
0
  if ((re->flags & PCRE2_FIRSTCASELESS) != 0)
6878
0
    {
6879
0
    first_cu2 = TABLE_GET(first_cu, mb->fcc, first_cu);
6880
0
#ifdef SUPPORT_UNICODE
6881
0
#if PCRE2_CODE_UNIT_WIDTH == 8
6882
0
    if (first_cu > 127 && ucp && !utf) first_cu2 = UCD_OTHERCASE(first_cu);
6883
#else
6884
    if (first_cu > 127 && (utf || ucp)) first_cu2 = UCD_OTHERCASE(first_cu);
6885
#endif
6886
0
#endif  /* SUPPORT_UNICODE */
6887
0
    }
6888
0
  }
6889
0
else
6890
0
  if (!startline && (re->flags & PCRE2_FIRSTMAPSET) != 0)
6891
0
    start_bits = re->start_bitmap;
6892
6893
/* There may also be a "last known required character" set. */
6894
6895
0
if ((re->flags & PCRE2_LASTSET) != 0)
6896
0
  {
6897
0
  has_req_cu = TRUE;
6898
0
  req_cu = req_cu2 = (PCRE2_UCHAR)(re->last_codeunit);
6899
0
  if ((re->flags & PCRE2_LASTCASELESS) != 0)
6900
0
    {
6901
0
    req_cu2 = TABLE_GET(req_cu, mb->fcc, req_cu);
6902
0
#ifdef SUPPORT_UNICODE
6903
0
#if PCRE2_CODE_UNIT_WIDTH == 8
6904
0
    if (req_cu > 127 && ucp && !utf) req_cu2 = UCD_OTHERCASE(req_cu);
6905
#else
6906
    if (req_cu > 127 && (utf || ucp)) req_cu2 = UCD_OTHERCASE(req_cu);
6907
#endif
6908
0
#endif  /* SUPPORT_UNICODE */
6909
0
    }
6910
0
  }
6911
6912
6913
/* ==========================================================================*/
6914
6915
/* Loop for handling unanchored repeated matching attempts; for anchored regexs
6916
the loop runs just once. */
6917
6918
0
#ifdef SUPPORT_UNICODE
6919
0
FRAGMENT_RESTART:
6920
0
#endif
6921
6922
0
start_partial = match_partial = NULL;
6923
0
mb->hitend = FALSE;
6924
6925
0
#if PCRE2_CODE_UNIT_WIDTH == 8
6926
0
memchr_found_first_cu = NULL;
6927
0
memchr_found_first_cu2 = NULL;
6928
0
#endif
6929
6930
0
for(;;)
6931
0
  {
6932
0
  PCRE2_SPTR new_start_match;
6933
6934
  /* ----------------- Start of match optimizations ---------------- */
6935
6936
  /* There are some optimizations that avoid running the match if a known
6937
  starting point is not found, or if a known later code unit is not present.
6938
  However, there is an option (settable at compile time) that disables these,
6939
  for testing and for ensuring that all callouts do actually occur. */
6940
6941
0
  if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
6942
0
    {
6943
    /* If firstline is TRUE, the start of the match is constrained to the first
6944
    line of a multiline string. That is, the match must be before or at the
6945
    first newline following the start of matching. Temporarily adjust
6946
    end_subject so that we stop the scans for a first code unit at a newline.
6947
    If the match fails at the newline, later code breaks the loop. */
6948
6949
0
    if (firstline)
6950
0
      {
6951
0
      PCRE2_SPTR t = start_match;
6952
0
#ifdef SUPPORT_UNICODE
6953
0
      if (utf)
6954
0
        {
6955
0
        while (t < end_subject && !IS_NEWLINE(t))
6956
0
          {
6957
0
          t++;
6958
0
          ACROSSCHAR(t < end_subject, t, t++);
6959
0
          }
6960
0
        }
6961
0
      else
6962
0
#endif
6963
0
      while (t < end_subject && !IS_NEWLINE(t)) t++;
6964
0
      end_subject = t;
6965
0
      }
6966
6967
    /* Anchored: check the first code unit if one is recorded. This may seem
6968
    pointless but it can help in detecting a no match case without scanning for
6969
    the required code unit. */
6970
6971
0
    if (anchored)
6972
0
      {
6973
0
      if (has_first_cu || start_bits != NULL)
6974
0
        {
6975
0
        BOOL ok = start_match < end_subject;
6976
0
        if (ok)
6977
0
          {
6978
0
          PCRE2_UCHAR c = UCHAR21TEST(start_match);
6979
0
          ok = has_first_cu && (c == first_cu || c == first_cu2);
6980
0
          if (!ok && start_bits != NULL)
6981
0
            {
6982
#if PCRE2_CODE_UNIT_WIDTH != 8
6983
            if (c > 255) c = 255;
6984
#endif
6985
0
            ok = (start_bits[c/8] & (1u << (c&7))) != 0;
6986
0
            }
6987
0
          }
6988
0
        if (!ok)
6989
0
          {
6990
0
          rc = MATCH_NOMATCH;
6991
0
          break;
6992
0
          }
6993
0
        }
6994
0
      }
6995
6996
    /* Not anchored. Advance to a unique first code unit if there is one. */
6997
6998
0
    else
6999
0
      {
7000
0
      if (has_first_cu)
7001
0
        {
7002
0
        if (first_cu != first_cu2)  /* Caseless */
7003
0
          {
7004
          /* In 16-bit and 32-bit modes we have to do our own search, so can
7005
          look for both cases at once. */
7006
7007
#if PCRE2_CODE_UNIT_WIDTH != 8
7008
          PCRE2_UCHAR smc;
7009
          while (start_match < end_subject &&
7010
                (smc = UCHAR21TEST(start_match)) != first_cu &&
7011
                 smc != first_cu2)
7012
            start_match++;
7013
#else
7014
          /* In 8-bit mode, the use of memchr() gives a big speed up, even
7015
          though we have to call it twice in order to find the earliest
7016
          occurrence of the code unit in either of its cases. Caching is used
7017
          to remember the positions of previously found code units. This can
7018
          make a huge difference when the strings are very long and only one
7019
          case is actually present. */
7020
7021
0
          PCRE2_SPTR pp1 = NULL;
7022
0
          PCRE2_SPTR pp2 = NULL;
7023
0
          PCRE2_SIZE searchlength = end_subject - start_match;
7024
7025
          /* If we haven't got a previously found position for first_cu, or if
7026
          the current starting position is later, we need to do a search. If
7027
          the code unit is not found, set it to the end. */
7028
7029
0
          if (memchr_found_first_cu == NULL ||
7030
0
              start_match > memchr_found_first_cu)
7031
0
            {
7032
0
            pp1 = memchr(start_match, first_cu, searchlength);
7033
0
            memchr_found_first_cu = (pp1 == NULL)? end_subject : pp1;
7034
0
            }
7035
7036
          /* If the start is before a previously found position, use the
7037
          previous position, or NULL if a previous search failed. */
7038
7039
0
          else pp1 = (memchr_found_first_cu == end_subject)? NULL :
7040
0
            memchr_found_first_cu;
7041
7042
          /* Do the same thing for the other case. */
7043
7044
0
          if (memchr_found_first_cu2 == NULL ||
7045
0
              start_match > memchr_found_first_cu2)
7046
0
            {
7047
0
            pp2 = memchr(start_match, first_cu2, searchlength);
7048
0
            memchr_found_first_cu2 = (pp2 == NULL)? end_subject : pp2;
7049
0
            }
7050
7051
0
          else pp2 = (memchr_found_first_cu2 == end_subject)? NULL :
7052
0
            memchr_found_first_cu2;
7053
7054
          /* Set the start to the end of the subject if neither case was found.
7055
          Otherwise, use the earlier found point. */
7056
7057
0
          if (pp1 == NULL)
7058
0
            start_match = (pp2 == NULL)? end_subject : pp2;
7059
0
          else
7060
0
            start_match = (pp2 == NULL || pp1 < pp2)? pp1 : pp2;
7061
7062
0
#endif  /* 8-bit handling */
7063
0
          }
7064
7065
        /* The caseful case is much simpler. */
7066
7067
0
        else
7068
0
          {
7069
#if PCRE2_CODE_UNIT_WIDTH != 8
7070
          while (start_match < end_subject && UCHAR21TEST(start_match) !=
7071
                 first_cu)
7072
            start_match++;
7073
#else
7074
0
          start_match = memchr(start_match, first_cu, end_subject - start_match);
7075
0
          if (start_match == NULL) start_match = end_subject;
7076
0
#endif
7077
0
          }
7078
7079
        /* If we can't find the required first code unit, having reached the
7080
        true end of the subject, break the bumpalong loop, to force a match
7081
        failure, except when doing partial matching, when we let the next cycle
7082
        run at the end of the subject. To see why, consider the pattern
7083
        /(?<=abc)def/, which partially matches "abc", even though the string
7084
        does not contain the starting character "d". If we have not reached the
7085
        true end of the subject (PCRE2_FIRSTLINE caused end_subject to be
7086
        temporarily modified) we also let the cycle run, because the matching
7087
        string is legitimately allowed to start with the first code unit of a
7088
        newline. */
7089
7090
0
        if (mb->partial == 0 && start_match >= mb->end_subject)
7091
0
          {
7092
0
          rc = MATCH_NOMATCH;
7093
0
          break;
7094
0
          }
7095
0
        }
7096
7097
      /* If there's no first code unit, advance to just after a linebreak for a
7098
      multiline match if required. */
7099
7100
0
      else if (startline)
7101
0
        {
7102
0
        if (start_match > mb->start_subject + start_offset)
7103
0
          {
7104
0
#ifdef SUPPORT_UNICODE
7105
0
          if (utf)
7106
0
            {
7107
0
            while (start_match < end_subject && !WAS_NEWLINE(start_match))
7108
0
              {
7109
0
              start_match++;
7110
0
              ACROSSCHAR(start_match < end_subject, start_match, start_match++);
7111
0
              }
7112
0
            }
7113
0
          else
7114
0
#endif
7115
0
          while (start_match < end_subject && !WAS_NEWLINE(start_match))
7116
0
            start_match++;
7117
7118
          /* If we have just passed a CR and the newline option is ANY or
7119
          ANYCRLF, and we are now at a LF, advance the match position by one
7120
          more code unit. */
7121
7122
0
          if (start_match[-1] == CHAR_CR &&
7123
0
               (mb->nltype == NLTYPE_ANY || mb->nltype == NLTYPE_ANYCRLF) &&
7124
0
               start_match < end_subject &&
7125
0
               UCHAR21TEST(start_match) == CHAR_NL)
7126
0
            start_match++;
7127
0
          }
7128
0
        }
7129
7130
      /* If there's no first code unit or a requirement for a multiline line
7131
      start, advance to a non-unique first code unit if any have been
7132
      identified. The bitmap contains only 256 bits. When code units are 16 or
7133
      32 bits wide, all code units greater than 254 set the 255 bit. */
7134
7135
0
      else if (start_bits != NULL)
7136
0
        {
7137
0
        while (start_match < end_subject)
7138
0
          {
7139
0
          uint32_t c = UCHAR21TEST(start_match);
7140
#if PCRE2_CODE_UNIT_WIDTH != 8
7141
          if (c > 255) c = 255;
7142
#endif
7143
0
          if ((start_bits[c/8] & (1u << (c&7))) != 0) break;
7144
0
          start_match++;
7145
0
          }
7146
7147
        /* See comment above in first_cu checking about the next few lines. */
7148
7149
0
        if (mb->partial == 0 && start_match >= mb->end_subject)
7150
0
          {
7151
0
          rc = MATCH_NOMATCH;
7152
0
          break;
7153
0
          }
7154
0
        }
7155
0
      }   /* End first code unit handling */
7156
7157
    /* Restore fudged end_subject */
7158
7159
0
    end_subject = mb->end_subject;
7160
7161
    /* The following two optimizations must be disabled for partial matching. */
7162
7163
0
    if (mb->partial == 0)
7164
0
      {
7165
0
      PCRE2_SPTR p;
7166
7167
      /* The minimum matching length is a lower bound; no string of that length
7168
      may actually match the pattern. Although the value is, strictly, in
7169
      characters, we treat it as code units to avoid spending too much time in
7170
      this optimization. */
7171
7172
0
      if (end_subject - start_match < re->minlength)
7173
0
        {
7174
0
        rc = MATCH_NOMATCH;
7175
0
        break;
7176
0
        }
7177
7178
      /* If req_cu is set, we know that that code unit must appear in the
7179
      subject for the (non-partial) match to succeed. If the first code unit is
7180
      set, req_cu must be later in the subject; otherwise the test starts at
7181
      the match point. This optimization can save a huge amount of backtracking
7182
      in patterns with nested unlimited repeats that aren't going to match.
7183
      Writing separate code for caseful/caseless versions makes it go faster,
7184
      as does using an autoincrement and backing off on a match. As in the case
7185
      of the first code unit, using memchr() in the 8-bit library gives a big
7186
      speed up. Unlike the first_cu check above, we do not need to call
7187
      memchr() twice in the caseless case because we only need to check for the
7188
      presence of the character in either case, not find the first occurrence.
7189
7190
      The search can be skipped if the code unit was found later than the
7191
      current starting point in a previous iteration of the bumpalong loop.
7192
7193
      HOWEVER: when the subject string is very, very long, searching to its end
7194
      can take a long time, and give bad performance on quite ordinary
7195
      anchored patterns. This showed up when somebody was matching something
7196
      like /^\d+C/ on a 32-megabyte string... so we don't do this when the
7197
      string is sufficiently long, but it's worth searching a lot more for
7198
      unanchored patterns. */
7199
7200
0
      p = start_match + (has_first_cu? 1:0);
7201
0
      if (has_req_cu && p > req_cu_ptr)
7202
0
        {
7203
0
        PCRE2_SIZE check_length = end_subject - start_match;
7204
7205
0
        if (check_length < REQ_CU_MAX ||
7206
0
              (!anchored && check_length < REQ_CU_MAX * 1000))
7207
0
          {
7208
0
          if (req_cu != req_cu2)  /* Caseless */
7209
0
            {
7210
#if PCRE2_CODE_UNIT_WIDTH != 8
7211
            while (p < end_subject)
7212
              {
7213
              uint32_t pp = UCHAR21INCTEST(p);
7214
              if (pp == req_cu || pp == req_cu2) { p--; break; }
7215
              }
7216
#else  /* 8-bit code units */
7217
0
            PCRE2_SPTR pp = p;
7218
0
            p = memchr(pp, req_cu, end_subject - pp);
7219
0
            if (p == NULL)
7220
0
              {
7221
0
              p = memchr(pp, req_cu2, end_subject - pp);
7222
0
              if (p == NULL) p = end_subject;
7223
0
              }
7224
0
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
7225
0
            }
7226
7227
          /* The caseful case */
7228
7229
0
          else
7230
0
            {
7231
#if PCRE2_CODE_UNIT_WIDTH != 8
7232
            while (p < end_subject)
7233
              {
7234
              if (UCHAR21INCTEST(p) == req_cu) { p--; break; }
7235
              }
7236
7237
#else  /* 8-bit code units */
7238
0
            p = memchr(p, req_cu, end_subject - p);
7239
0
            if (p == NULL) p = end_subject;
7240
0
#endif
7241
0
            }
7242
7243
          /* If we can't find the required code unit, break the bumpalong loop,
7244
          forcing a match failure. */
7245
7246
0
          if (p >= end_subject)
7247
0
            {
7248
0
            rc = MATCH_NOMATCH;
7249
0
            break;
7250
0
            }
7251
7252
          /* If we have found the required code unit, save the point where we
7253
          found it, so that we don't search again next time round the bumpalong
7254
          loop if the start hasn't yet passed this code unit. */
7255
7256
0
          req_cu_ptr = p;
7257
0
          }
7258
0
        }
7259
0
      }
7260
0
    }
7261
7262
  /* ------------ End of start of match optimizations ------------ */
7263
7264
  /* Give no match if we have passed the bumpalong limit. */
7265
7266
0
  if (start_match > bumpalong_limit)
7267
0
    {
7268
0
    rc = MATCH_NOMATCH;
7269
0
    break;
7270
0
    }
7271
7272
  /* OK, we can now run the match. If "hitend" is set afterwards, remember the
7273
  first starting point for which a partial match was found. */
7274
7275
0
  cb.start_match = (PCRE2_SIZE)(start_match - subject);
7276
0
  cb.callout_flags |= PCRE2_CALLOUT_STARTMATCH;
7277
7278
0
  mb->start_used_ptr = start_match;
7279
0
  mb->last_used_ptr = start_match;
7280
0
#ifdef SUPPORT_UNICODE
7281
0
  mb->moptions = options | fragment_options;
7282
#else
7283
  mb->moptions = options;
7284
#endif
7285
0
  mb->match_call_count = 0;
7286
0
  mb->end_offset_top = 0;
7287
0
  mb->skip_arg_count = 0;
7288
7289
0
  rc = match(start_match, mb->start_code, re->top_bracket, frame_size,
7290
0
    match_data, mb);
7291
7292
0
  if (mb->hitend && start_partial == NULL)
7293
0
    {
7294
0
    start_partial = mb->start_used_ptr;
7295
0
    match_partial = start_match;
7296
0
    }
7297
7298
0
  switch(rc)
7299
0
    {
7300
    /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
7301
    the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP
7302
    entirely. The only way we can do that is to re-do the match at the same
7303
    point, with a flag to force SKIP with an argument to be ignored. Just
7304
    treating this case as NOMATCH does not work because it does not check other
7305
    alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. */
7306
7307
0
    case MATCH_SKIP_ARG:
7308
0
    new_start_match = start_match;
7309
0
    mb->ignore_skip_arg = mb->skip_arg_count;
7310
0
    break;
7311
7312
    /* SKIP passes back the next starting point explicitly, but if it is no
7313
    greater than the match we have just done, treat it as NOMATCH. */
7314
7315
0
    case MATCH_SKIP:
7316
0
    if (mb->verb_skip_ptr > start_match)
7317
0
      {
7318
0
      new_start_match = mb->verb_skip_ptr;
7319
0
      break;
7320
0
      }
7321
    /* Fall through */
7322
7323
    /* NOMATCH and PRUNE advance by one character. THEN at this level acts
7324
    exactly like PRUNE. Unset ignore SKIP-with-argument. */
7325
7326
0
    case MATCH_NOMATCH:
7327
0
    case MATCH_PRUNE:
7328
0
    case MATCH_THEN:
7329
0
    mb->ignore_skip_arg = 0;
7330
0
    new_start_match = start_match + 1;
7331
0
#ifdef SUPPORT_UNICODE
7332
0
    if (utf)
7333
0
      ACROSSCHAR(new_start_match < end_subject, new_start_match,
7334
0
        new_start_match++);
7335
0
#endif
7336
0
    break;
7337
7338
    /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
7339
7340
0
    case MATCH_COMMIT:
7341
0
    rc = MATCH_NOMATCH;
7342
0
    goto ENDLOOP;
7343
7344
    /* Any other return is either a match, or some kind of error. */
7345
7346
0
    default:
7347
0
    goto ENDLOOP;
7348
0
    }
7349
7350
  /* Control reaches here for the various types of "no match at this point"
7351
  result. Reset the code to MATCH_NOMATCH for subsequent checking. */
7352
7353
0
  rc = MATCH_NOMATCH;
7354
7355
  /* If PCRE2_FIRSTLINE is set, the match must happen before or at the first
7356
  newline in the subject (though it may continue over the newline). Therefore,
7357
  if we have just failed to match, starting at a newline, do not continue. */
7358
7359
0
  if (firstline && IS_NEWLINE(start_match)) break;
7360
7361
  /* Advance to new matching position */
7362
7363
0
  start_match = new_start_match;
7364
7365
  /* Break the loop if the pattern is anchored or if we have passed the end of
7366
  the subject. */
7367
7368
0
  if (anchored || start_match > end_subject) break;
7369
7370
  /* If we have just passed a CR and we are now at a LF, and the pattern does
7371
  not contain any explicit matches for \r or \n, and the newline option is CRLF
7372
  or ANY or ANYCRLF, advance the match position by one more code unit. In
7373
  normal matching start_match will always be greater than the first position at
7374
  this stage, but a failed *SKIP can cause a return at the same point, which is
7375
  why the first test exists. */
7376
7377
0
  if (start_match > subject + start_offset &&
7378
0
      start_match[-1] == CHAR_CR &&
7379
0
      start_match < end_subject &&
7380
0
      *start_match == CHAR_NL &&
7381
0
      (re->flags & PCRE2_HASCRORLF) == 0 &&
7382
0
        (mb->nltype == NLTYPE_ANY ||
7383
0
         mb->nltype == NLTYPE_ANYCRLF ||
7384
0
         mb->nllen == 2))
7385
0
    start_match++;
7386
7387
0
  mb->mark = NULL;   /* Reset for start of next match attempt */
7388
0
  }                  /* End of for(;;) "bumpalong" loop */
7389
7390
/* ==========================================================================*/
7391
7392
/* When we reach here, one of the following stopping conditions is true:
7393
7394
(1) The match succeeded, either completely, or partially;
7395
7396
(2) The pattern is anchored or the match was failed after (*COMMIT);
7397
7398
(3) We are past the end of the subject or the bumpalong limit;
7399
7400
(4) PCRE2_FIRSTLINE is set and we have failed to match at a newline, because
7401
    this option requests that a match occur at or before the first newline in
7402
    the subject.
7403
7404
(5) Some kind of error occurred.
7405
7406
*/
7407
7408
0
ENDLOOP:
7409
7410
/* If end_subject != true_end_subject, it means we are handling invalid UTF,
7411
and have just processed a non-terminal fragment. If this resulted in no match
7412
or a partial match we must carry on to the next fragment (a partial match is
7413
returned to the caller only at the very end of the subject). A loop is used to
7414
avoid trying to match against empty fragments; if the pattern can match an
7415
empty string it would have done so already. */
7416
7417
0
#ifdef SUPPORT_UNICODE
7418
0
if (utf && end_subject != true_end_subject &&
7419
0
    (rc == MATCH_NOMATCH || rc == PCRE2_ERROR_PARTIAL))
7420
0
  {
7421
0
  for (;;)
7422
0
    {
7423
    /* Advance past the first bad code unit, and then skip invalid character
7424
    starting code units in 8-bit and 16-bit modes. */
7425
7426
0
    start_match = end_subject + 1;
7427
7428
0
#if PCRE2_CODE_UNIT_WIDTH != 32
7429
0
    while (start_match < true_end_subject && NOT_FIRSTCU(*start_match))
7430
0
      start_match++;
7431
0
#endif
7432
7433
    /* If we have hit the end of the subject, there isn't another non-empty
7434
    fragment, so give up. */
7435
7436
0
    if (start_match >= true_end_subject)
7437
0
      {
7438
0
      rc = MATCH_NOMATCH;  /* In case it was partial */
7439
0
      break;
7440
0
      }
7441
7442
    /* Check the rest of the subject */
7443
7444
0
    mb->check_subject = start_match;
7445
0
    rc = PRIV(valid_utf)(start_match, length - (start_match - subject),
7446
0
      &(match_data->startchar));
7447
7448
    /* The rest of the subject is valid UTF. */
7449
7450
0
    if (rc == 0)
7451
0
      {
7452
0
      mb->end_subject = end_subject = true_end_subject;
7453
0
      fragment_options = PCRE2_NOTBOL;
7454
0
      goto FRAGMENT_RESTART;
7455
0
      }
7456
7457
    /* A subsequent UTF error has been found; if the next fragment is
7458
    non-empty, set up to process it. Otherwise, let the loop advance. */
7459
7460
0
    else if (rc < 0)
7461
0
      {
7462
0
      mb->end_subject = end_subject = start_match + match_data->startchar;
7463
0
      if (end_subject > start_match)
7464
0
        {
7465
0
        fragment_options = PCRE2_NOTBOL|PCRE2_NOTEOL;
7466
0
        goto FRAGMENT_RESTART;
7467
0
        }
7468
0
      }
7469
0
    }
7470
0
  }
7471
0
#endif  /* SUPPORT_UNICODE */
7472
7473
/* Fill in fields that are always returned in the match data. */
7474
7475
0
match_data->code = re;
7476
0
match_data->mark = mb->mark;
7477
0
match_data->matchedby = PCRE2_MATCHEDBY_INTERPRETER;
7478
7479
/* Handle a fully successful match. Set the return code to the number of
7480
captured strings, or 0 if there were too many to fit into the ovector, and then
7481
set the remaining returned values before returning. Make a copy of the subject
7482
string if requested. */
7483
7484
0
if (rc == MATCH_MATCH)
7485
0
  {
7486
0
  match_data->rc = ((int)mb->end_offset_top >= 2 * match_data->oveccount)?
7487
0
    0 : (int)mb->end_offset_top/2 + 1;
7488
0
  match_data->startchar = start_match - subject;
7489
0
  match_data->leftchar = mb->start_used_ptr - subject;
7490
0
  match_data->rightchar = ((mb->last_used_ptr > mb->end_match_ptr)?
7491
0
    mb->last_used_ptr : mb->end_match_ptr) - subject;
7492
0
  if ((options & PCRE2_COPY_MATCHED_SUBJECT) != 0)
7493
0
    {
7494
0
    length = CU2BYTES(length + was_zero_terminated);
7495
0
    match_data->subject = match_data->memctl.malloc(length,
7496
0
      match_data->memctl.memory_data);
7497
0
    if (match_data->subject == NULL) return PCRE2_ERROR_NOMEMORY;
7498
0
    memcpy((void *)match_data->subject, subject, length);
7499
0
    match_data->flags |= PCRE2_MD_COPIED_SUBJECT;
7500
0
    }
7501
0
  else match_data->subject = subject;
7502
0
  return match_data->rc;
7503
0
  }
7504
7505
/* Control gets here if there has been a partial match, an error, or if the
7506
overall match attempt has failed at all permitted starting positions. Any mark
7507
data is in the nomatch_mark field. */
7508
7509
0
match_data->mark = mb->nomatch_mark;
7510
7511
/* For anything other than nomatch or partial match, just return the code. */
7512
7513
0
if (rc != MATCH_NOMATCH && rc != PCRE2_ERROR_PARTIAL) match_data->rc = rc;
7514
7515
/* Handle a partial match. If a "soft" partial match was requested, searching
7516
for a complete match will have continued, and the value of rc at this point
7517
will be MATCH_NOMATCH. For a "hard" partial match, it will already be
7518
PCRE2_ERROR_PARTIAL. */
7519
7520
0
else if (match_partial != NULL)
7521
0
  {
7522
0
  match_data->subject = subject;
7523
0
  match_data->ovector[0] = match_partial - subject;
7524
0
  match_data->ovector[1] = end_subject - subject;
7525
0
  match_data->startchar = match_partial - subject;
7526
0
  match_data->leftchar = start_partial - subject;
7527
0
  match_data->rightchar = end_subject - subject;
7528
0
  match_data->rc = PCRE2_ERROR_PARTIAL;
7529
0
  }
7530
7531
/* Else this is the classic nomatch case. */
7532
7533
0
else match_data->rc = PCRE2_ERROR_NOMATCH;
7534
7535
0
return match_data->rc;
7536
0
}
7537
7538
/* These #undefs are here to enable unity builds with CMake. */
7539
7540
#undef NLBLOCK /* Block containing newline information */
7541
#undef PSSTART /* Field containing processed string start */
7542
#undef PSEND   /* Field containing processed string end */
7543
7544
/* End of pcre2_match.c */