Coverage Report

Created: 2024-02-06 06:20

/src/libgit2/deps/pcre/pcre_exec.c
Line
Count
Source (jump to first uncovered line)
1
/*************************************************
2
*      Perl-Compatible Regular Expressions       *
3
*************************************************/
4
5
/* PCRE is a library of functions to support regular expressions whose syntax
6
and semantics are as close as possible to those of the Perl 5 language.
7
8
                       Written by Philip Hazel
9
           Copyright (c) 1997-2021 University of Cambridge
10
11
-----------------------------------------------------------------------------
12
Redistribution and use in source and binary forms, with or without
13
modification, are permitted provided that the following conditions are met:
14
15
    * Redistributions of source code must retain the above copyright notice,
16
      this list of conditions and the following disclaimer.
17
18
    * Redistributions in binary form must reproduce the above copyright
19
      notice, this list of conditions and the following disclaimer in the
20
      documentation and/or other materials provided with the distribution.
21
22
    * Neither the name of the University of Cambridge nor the names of its
23
      contributors may be used to endorse or promote products derived from
24
      this software without specific prior written permission.
25
26
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36
POSSIBILITY OF SUCH DAMAGE.
37
-----------------------------------------------------------------------------
38
*/
39
40
/* This module contains pcre_exec(), the externally visible function that does
41
pattern matching using an NFA algorithm, trying to mimic Perl as closely as
42
possible. There are also some static supporting functions. */
43
44
#ifdef HAVE_CONFIG_H
45
#include "config.h"
46
#endif
47
48
0
#define NLBLOCK md             /* Block containing newline information */
49
0
#define PSSTART start_subject  /* Field containing processed string start */
50
0
#define PSEND   end_subject    /* Field containing processed string end */
51
52
#include "pcre_internal.h"
53
54
/* Undefine some potentially clashing cpp symbols */
55
56
#undef min
57
#undef max
58
59
/* The md->capture_last field uses the lower 16 bits for the last captured
60
substring (which can never be greater than 65535) and a bit in the top half
61
to mean "capture vector overflowed". This odd way of doing things was
62
implemented when it was realized that preserving and restoring the overflow bit
63
whenever the last capture number was saved/restored made for a neater
64
interface, and doing it this way saved on (a) another variable, which would
65
have increased the stack frame size (a big NO-NO in PCRE) and (b) another
66
separate set of save/restore instructions. The following defines are used in
67
implementing this. */
68
69
0
#define CAPLMASK    0x0000ffff    /* The bits used for last_capture */
70
0
#define OVFLMASK    0xffff0000    /* The bits used for the overflow flag */
71
0
#define OVFLBIT     0x00010000    /* The bit that is set for overflow */
72
73
/* Values for setting in md->match_function_type to indicate two special types
74
of call to match(). We do it this way to save on using another stack variable,
75
as stack usage is to be discouraged. */
76
77
0
#define MATCH_CONDASSERT     1  /* Called to check a condition assertion */
78
0
#define MATCH_CBEGROUP       2  /* Could-be-empty unlimited repeat group */
79
80
/* Non-error returns from the match() function. Error returns are externally
81
defined PCRE_ERROR_xxx codes, which are all negative. */
82
83
56.8k
#define MATCH_MATCH        1
84
85.3k
#define MATCH_NOMATCH      0
85
86
/* Special internal returns from the match() function. Make them sufficiently
87
negative to avoid the external error codes. */
88
89
28.4k
#define MATCH_ACCEPT       (-999)
90
0
#define MATCH_KETRPOS      (-998)
91
0
#define MATCH_ONCE         (-997)
92
/* The next 5 must be kept together and in sequence so that a test that checks
93
for any one of them can use a range. */
94
0
#define MATCH_COMMIT       (-996)
95
0
#define MATCH_PRUNE        (-995)
96
0
#define MATCH_SKIP         (-994)
97
0
#define MATCH_SKIP_ARG     (-993)
98
0
#define MATCH_THEN         (-992)
99
0
#define MATCH_BACKTRACK_MAX MATCH_THEN
100
0
#define MATCH_BACKTRACK_MIN MATCH_COMMIT
101
102
/* Maximum number of ints of offset to save on the stack for recursive calls.
103
If the offset vector is bigger, malloc is used. This should be a multiple of 3,
104
because the offset vector is always a multiple of 3 long. */
105
106
0
#define REC_STACK_SAVE_MAX 30
107
108
/* Min and max values for the common repeats; for the maxima, 0 => infinity */
109
110
static const char rep_min[] = { 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, };
111
static const char rep_max[] = { 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, };
112
113
#ifdef PCRE_DEBUG
114
/*************************************************
115
*        Debugging function to print chars       *
116
*************************************************/
117
118
/* Print a sequence of chars in printable format, stopping at the end of the
119
subject if requested.
120
121
Arguments:
122
  p           points to characters
123
  length      number to print
124
  is_subject  TRUE if printing from within md->start_subject
125
  md          pointer to matching data block, if is_subject is TRUE
126
127
Returns:     nothing
128
*/
129
130
static void
131
pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
132
{
133
pcre_uint32 c;
134
BOOL utf = md->utf;
135
if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
136
while (length-- > 0)
137
  if (isprint(c = UCHAR21INCTEST(p))) printf("%c", (char)c); else printf("\\x{%02x}", c);
138
}
139
#endif
140
141
142
143
/*************************************************
144
*          Match a back-reference                *
145
*************************************************/
146
147
/* Normally, if a back reference hasn't been set, the length that is passed is
148
negative, so the match always fails. However, in JavaScript compatibility mode,
149
the length passed is zero. Note that in caseless UTF-8 mode, the number of
150
subject bytes matched may be different to the number of reference bytes.
151
152
Arguments:
153
  offset      index into the offset vector
154
  eptr        pointer into the subject
155
  length      length of reference to be matched (number of bytes)
156
  md          points to match data block
157
  caseless    TRUE if caseless
158
159
Returns:      >= 0 the number of subject bytes matched
160
              -1 no match
161
              -2 partial match; always given if at end subject
162
*/
163
164
static int
165
match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md,
166
  BOOL caseless)
167
0
{
168
0
PCRE_PUCHAR eptr_start = eptr;
169
0
register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];
170
#if defined SUPPORT_UTF && defined SUPPORT_UCP
171
BOOL utf = md->utf;
172
#endif
173
174
#ifdef PCRE_DEBUG
175
if (eptr >= md->end_subject)
176
  printf("matching subject <null>");
177
else
178
  {
179
  printf("matching subject ");
180
  pchars(eptr, length, TRUE, md);
181
  }
182
printf(" against backref ");
183
pchars(p, length, FALSE, md);
184
printf("\n");
185
#endif
186
187
/* Always fail if reference not set (and not JavaScript compatible - in that
188
case the length is passed as zero). */
189
190
0
if (length < 0) return -1;
191
192
/* Separate the caseless case for speed. In UTF-8 mode we can only do this
193
properly if Unicode properties are supported. Otherwise, we can check only
194
ASCII characters. */
195
196
0
if (caseless)
197
0
  {
198
#if defined SUPPORT_UTF && defined SUPPORT_UCP
199
  if (utf)
200
    {
201
    /* Match characters up to the end of the reference. NOTE: the number of
202
    data units matched may differ, because in UTF-8 there are some characters
203
    whose upper and lower case versions have different numbers of bytes.
204
    For example, U+023A (2 bytes in UTF-8) is the upper case version of U+2C65
205
    (3 bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a
206
    sequence of two of the latter. It is important, therefore, to check the
207
    length along the reference, not along the subject (earlier code did this
208
    wrong). */
209
210
    PCRE_PUCHAR endptr = p + length;
211
    while (p < endptr)
212
      {
213
      pcre_uint32 c, d;
214
      const ucd_record *ur;
215
      if (eptr >= md->end_subject) return -2;   /* Partial match */
216
      GETCHARINC(c, eptr);
217
      GETCHARINC(d, p);
218
      ur = GET_UCD(d);
219
      if (c != d && c != d + ur->other_case)
220
        {
221
        const pcre_uint32 *pp = PRIV(ucd_caseless_sets) + ur->caseset;
222
        for (;;)
223
          {
224
          if (c < *pp) return -1;
225
          if (c == *pp++) break;
226
          }
227
        }
228
      }
229
    }
230
  else
231
#endif
232
233
  /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
234
  is no UCP support. */
235
0
    {
236
0
    while (length-- > 0)
237
0
      {
238
0
      pcre_uint32 cc, cp;
239
0
      if (eptr >= md->end_subject) return -2;   /* Partial match */
240
0
      cc = UCHAR21TEST(eptr);
241
0
      cp = UCHAR21TEST(p);
242
0
      if (TABLE_GET(cp, md->lcc, cp) != TABLE_GET(cc, md->lcc, cc)) return -1;
243
0
      p++;
244
0
      eptr++;
245
0
      }
246
0
    }
247
0
  }
248
249
/* In the caseful case, we can just compare the bytes, whether or not we
250
are in UTF-8 mode. */
251
252
0
else
253
0
  {
254
0
  while (length-- > 0)
255
0
    {
256
0
    if (eptr >= md->end_subject) return -2;   /* Partial match */
257
0
    if (UCHAR21INCTEST(p) != UCHAR21INCTEST(eptr)) return -1;
258
0
    }
259
0
  }
260
261
0
return (int)(eptr - eptr_start);
262
0
}
263
264
265
266
/***************************************************************************
267
****************************************************************************
268
                   RECURSION IN THE match() FUNCTION
269
270
The match() function is highly recursive, though not every recursive call
271
increases the recursive depth. Nevertheless, some regular expressions can cause
272
it to recurse to a great depth. I was writing for Unix, so I just let it call
273
itself recursively. This uses the stack for saving everything that has to be
274
saved for a recursive call. On Unix, the stack can be large, and this works
275
fine.
276
277
It turns out that on some non-Unix-like systems there are problems with
278
programs that use a lot of stack. (This despite the fact that every last chip
279
has oodles of memory these days, and techniques for extending the stack have
280
been known for decades.) So....
281
282
There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
283
calls by keeping local variables that need to be preserved in blocks of memory
284
obtained from malloc() instead of on the stack. Macros are used to
285
achieve this so that the actual code doesn't look very different to what it
286
always used to.
287
288
The original heap-recursive code used longjmp(). However, it seems that this
289
can be very slow on some operating systems. Following a suggestion from Stan
290
Switzer, the use of longjmp() has been abolished, at the cost of having to
291
provide a unique number for each call to RMATCH. There is no way of generating
292
a sequence of numbers at compile time in C. I have given them names, to make
293
them stand out more clearly.
294
295
Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
296
FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
297
tests. Furthermore, not using longjmp() means that local dynamic variables
298
don't have indeterminate values; this has meant that the frame size can be
299
reduced because the result can be "passed back" by straight setting of the
300
variable instead of being passed in the frame.
301
****************************************************************************
302
***************************************************************************/
303
304
/* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
305
below must be updated in sync.  */
306
307
enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
308
       RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
309
       RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
310
       RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
311
       RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
312
       RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
313
       RM61,  RM62, RM63, RM64, RM65, RM66, RM67 };
314
315
/* These versions of the macros use the stack, as normal. There are debugging
316
versions and production versions. Note that the "rw" argument of RMATCH isn't
317
actually used in this definition. */
318
319
#ifndef NO_RECURSE
320
#define REGISTER register
321
322
#ifdef PCRE_DEBUG
323
#define RMATCH(ra,rb,rc,rd,re,rw) \
324
  { \
325
  printf("match() called in line %d\n", __LINE__); \
326
  rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1); \
327
  printf("to line %d\n", __LINE__); \
328
  }
329
#define RRETURN(ra) \
330
  { \
331
  printf("match() returned %d from line %d\n", ra, __LINE__); \
332
  return ra; \
333
  }
334
#else
335
#define RMATCH(ra,rb,rc,rd,re,rw) \
336
  rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1)
337
#define RRETURN(ra) return ra
338
#endif
339
340
#else
341
342
343
/* These versions of the macros manage a private stack on the heap. Note that
344
the "rd" argument of RMATCH isn't actually used in this definition. It's the md
345
argument of match(), which never changes. */
346
347
#define REGISTER
348
349
#define RMATCH(ra,rb,rc,rd,re,rw)\
350
0
  {\
351
0
  heapframe *newframe = frame->Xnextframe;\
352
0
  if (newframe == NULL)\
353
0
    {\
354
0
    newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\
355
0
    if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
356
0
    newframe->Xnextframe = NULL;\
357
0
    frame->Xnextframe = newframe;\
358
0
    }\
359
0
  frame->Xwhere = rw;\
360
0
  newframe->Xeptr = ra;\
361
0
  newframe->Xecode = rb;\
362
0
  newframe->Xmstart = mstart;\
363
0
  newframe->Xoffset_top = rc;\
364
0
  newframe->Xeptrb = re;\
365
0
  newframe->Xrdepth = frame->Xrdepth + 1;\
366
0
  newframe->Xprevframe = frame;\
367
0
  frame = newframe;\
368
0
  DPRINTF(("restarting from line %d\n", __LINE__));\
369
0
  goto HEAP_RECURSE;\
370
0
  L_##rw:\
371
0
  DPRINTF(("jumped back to line %d\n", __LINE__));\
372
0
  }
373
374
#define RRETURN(ra)\
375
0
  {\
376
0
  heapframe *oldframe = frame;\
377
0
  frame = oldframe->Xprevframe;\
378
0
  if (frame != NULL)\
379
0
    {\
380
0
    rrc = ra;\
381
0
    goto HEAP_RETURN;\
382
0
    }\
383
0
  return ra;\
384
0
  }
385
386
387
/* Structure for remembering the local variables in a private frame */
388
389
typedef struct heapframe {
390
  struct heapframe *Xprevframe;
391
  struct heapframe *Xnextframe;
392
393
  /* Function arguments that may change */
394
395
  PCRE_PUCHAR Xeptr;
396
  const pcre_uchar *Xecode;
397
  PCRE_PUCHAR Xmstart;
398
  int Xoffset_top;
399
  eptrblock *Xeptrb;
400
  unsigned int Xrdepth;
401
402
  /* Function local variables */
403
404
  PCRE_PUCHAR Xcallpat;
405
#ifdef SUPPORT_UTF
406
  PCRE_PUCHAR Xcharptr;
407
#endif
408
  PCRE_PUCHAR Xdata;
409
  PCRE_PUCHAR Xnext;
410
  PCRE_PUCHAR Xpp;
411
  PCRE_PUCHAR Xprev;
412
  PCRE_PUCHAR Xsaved_eptr;
413
414
  recursion_info Xnew_recursive;
415
416
  BOOL Xcur_is_word;
417
  BOOL Xcondition;
418
  BOOL Xprev_is_word;
419
420
#ifdef SUPPORT_UCP
421
  int Xprop_type;
422
  unsigned int Xprop_value;
423
  int Xprop_fail_result;
424
  int Xoclength;
425
  pcre_uchar Xocchars[6];
426
#endif
427
428
  int Xcodelink;
429
  int Xctype;
430
  unsigned int Xfc;
431
  int Xfi;
432
  int Xlength;
433
  int Xmax;
434
  int Xmin;
435
  unsigned int Xnumber;
436
  int Xoffset;
437
  unsigned int Xop;
438
  pcre_int32 Xsave_capture_last;
439
  int Xsave_offset1, Xsave_offset2, Xsave_offset3;
440
  int Xstacksave[REC_STACK_SAVE_MAX];
441
442
  eptrblock Xnewptrb;
443
444
  /* Where to jump back to */
445
446
  int Xwhere;
447
448
} heapframe;
449
450
#endif
451
452
453
/***************************************************************************
454
***************************************************************************/
455
456
457
458
/*************************************************
459
*         Match from current position            *
460
*************************************************/
461
462
/* This function is called recursively in many circumstances. Whenever it
463
returns a negative (error) response, the outer incarnation must also return the
464
same response. */
465
466
/* These macros pack up tests that are used for partial matching, and which
467
appear several times in the code. We set the "hit end" flag if the pointer is
468
at the end of the subject and also past the start of the subject (i.e.
469
something has been matched). For hard partial matching, we then return
470
immediately. The second one is used when we already know we are past the end of
471
the subject. */
472
473
#define CHECK_PARTIAL()\
474
0
  if (md->partial != 0 && eptr >= md->end_subject && \
475
0
      eptr > md->start_used_ptr) \
476
0
    { \
477
0
    md->hitend = TRUE; \
478
0
    if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
479
0
    }
480
481
#define SCHECK_PARTIAL()\
482
0
  if (md->partial != 0 && eptr > md->start_used_ptr) \
483
0
    { \
484
0
    md->hitend = TRUE; \
485
0
    if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
486
0
    }
487
488
489
/* Performance note: It might be tempting to extract commonly used fields from
490
the md structure (e.g. utf, end_subject) into individual variables to improve
491
performance. Tests using gcc on a SPARC disproved this; in the first case, it
492
made performance worse.
493
494
Arguments:
495
   eptr        pointer to current character in subject
496
   ecode       pointer to current position in compiled code
497
   mstart      pointer to the current match start position (can be modified
498
                 by encountering \K)
499
   offset_top  current top pointer
500
   md          pointer to "static" info for the match
501
   eptrb       pointer to chain of blocks containing eptr at start of
502
                 brackets - for testing for empty matches
503
   rdepth      the recursion depth
504
505
Returns:       MATCH_MATCH if matched            )  these values are >= 0
506
               MATCH_NOMATCH if failed to match  )
507
               a negative MATCH_xxx value for PRUNE, SKIP, etc
508
               a negative PCRE_ERROR_xxx value if aborted by an error condition
509
                 (e.g. stopped by repeated call or recursion limit)
510
*/
511
512
static int
513
match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode,
514
  PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb,
515
  unsigned int rdepth)
516
0
{
517
/* These variables do not need to be preserved over recursion in this function,
518
so they can be ordinary variables in all cases. Mark some of them with
519
"register" because they are used a lot in loops. */
520
521
0
register int  rrc;         /* Returns from recursive calls */
522
0
register int  i;           /* Used for loops not involving calls to RMATCH() */
523
0
register pcre_uint32 c;    /* Character values not kept over RMATCH() calls */
524
0
register BOOL utf;         /* Local copy of UTF flag for speed */
525
526
0
BOOL minimize, possessive; /* Quantifier options */
527
0
BOOL caseless;
528
0
int condcode;
529
530
/* When recursion is not being used, all "local" variables that have to be
531
preserved over calls to RMATCH() are part of a "frame". We set up the top-level
532
frame on the stack here; subsequent instantiations are obtained from the heap
533
whenever RMATCH() does a "recursion". See the macro definitions above. Putting
534
the top-level on the stack rather than malloc-ing them all gives a performance
535
boost in many cases where there is not much "recursion". */
536
537
0
#ifdef NO_RECURSE
538
0
heapframe *frame = (heapframe *)md->match_frames_base;
539
540
/* Copy in the original argument variables */
541
542
0
frame->Xeptr = eptr;
543
0
frame->Xecode = ecode;
544
0
frame->Xmstart = mstart;
545
0
frame->Xoffset_top = offset_top;
546
0
frame->Xeptrb = eptrb;
547
0
frame->Xrdepth = rdepth;
548
549
/* This is where control jumps back to in order to effect "recursion" */
550
551
0
HEAP_RECURSE:
552
553
/* Macros make the argument variables come from the current frame */
554
555
0
#define eptr               frame->Xeptr
556
0
#define ecode              frame->Xecode
557
0
#define mstart             frame->Xmstart
558
0
#define offset_top         frame->Xoffset_top
559
0
#define eptrb              frame->Xeptrb
560
0
#define rdepth             frame->Xrdepth
561
562
/* Ditto for the local variables */
563
564
#ifdef SUPPORT_UTF
565
#define charptr            frame->Xcharptr
566
#endif
567
0
#define callpat            frame->Xcallpat
568
0
#define codelink           frame->Xcodelink
569
0
#define data               frame->Xdata
570
0
#define next               frame->Xnext
571
0
#define pp                 frame->Xpp
572
0
#define prev               frame->Xprev
573
0
#define saved_eptr         frame->Xsaved_eptr
574
575
0
#define new_recursive      frame->Xnew_recursive
576
577
0
#define cur_is_word        frame->Xcur_is_word
578
0
#define condition          frame->Xcondition
579
0
#define prev_is_word       frame->Xprev_is_word
580
581
#ifdef SUPPORT_UCP
582
#define prop_type          frame->Xprop_type
583
#define prop_value         frame->Xprop_value
584
#define prop_fail_result   frame->Xprop_fail_result
585
#define oclength           frame->Xoclength
586
#define occhars            frame->Xocchars
587
#endif
588
589
0
#define ctype              frame->Xctype
590
0
#define fc                 frame->Xfc
591
0
#define fi                 frame->Xfi
592
0
#define length             frame->Xlength
593
0
#define max                frame->Xmax
594
0
#define min                frame->Xmin
595
0
#define number             frame->Xnumber
596
0
#define offset             frame->Xoffset
597
0
#define op                 frame->Xop
598
0
#define save_capture_last  frame->Xsave_capture_last
599
0
#define save_offset1       frame->Xsave_offset1
600
0
#define save_offset2       frame->Xsave_offset2
601
0
#define save_offset3       frame->Xsave_offset3
602
0
#define stacksave          frame->Xstacksave
603
604
0
#define newptrb            frame->Xnewptrb
605
606
/* When recursion is being used, local variables are allocated on the stack and
607
get preserved during recursion in the normal way. In this environment, fi and
608
i, and fc and c, can be the same variables. */
609
610
#else         /* NO_RECURSE not defined */
611
#define fi i
612
#define fc c
613
614
/* Many of the following variables are used only in small blocks of the code.
615
My normal style of coding would have declared them within each of those blocks.
616
However, in order to accommodate the version of this code that uses an external
617
"stack" implemented on the heap, it is easier to declare them all here, so the
618
declarations can be cut out in a block. The only declarations within blocks
619
below are for variables that do not have to be preserved over a recursive call
620
to RMATCH(). */
621
622
#ifdef SUPPORT_UTF
623
const pcre_uchar *charptr;
624
#endif
625
const pcre_uchar *callpat;
626
const pcre_uchar *data;
627
const pcre_uchar *next;
628
PCRE_PUCHAR       pp;
629
const pcre_uchar *prev;
630
PCRE_PUCHAR       saved_eptr;
631
632
recursion_info new_recursive;
633
634
BOOL cur_is_word;
635
BOOL condition;
636
BOOL prev_is_word;
637
638
#ifdef SUPPORT_UCP
639
int prop_type;
640
unsigned int prop_value;
641
int prop_fail_result;
642
int oclength;
643
pcre_uchar occhars[6];
644
#endif
645
646
int codelink;
647
int ctype;
648
int length;
649
int max;
650
int min;
651
unsigned int number;
652
int offset;
653
unsigned int op;
654
pcre_int32 save_capture_last;
655
int save_offset1, save_offset2, save_offset3;
656
int stacksave[REC_STACK_SAVE_MAX];
657
658
eptrblock newptrb;
659
660
/* There is a special fudge for calling match() in a way that causes it to
661
measure the size of its basic stack frame when the stack is being used for
662
recursion. The second argument (ecode) being NULL triggers this behaviour. It
663
cannot normally ever be NULL. The return is the negated value of the frame
664
size. */
665
666
if (ecode == NULL)
667
  {
668
  if (rdepth == 0)
669
    return match((PCRE_PUCHAR)&rdepth, NULL, NULL, 0, NULL, NULL, 1);
670
  else
671
    {
672
    int len = (int)((char *)&rdepth - (char *)eptr);
673
    return (len > 0)? -len : len;
674
    }
675
  }
676
#endif     /* NO_RECURSE */
677
678
/* To save space on the stack and in the heap frame, I have doubled up on some
679
of the local variables that are used only in localised parts of the code, but
680
still need to be preserved over recursive calls of match(). These macros define
681
the alternative names that are used. */
682
683
0
#define allow_zero    cur_is_word
684
0
#define cbegroup      condition
685
0
#define code_offset   codelink
686
0
#define condassert    condition
687
0
#define matched_once  prev_is_word
688
0
#define foc           number
689
0
#define save_mark     data
690
691
/* These statements are here to stop the compiler complaining about uninitialized
692
variables. */
693
694
#ifdef SUPPORT_UCP
695
prop_value = 0;
696
prop_fail_result = 0;
697
#endif
698
699
700
/* This label is used for tail recursion, which is used in a few cases even
701
when NO_RECURSE is not defined, in order to reduce the amount of stack that is
702
used. Thanks to Ian Taylor for noticing this possibility and sending the
703
original patch. */
704
705
0
TAIL_RECURSE:
706
707
/* OK, now we can get on with the real code of the function. Recursive calls
708
are specified by the macro RMATCH and RRETURN is used to return. When
709
NO_RECURSE is *not* defined, these just turn into a recursive call to match()
710
and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
711
defined). However, RMATCH isn't like a function call because it's quite a
712
complicated macro. It has to be used in one particular way. This shouldn't,
713
however, impact performance when true recursion is being used. */
714
715
#ifdef SUPPORT_UTF
716
utf = md->utf;       /* Local copy of the flag */
717
#else
718
0
utf = FALSE;
719
0
#endif
720
721
/* First check that we haven't called match() too many times, or that we
722
haven't exceeded the recursive call limit. */
723
724
0
if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
725
0
if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
726
727
/* At the start of a group with an unlimited repeat that may match an empty
728
string, the variable md->match_function_type is set to MATCH_CBEGROUP. It is
729
done this way to save having to use another function argument, which would take
730
up space on the stack. See also MATCH_CONDASSERT below.
731
732
When MATCH_CBEGROUP is set, add the current subject pointer to the chain of
733
such remembered pointers, to be checked when we hit the closing ket, in order
734
to break infinite loops that match no characters. When match() is called in
735
other circumstances, don't add to the chain. The MATCH_CBEGROUP feature must
736
NOT be used with tail recursion, because the memory block that is used is on
737
the stack, so a new one may be required for each match(). */
738
739
0
if (md->match_function_type == MATCH_CBEGROUP)
740
0
  {
741
0
  newptrb.epb_saved_eptr = eptr;
742
0
  newptrb.epb_prev = eptrb;
743
0
  eptrb = &newptrb;
744
0
  md->match_function_type = 0;
745
0
  }
746
747
/* Now start processing the opcodes. */
748
749
0
for (;;)
750
0
  {
751
0
  minimize = possessive = FALSE;
752
0
  op = *ecode;
753
754
0
  switch(op)
755
0
    {
756
0
    case OP_MARK:
757
0
    md->nomatch_mark = ecode + 2;
758
0
    md->mark = NULL;    /* In case previously set by assertion */
759
0
    RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
760
0
      eptrb, RM55);
761
0
    if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT || rrc == MATCH_KETRPOS) &&
762
0
         md->mark == NULL) md->mark = ecode + 2;
763
764
    /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
765
    argument, and we must check whether that argument matches this MARK's
766
    argument. It is passed back in md->start_match_ptr (an overloading of that
767
    variable). If it does match, we reset that variable to the current subject
768
    position and return MATCH_SKIP. Otherwise, pass back the return code
769
    unaltered. */
770
771
0
    else if (rrc == MATCH_SKIP_ARG &&
772
0
        STRCMP_UC_UC_TEST(ecode + 2, md->start_match_ptr) == 0)
773
0
      {
774
0
      md->start_match_ptr = eptr;
775
0
      RRETURN(MATCH_SKIP);
776
0
      }
777
0
    RRETURN(rrc);
778
779
0
    case OP_FAIL:
780
0
    RRETURN(MATCH_NOMATCH);
781
782
0
    case OP_COMMIT:
783
0
    RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
784
0
      eptrb, RM52);
785
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
786
0
    RRETURN(MATCH_COMMIT);
787
788
0
    case OP_PRUNE:
789
0
    RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
790
0
      eptrb, RM51);
791
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
792
0
    RRETURN(MATCH_PRUNE);
793
794
0
    case OP_PRUNE_ARG:
795
0
    md->nomatch_mark = ecode + 2;
796
0
    md->mark = NULL;    /* In case previously set by assertion */
797
0
    RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
798
0
      eptrb, RM56);
799
0
    if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
800
0
         md->mark == NULL) md->mark = ecode + 2;
801
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
802
0
    RRETURN(MATCH_PRUNE);
803
804
0
    case OP_SKIP:
805
0
    RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
806
0
      eptrb, RM53);
807
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
808
0
    md->start_match_ptr = eptr;   /* Pass back current position */
809
0
    RRETURN(MATCH_SKIP);
810
811
    /* Note that, for Perl compatibility, SKIP with an argument does NOT set
812
    nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was
813
    not a matching mark, we have to re-run the match, ignoring the SKIP_ARG
814
    that failed and any that precede it (either they also failed, or were not
815
    triggered). To do this, we maintain a count of executed SKIP_ARGs. If a
816
    SKIP_ARG gets to top level, the match is re-run with md->ignore_skip_arg
817
    set to the count of the one that failed. */
818
819
0
    case OP_SKIP_ARG:
820
0
    md->skip_arg_count++;
821
0
    if (md->skip_arg_count <= md->ignore_skip_arg)
822
0
      {
823
0
      ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
824
0
      break;
825
0
      }
826
0
    RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
827
0
      eptrb, RM57);
828
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
829
830
    /* Pass back the current skip name by overloading md->start_match_ptr and
831
    returning the special MATCH_SKIP_ARG return code. This will either be
832
    caught by a matching MARK, or get to the top, where it causes a rematch
833
    with md->ignore_skip_arg set to the value of md->skip_arg_count. */
834
835
0
    md->start_match_ptr = ecode + 2;
836
0
    RRETURN(MATCH_SKIP_ARG);
837
838
    /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
839
    the branch in which it occurs can be determined. Overload the start of
840
    match pointer to do this. */
841
842
0
    case OP_THEN:
843
0
    RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
844
0
      eptrb, RM54);
845
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
846
0
    md->start_match_ptr = ecode;
847
0
    RRETURN(MATCH_THEN);
848
849
0
    case OP_THEN_ARG:
850
0
    md->nomatch_mark = ecode + 2;
851
0
    md->mark = NULL;    /* In case previously set by assertion */
852
0
    RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top,
853
0
      md, eptrb, RM58);
854
0
    if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
855
0
         md->mark == NULL) md->mark = ecode + 2;
856
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
857
0
    md->start_match_ptr = ecode;
858
0
    RRETURN(MATCH_THEN);
859
860
    /* Handle an atomic group that does not contain any capturing parentheses.
861
    This can be handled like an assertion. Prior to 8.13, all atomic groups
862
    were handled this way. In 8.13, the code was changed as below for ONCE, so
863
    that backups pass through the group and thereby reset captured values.
864
    However, this uses a lot more stack, so in 8.20, atomic groups that do not
865
    contain any captures generate OP_ONCE_NC, which can be handled in the old,
866
    less stack intensive way.
867
868
    Check the alternative branches in turn - the matching won't pass the KET
869
    for this kind of subpattern. If any one branch matches, we carry on as at
870
    the end of a normal bracket, leaving the subject pointer, but resetting
871
    the start-of-match value in case it was changed by \K. */
872
873
0
    case OP_ONCE_NC:
874
0
    prev = ecode;
875
0
    saved_eptr = eptr;
876
0
    save_mark = md->mark;
877
0
    do
878
0
      {
879
0
      RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
880
0
      if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
881
0
        {
882
0
        mstart = md->start_match_ptr;
883
0
        break;
884
0
        }
885
0
      if (rrc == MATCH_THEN)
886
0
        {
887
0
        next = ecode + GET(ecode,1);
888
0
        if (md->start_match_ptr < next &&
889
0
            (*ecode == OP_ALT || *next == OP_ALT))
890
0
          rrc = MATCH_NOMATCH;
891
0
        }
892
893
0
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
894
0
      ecode += GET(ecode,1);
895
0
      md->mark = save_mark;
896
0
      }
897
0
    while (*ecode == OP_ALT);
898
899
    /* If we hit the end of the group (which could be repeated), fail */
900
901
0
    if (*ecode != OP_ONCE_NC && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
902
903
    /* Continue as from after the group, updating the offsets high water
904
    mark, since extracts may have been taken. */
905
906
0
    do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
907
908
0
    offset_top = md->end_offset_top;
909
0
    eptr = md->end_match_ptr;
910
911
    /* For a non-repeating ket, just continue at this level. This also
912
    happens for a repeating ket if no characters were matched in the group.
913
    This is the forcible breaking of infinite loops as implemented in Perl
914
    5.005. */
915
916
0
    if (*ecode == OP_KET || eptr == saved_eptr)
917
0
      {
918
0
      ecode += 1+LINK_SIZE;
919
0
      break;
920
0
      }
921
922
    /* The repeating kets try the rest of the pattern or restart from the
923
    preceding bracket, in the appropriate order. The second "call" of match()
924
    uses tail recursion, to avoid using another stack frame. */
925
926
0
    if (*ecode == OP_KETRMIN)
927
0
      {
928
0
      RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM65);
929
0
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
930
0
      ecode = prev;
931
0
      goto TAIL_RECURSE;
932
0
      }
933
0
    else  /* OP_KETRMAX */
934
0
      {
935
0
      RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
936
0
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
937
0
      ecode += 1 + LINK_SIZE;
938
0
      goto TAIL_RECURSE;
939
0
      }
940
    /* Control never gets here */
941
942
    /* Handle a capturing bracket, other than those that are possessive with an
943
    unlimited repeat. If there is space in the offset vector, save the current
944
    subject position in the working slot at the top of the vector. We mustn't
945
    change the current values of the data slot, because they may be set from a
946
    previous iteration of this group, and be referred to by a reference inside
947
    the group. A failure to match might occur after the group has succeeded,
948
    if something later on doesn't match. For this reason, we need to restore
949
    the working value and also the values of the final offsets, in case they
950
    were set by a previous iteration of the same bracket.
951
952
    If there isn't enough space in the offset vector, treat this as if it were
953
    a non-capturing bracket. Don't worry about setting the flag for the error
954
    case here; that is handled in the code for KET. */
955
956
0
    case OP_CBRA:
957
0
    case OP_SCBRA:
958
0
    number = GET2(ecode, 1+LINK_SIZE);
959
0
    offset = number << 1;
960
961
#ifdef PCRE_DEBUG
962
    printf("start bracket %d\n", number);
963
    printf("subject=");
964
    pchars(eptr, 16, TRUE, md);
965
    printf("\n");
966
#endif
967
968
0
    if (offset < md->offset_max)
969
0
      {
970
0
      save_offset1 = md->offset_vector[offset];
971
0
      save_offset2 = md->offset_vector[offset+1];
972
0
      save_offset3 = md->offset_vector[md->offset_end - number];
973
0
      save_capture_last = md->capture_last;
974
0
      save_mark = md->mark;
975
976
0
      DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
977
0
      md->offset_vector[md->offset_end - number] =
978
0
        (int)(eptr - md->start_subject);
979
980
0
      for (;;)
981
0
        {
982
0
        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
983
0
        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
984
0
          eptrb, RM1);
985
0
        if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */
986
987
        /* If we backed up to a THEN, check whether it is within the current
988
        branch by comparing the address of the THEN that is passed back with
989
        the end of the branch. If it is within the current branch, and the
990
        branch is one of two or more alternatives (it either starts or ends
991
        with OP_ALT), we have reached the limit of THEN's action, so convert
992
        the return code to NOMATCH, which will cause normal backtracking to
993
        happen from now on. Otherwise, THEN is passed back to an outer
994
        alternative. This implements Perl's treatment of parenthesized groups,
995
        where a group not containing | does not affect the current alternative,
996
        that is, (X) is NOT the same as (X|(*F)). */
997
998
0
        if (rrc == MATCH_THEN)
999
0
          {
1000
0
          next = ecode + GET(ecode,1);
1001
0
          if (md->start_match_ptr < next &&
1002
0
              (*ecode == OP_ALT || *next == OP_ALT))
1003
0
            rrc = MATCH_NOMATCH;
1004
0
          }
1005
1006
        /* Anything other than NOMATCH is passed back. */
1007
1008
0
        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1009
0
        md->capture_last = save_capture_last;
1010
0
        ecode += GET(ecode, 1);
1011
0
        md->mark = save_mark;
1012
0
        if (*ecode != OP_ALT) break;
1013
0
        }
1014
1015
0
      DPRINTF(("bracket %d failed\n", number));
1016
0
      md->offset_vector[offset] = save_offset1;
1017
0
      md->offset_vector[offset+1] = save_offset2;
1018
0
      md->offset_vector[md->offset_end - number] = save_offset3;
1019
1020
      /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */
1021
1022
0
      RRETURN(rrc);
1023
0
      }
1024
1025
    /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
1026
    as a non-capturing bracket. */
1027
1028
    /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1029
    /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1030
1031
0
    DPRINTF(("insufficient capture room: treat as non-capturing\n"));
1032
1033
    /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1034
    /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1035
1036
    /* Non-capturing or atomic group, except for possessive with unlimited
1037
    repeat and ONCE group with no captures. Loop for all the alternatives.
1038
1039
    When we get to the final alternative within the brackets, we used to return
1040
    the result of a recursive call to match() whatever happened so it was
1041
    possible to reduce stack usage by turning this into a tail recursion,
1042
    except in the case of a possibly empty group. However, now that there is
1043
    the possibility of (*THEN) occurring in the final alternative, this
1044
    optimization is no longer always possible.
1045
1046
    We can optimize if we know there are no (*THEN)s in the pattern; at present
1047
    this is the best that can be done.
1048
1049
    MATCH_ONCE is returned when the end of an atomic group is successfully
1050
    reached, but subsequent matching fails. It passes back up the tree (causing
1051
    captured values to be reset) until the original atomic group level is
1052
    reached. This is tested by comparing md->once_target with the start of the
1053
    group. At this point, the return is converted into MATCH_NOMATCH so that
1054
    previous backup points can be taken. */
1055
1056
0
    case OP_ONCE:
1057
0
    case OP_BRA:
1058
0
    case OP_SBRA:
1059
0
    DPRINTF(("start non-capturing bracket\n"));
1060
1061
0
    for (;;)
1062
0
      {
1063
0
      if (op >= OP_SBRA || op == OP_ONCE)
1064
0
        md->match_function_type = MATCH_CBEGROUP;
1065
1066
      /* If this is not a possibly empty group, and there are no (*THEN)s in
1067
      the pattern, and this is the final alternative, optimize as described
1068
      above. */
1069
1070
0
      else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
1071
0
        {
1072
0
        ecode += PRIV(OP_lengths)[*ecode];
1073
0
        goto TAIL_RECURSE;
1074
0
        }
1075
1076
      /* In all other cases, we have to make another call to match(). */
1077
1078
0
      save_mark = md->mark;
1079
0
      save_capture_last = md->capture_last;
1080
0
      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
1081
0
        RM2);
1082
1083
      /* See comment in the code for capturing groups above about handling
1084
      THEN. */
1085
1086
0
      if (rrc == MATCH_THEN)
1087
0
        {
1088
0
        next = ecode + GET(ecode,1);
1089
0
        if (md->start_match_ptr < next &&
1090
0
            (*ecode == OP_ALT || *next == OP_ALT))
1091
0
          rrc = MATCH_NOMATCH;
1092
0
        }
1093
1094
0
      if (rrc != MATCH_NOMATCH)
1095
0
        {
1096
0
        if (rrc == MATCH_ONCE)
1097
0
          {
1098
0
          const pcre_uchar *scode = ecode;
1099
0
          if (*scode != OP_ONCE)           /* If not at start, find it */
1100
0
            {
1101
0
            while (*scode == OP_ALT) scode += GET(scode, 1);
1102
0
            scode -= GET(scode, 1);
1103
0
            }
1104
0
          if (md->once_target == scode) rrc = MATCH_NOMATCH;
1105
0
          }
1106
0
        RRETURN(rrc);
1107
0
        }
1108
0
      ecode += GET(ecode, 1);
1109
0
      md->mark = save_mark;
1110
0
      if (*ecode != OP_ALT) break;
1111
0
      md->capture_last = save_capture_last;
1112
0
      }
1113
1114
0
    RRETURN(MATCH_NOMATCH);
1115
1116
    /* Handle possessive capturing brackets with an unlimited repeat. We come
1117
    here from BRAZERO with allow_zero set TRUE. The offset_vector values are
1118
    handled similarly to the normal case above. However, the matching is
1119
    different. The end of these brackets will always be OP_KETRPOS, which
1120
    returns MATCH_KETRPOS without going further in the pattern. By this means
1121
    we can handle the group by iteration rather than recursion, thereby
1122
    reducing the amount of stack needed. */
1123
1124
0
    case OP_CBRAPOS:
1125
0
    case OP_SCBRAPOS:
1126
0
    allow_zero = FALSE;
1127
1128
0
    POSSESSIVE_CAPTURE:
1129
0
    number = GET2(ecode, 1+LINK_SIZE);
1130
0
    offset = number << 1;
1131
1132
#ifdef PCRE_DEBUG
1133
    printf("start possessive bracket %d\n", number);
1134
    printf("subject=");
1135
    pchars(eptr, 16, TRUE, md);
1136
    printf("\n");
1137
#endif
1138
1139
0
    if (offset >= md->offset_max) goto POSSESSIVE_NON_CAPTURE;
1140
1141
0
    matched_once = FALSE;
1142
0
    code_offset = (int)(ecode - md->start_code);
1143
1144
0
    save_offset1 = md->offset_vector[offset];
1145
0
    save_offset2 = md->offset_vector[offset+1];
1146
0
    save_offset3 = md->offset_vector[md->offset_end - number];
1147
0
    save_capture_last = md->capture_last;
1148
1149
0
    DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
1150
1151
    /* Each time round the loop, save the current subject position for use
1152
    when the group matches. For MATCH_MATCH, the group has matched, so we
1153
    restart it with a new subject starting position, remembering that we had
1154
    at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
1155
    usual. If we haven't matched any alternatives in any iteration, check to
1156
    see if a previous iteration matched. If so, the group has matched;
1157
    continue from afterwards. Otherwise it has failed; restore the previous
1158
    capture values before returning NOMATCH. */
1159
1160
0
    for (;;)
1161
0
      {
1162
0
      md->offset_vector[md->offset_end - number] =
1163
0
        (int)(eptr - md->start_subject);
1164
0
      if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1165
0
      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1166
0
        eptrb, RM63);
1167
0
      if (rrc == MATCH_KETRPOS)
1168
0
        {
1169
0
        offset_top = md->end_offset_top;
1170
0
        ecode = md->start_code + code_offset;
1171
0
        save_capture_last = md->capture_last;
1172
0
        matched_once = TRUE;
1173
0
        mstart = md->start_match_ptr;    /* In case \K changed it */
1174
0
        if (eptr == md->end_match_ptr)   /* Matched an empty string */
1175
0
          {
1176
0
          do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1177
0
          break;
1178
0
          }
1179
0
        eptr = md->end_match_ptr;
1180
0
        continue;
1181
0
        }
1182
1183
      /* See comment in the code for capturing groups above about handling
1184
      THEN. */
1185
1186
0
      if (rrc == MATCH_THEN)
1187
0
        {
1188
0
        next = ecode + GET(ecode,1);
1189
0
        if (md->start_match_ptr < next &&
1190
0
            (*ecode == OP_ALT || *next == OP_ALT))
1191
0
          rrc = MATCH_NOMATCH;
1192
0
        }
1193
1194
0
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1195
0
      md->capture_last = save_capture_last;
1196
0
      ecode += GET(ecode, 1);
1197
0
      if (*ecode != OP_ALT) break;
1198
0
      }
1199
1200
0
    if (!matched_once)
1201
0
      {
1202
0
      md->offset_vector[offset] = save_offset1;
1203
0
      md->offset_vector[offset+1] = save_offset2;
1204
0
      md->offset_vector[md->offset_end - number] = save_offset3;
1205
0
      }
1206
1207
0
    if (allow_zero || matched_once)
1208
0
      {
1209
0
      ecode += 1 + LINK_SIZE;
1210
0
      break;
1211
0
      }
1212
1213
0
    RRETURN(MATCH_NOMATCH);
1214
1215
    /* Non-capturing possessive bracket with unlimited repeat. We come here
1216
    from BRAZERO with allow_zero = TRUE. The code is similar to the above,
1217
    without the capturing complication. It is written out separately for speed
1218
    and cleanliness. */
1219
1220
0
    case OP_BRAPOS:
1221
0
    case OP_SBRAPOS:
1222
0
    allow_zero = FALSE;
1223
1224
0
    POSSESSIVE_NON_CAPTURE:
1225
0
    matched_once = FALSE;
1226
0
    code_offset = (int)(ecode - md->start_code);
1227
0
    save_capture_last = md->capture_last;
1228
1229
0
    for (;;)
1230
0
      {
1231
0
      if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1232
0
      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1233
0
        eptrb, RM48);
1234
0
      if (rrc == MATCH_KETRPOS)
1235
0
        {
1236
0
        offset_top = md->end_offset_top;
1237
0
        ecode = md->start_code + code_offset;
1238
0
        matched_once = TRUE;
1239
0
        mstart = md->start_match_ptr;   /* In case \K reset it */
1240
0
        if (eptr == md->end_match_ptr)  /* Matched an empty string */
1241
0
          {
1242
0
          do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1243
0
          break;
1244
0
          }
1245
0
        eptr = md->end_match_ptr;
1246
0
        continue;
1247
0
        }
1248
1249
      /* See comment in the code for capturing groups above about handling
1250
      THEN. */
1251
1252
0
      if (rrc == MATCH_THEN)
1253
0
        {
1254
0
        next = ecode + GET(ecode,1);
1255
0
        if (md->start_match_ptr < next &&
1256
0
            (*ecode == OP_ALT || *next == OP_ALT))
1257
0
          rrc = MATCH_NOMATCH;
1258
0
        }
1259
1260
0
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1261
0
      ecode += GET(ecode, 1);
1262
0
      if (*ecode != OP_ALT) break;
1263
0
      md->capture_last = save_capture_last;
1264
0
      }
1265
1266
0
    if (matched_once || allow_zero)
1267
0
      {
1268
0
      ecode += 1 + LINK_SIZE;
1269
0
      break;
1270
0
      }
1271
0
    RRETURN(MATCH_NOMATCH);
1272
1273
    /* Control never reaches here. */
1274
1275
    /* Conditional group: compilation checked that there are no more than two
1276
    branches. If the condition is false, skipping the first branch takes us
1277
    past the end of the item if there is only one branch, but that's exactly
1278
    what we want. */
1279
1280
0
    case OP_COND:
1281
0
    case OP_SCOND:
1282
1283
    /* The variable codelink will be added to ecode when the condition is
1284
    false, to get to the second branch. Setting it to the offset to the ALT
1285
    or KET, then incrementing ecode achieves this effect. We now have ecode
1286
    pointing to the condition or callout. */
1287
1288
0
    codelink = GET(ecode, 1);   /* Offset to the second branch */
1289
0
    ecode += 1 + LINK_SIZE;     /* From this opcode */
1290
1291
    /* Because of the way auto-callout works during compile, a callout item is
1292
    inserted between OP_COND and an assertion condition. */
1293
1294
0
    if (*ecode == OP_CALLOUT)
1295
0
      {
1296
0
      if (PUBL(callout) != NULL)
1297
0
        {
1298
0
        PUBL(callout_block) cb;
1299
0
        cb.version          = 2;   /* Version 2 of the callout block */
1300
0
        cb.callout_number   = ecode[1];
1301
0
        cb.offset_vector    = md->offset_vector;
1302
0
#if defined COMPILE_PCRE8
1303
0
        cb.subject          = (PCRE_SPTR)md->start_subject;
1304
#elif defined COMPILE_PCRE16
1305
        cb.subject          = (PCRE_SPTR16)md->start_subject;
1306
#elif defined COMPILE_PCRE32
1307
        cb.subject          = (PCRE_SPTR32)md->start_subject;
1308
#endif
1309
0
        cb.subject_length   = (int)(md->end_subject - md->start_subject);
1310
0
        cb.start_match      = (int)(mstart - md->start_subject);
1311
0
        cb.current_position = (int)(eptr - md->start_subject);
1312
0
        cb.pattern_position = GET(ecode, 2);
1313
0
        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1314
0
        cb.capture_top      = offset_top/2;
1315
0
        cb.capture_last     = md->capture_last & CAPLMASK;
1316
        /* Internal change requires this for API compatibility. */
1317
0
        if (cb.capture_last == 0) cb.capture_last = -1;
1318
0
        cb.callout_data     = md->callout_data;
1319
0
        cb.mark             = md->nomatch_mark;
1320
0
        if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1321
0
        if (rrc < 0) RRETURN(rrc);
1322
0
        }
1323
1324
      /* Advance ecode past the callout, so it now points to the condition. We
1325
      must adjust codelink so that the value of ecode+codelink is unchanged. */
1326
1327
0
      ecode += PRIV(OP_lengths)[OP_CALLOUT];
1328
0
      codelink -= PRIV(OP_lengths)[OP_CALLOUT];
1329
0
      }
1330
1331
    /* Test the various possible conditions */
1332
1333
0
    condition = FALSE;
1334
0
    switch(condcode = *ecode)
1335
0
      {
1336
0
      case OP_RREF:         /* Numbered group recursion test */
1337
0
      if (md->recursive != NULL)     /* Not recursing => FALSE */
1338
0
        {
1339
0
        unsigned int recno = GET2(ecode, 1);   /* Recursion group number*/
1340
0
        condition = (recno == RREF_ANY || recno == md->recursive->group_num);
1341
0
        }
1342
0
      break;
1343
1344
0
      case OP_DNRREF:       /* Duplicate named group recursion test */
1345
0
      if (md->recursive != NULL)
1346
0
        {
1347
0
        int count = GET2(ecode, 1 + IMM2_SIZE);
1348
0
        pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
1349
0
        while (count-- > 0)
1350
0
          {
1351
0
          unsigned int recno = GET2(slot, 0);
1352
0
          condition = recno == md->recursive->group_num;
1353
0
          if (condition) break;
1354
0
          slot += md->name_entry_size;
1355
0
          }
1356
0
        }
1357
0
      break;
1358
1359
0
      case OP_CREF:         /* Numbered group used test */
1360
0
      offset = GET2(ecode, 1) << 1;  /* Doubled ref number */
1361
0
      condition = offset < offset_top && md->offset_vector[offset] >= 0;
1362
0
      break;
1363
1364
0
      case OP_DNCREF:      /* Duplicate named group used test */
1365
0
        {
1366
0
        int count = GET2(ecode, 1 + IMM2_SIZE);
1367
0
        pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
1368
0
        while (count-- > 0)
1369
0
          {
1370
0
          offset = GET2(slot, 0) << 1;
1371
0
          condition = offset < offset_top && md->offset_vector[offset] >= 0;
1372
0
          if (condition) break;
1373
0
          slot += md->name_entry_size;
1374
0
          }
1375
0
        }
1376
0
      break;
1377
1378
0
      case OP_DEF:     /* DEFINE - always false */
1379
0
      case OP_FAIL:    /* From optimized (?!) condition */
1380
0
      break;
1381
1382
      /* The condition is an assertion. Call match() to evaluate it - setting
1383
      md->match_function_type to MATCH_CONDASSERT causes it to stop at the end
1384
      of an assertion. */
1385
1386
0
      default:
1387
0
      md->match_function_type = MATCH_CONDASSERT;
1388
0
      RMATCH(eptr, ecode, offset_top, md, NULL, RM3);
1389
0
      if (rrc == MATCH_MATCH)
1390
0
        {
1391
0
        if (md->end_offset_top > offset_top)
1392
0
          offset_top = md->end_offset_top;  /* Captures may have happened */
1393
0
        condition = TRUE;
1394
1395
        /* Advance ecode past the assertion to the start of the first branch,
1396
        but adjust it so that the general choosing code below works. If the
1397
        assertion has a quantifier that allows zero repeats we must skip over
1398
        the BRAZERO. This is a lunatic thing to do, but somebody did! */
1399
1400
0
        if (*ecode == OP_BRAZERO) ecode++;
1401
0
        ecode += GET(ecode, 1);
1402
0
        while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1403
0
        ecode += 1 + LINK_SIZE - PRIV(OP_lengths)[condcode];
1404
0
        }
1405
1406
      /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
1407
      assertion; it is therefore treated as NOMATCH. Any other return is an
1408
      error. */
1409
1410
0
      else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1411
0
        {
1412
0
        RRETURN(rrc);         /* Need braces because of following else */
1413
0
        }
1414
0
      break;
1415
0
      }
1416
1417
    /* Choose branch according to the condition */
1418
1419
0
    ecode += condition? PRIV(OP_lengths)[condcode] : codelink;
1420
1421
    /* We are now at the branch that is to be obeyed. As there is only one, we
1422
    can use tail recursion to avoid using another stack frame, except when
1423
    there is unlimited repeat of a possibly empty group. In the latter case, a
1424
    recursive call to match() is always required, unless the second alternative
1425
    doesn't exist, in which case we can just plough on. Note that, for
1426
    compatibility with Perl, the | in a conditional group is NOT treated as
1427
    creating two alternatives. If a THEN is encountered in the branch, it
1428
    propagates out to the enclosing alternative (unless nested in a deeper set
1429
    of alternatives, of course). */
1430
1431
0
    if (condition || ecode[-(1+LINK_SIZE)] == OP_ALT)
1432
0
      {
1433
0
      if (op != OP_SCOND)
1434
0
        {
1435
0
        goto TAIL_RECURSE;
1436
0
        }
1437
1438
0
      md->match_function_type = MATCH_CBEGROUP;
1439
0
      RMATCH(eptr, ecode, offset_top, md, eptrb, RM49);
1440
0
      RRETURN(rrc);
1441
0
      }
1442
1443
     /* Condition false & no alternative; continue after the group. */
1444
1445
0
    else
1446
0
      {
1447
0
      }
1448
0
    break;
1449
1450
1451
    /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1452
    to close any currently open capturing brackets. */
1453
1454
0
    case OP_CLOSE:
1455
0
    number = GET2(ecode, 1);   /* Must be less than 65536 */
1456
0
    offset = number << 1;
1457
1458
#ifdef PCRE_DEBUG
1459
      printf("end bracket %d at *ACCEPT", number);
1460
      printf("\n");
1461
#endif
1462
1463
0
    md->capture_last = (md->capture_last & OVFLMASK) | number;
1464
0
    if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else
1465
0
      {
1466
0
      md->offset_vector[offset] =
1467
0
        md->offset_vector[md->offset_end - number];
1468
0
      md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1469
1470
      /* If this group is at or above the current highwater mark, ensure that
1471
      any groups between the current high water mark and this group are marked
1472
      unset and then update the high water mark. */
1473
1474
0
      if (offset >= offset_top)
1475
0
        {
1476
0
        register int *iptr = md->offset_vector + offset_top;
1477
0
        register int *iend = md->offset_vector + offset;
1478
0
        while (iptr < iend) *iptr++ = -1;
1479
0
        offset_top = offset + 2;
1480
0
        }
1481
0
      }
1482
0
    ecode += 1 + IMM2_SIZE;
1483
0
    break;
1484
1485
1486
    /* End of the pattern, either real or forced. */
1487
1488
0
    case OP_END:
1489
0
    case OP_ACCEPT:
1490
0
    case OP_ASSERT_ACCEPT:
1491
1492
    /* If we have matched an empty string, fail if not in an assertion and not
1493
    in a recursion if either PCRE_NOTEMPTY is set, or if PCRE_NOTEMPTY_ATSTART
1494
    is set and we have matched at the start of the subject. In both cases,
1495
    backtracking will then try other alternatives, if any. */
1496
1497
0
    if (eptr == mstart && op != OP_ASSERT_ACCEPT &&
1498
0
         md->recursive == NULL &&
1499
0
         (md->notempty ||
1500
0
           (md->notempty_atstart &&
1501
0
             mstart == md->start_subject + md->start_offset)))
1502
0
      RRETURN(MATCH_NOMATCH);
1503
1504
    /* Otherwise, we have a match. */
1505
1506
0
    md->end_match_ptr = eptr;           /* Record where we ended */
1507
0
    md->end_offset_top = offset_top;    /* and how many extracts were taken */
1508
0
    md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1509
1510
    /* For some reason, the macros don't work properly if an expression is
1511
    given as the argument to RRETURN when the heap is in use. */
1512
1513
0
    rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1514
0
    RRETURN(rrc);
1515
1516
    /* Assertion brackets. Check the alternative branches in turn - the
1517
    matching won't pass the KET for an assertion. If any one branch matches,
1518
    the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1519
    start of each branch to move the current point backwards, so the code at
1520
    this level is identical to the lookahead case. When the assertion is part
1521
    of a condition, we want to return immediately afterwards. The caller of
1522
    this incarnation of the match() function will have set MATCH_CONDASSERT in
1523
    md->match_function_type, and one of these opcodes will be the first opcode
1524
    that is processed. We use a local variable that is preserved over calls to
1525
    match() to remember this case. */
1526
1527
0
    case OP_ASSERT:
1528
0
    case OP_ASSERTBACK:
1529
0
    save_mark = md->mark;
1530
0
    if (md->match_function_type == MATCH_CONDASSERT)
1531
0
      {
1532
0
      condassert = TRUE;
1533
0
      md->match_function_type = 0;
1534
0
      }
1535
0
    else condassert = FALSE;
1536
1537
    /* Loop for each branch */
1538
1539
0
    do
1540
0
      {
1541
0
      RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);
1542
1543
      /* A match means that the assertion is true; break out of the loop
1544
      that matches its alternatives. */
1545
1546
0
      if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1547
0
        {
1548
0
        mstart = md->start_match_ptr;   /* In case \K reset it */
1549
0
        break;
1550
0
        }
1551
1552
      /* If not matched, restore the previous mark setting. */
1553
1554
0
      md->mark = save_mark;
1555
1556
      /* See comment in the code for capturing groups above about handling
1557
      THEN. */
1558
1559
0
      if (rrc == MATCH_THEN)
1560
0
        {
1561
0
        next = ecode + GET(ecode,1);
1562
0
        if (md->start_match_ptr < next &&
1563
0
            (*ecode == OP_ALT || *next == OP_ALT))
1564
0
          rrc = MATCH_NOMATCH;
1565
0
        }
1566
1567
      /* Anything other than NOMATCH causes the entire assertion to fail,
1568
      passing back the return code. This includes COMMIT, SKIP, PRUNE and an
1569
      uncaptured THEN, which means they take their normal effect. This
1570
      consistent approach does not always have exactly the same effect as in
1571
      Perl. */
1572
1573
0
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1574
0
      ecode += GET(ecode, 1);
1575
0
      }
1576
0
    while (*ecode == OP_ALT);   /* Continue for next alternative */
1577
1578
    /* If we have tried all the alternative branches, the assertion has
1579
    failed. If not, we broke out after a match. */
1580
1581
0
    if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
1582
1583
    /* If checking an assertion for a condition, return MATCH_MATCH. */
1584
1585
0
    if (condassert) RRETURN(MATCH_MATCH);
1586
1587
    /* Continue from after a successful assertion, updating the offsets high
1588
    water mark, since extracts may have been taken during the assertion. */
1589
1590
0
    do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1591
0
    ecode += 1 + LINK_SIZE;
1592
0
    offset_top = md->end_offset_top;
1593
0
    continue;
1594
1595
    /* Negative assertion: all branches must fail to match for the assertion to
1596
    succeed. */
1597
1598
0
    case OP_ASSERT_NOT:
1599
0
    case OP_ASSERTBACK_NOT:
1600
0
    save_mark = md->mark;
1601
0
    if (md->match_function_type == MATCH_CONDASSERT)
1602
0
      {
1603
0
      condassert = TRUE;
1604
0
      md->match_function_type = 0;
1605
0
      }
1606
0
    else condassert = FALSE;
1607
1608
    /* Loop for each alternative branch. */
1609
1610
0
    do
1611
0
      {
1612
0
      RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
1613
0
      md->mark = save_mark;   /* Always restore the mark setting */
1614
1615
0
      switch(rrc)
1616
0
        {
1617
0
        case MATCH_MATCH:            /* A successful match means */
1618
0
        case MATCH_ACCEPT:           /* the assertion has failed. */
1619
0
        RRETURN(MATCH_NOMATCH);
1620
1621
0
        case MATCH_NOMATCH:          /* Carry on with next branch */
1622
0
        break;
1623
1624
        /* See comment in the code for capturing groups above about handling
1625
        THEN. */
1626
1627
0
        case MATCH_THEN:
1628
0
        next = ecode + GET(ecode,1);
1629
0
        if (md->start_match_ptr < next &&
1630
0
            (*ecode == OP_ALT || *next == OP_ALT))
1631
0
          {
1632
0
          rrc = MATCH_NOMATCH;
1633
0
          break;
1634
0
          }
1635
        /* Otherwise fall through. */
1636
1637
        /* COMMIT, SKIP, PRUNE, and an uncaptured THEN cause the whole
1638
        assertion to fail to match, without considering any more alternatives.
1639
        Failing to match means the assertion is true. This is a consistent
1640
        approach, but does not always have the same effect as in Perl. */
1641
1642
0
        case MATCH_COMMIT:
1643
0
        case MATCH_SKIP:
1644
0
        case MATCH_SKIP_ARG:
1645
0
        case MATCH_PRUNE:
1646
0
        do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1647
0
        goto NEG_ASSERT_TRUE;   /* Break out of alternation loop */
1648
1649
        /* Anything else is an error */
1650
1651
0
        default:
1652
0
        RRETURN(rrc);
1653
0
        }
1654
1655
      /* Continue with next branch */
1656
1657
0
      ecode += GET(ecode,1);
1658
0
      }
1659
0
    while (*ecode == OP_ALT);
1660
1661
    /* All branches in the assertion failed to match. */
1662
1663
0
    NEG_ASSERT_TRUE:
1664
0
    if (condassert) RRETURN(MATCH_MATCH);  /* Condition assertion */
1665
0
    ecode += 1 + LINK_SIZE;                /* Continue with current branch */
1666
0
    continue;
1667
1668
    /* Move the subject pointer back. This occurs only at the start of
1669
    each branch of a lookbehind assertion. If we are too close to the start to
1670
    move back, this match function fails. When working with UTF-8 we move
1671
    back a number of characters, not bytes. */
1672
1673
0
    case OP_REVERSE:
1674
#ifdef SUPPORT_UTF
1675
    if (utf)
1676
      {
1677
      i = GET(ecode, 1);
1678
      while (i-- > 0)
1679
        {
1680
        eptr--;
1681
        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1682
        BACKCHAR(eptr);
1683
        }
1684
      }
1685
    else
1686
#endif
1687
1688
    /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1689
1690
0
      {
1691
0
      eptr -= GET(ecode, 1);
1692
0
      if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1693
0
      }
1694
1695
    /* Save the earliest consulted character, then skip to next op code */
1696
1697
0
    if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1698
0
    ecode += 1 + LINK_SIZE;
1699
0
    break;
1700
1701
    /* The callout item calls an external function, if one is provided, passing
1702
    details of the match so far. This is mainly for debugging, though the
1703
    function is able to force a failure. */
1704
1705
0
    case OP_CALLOUT:
1706
0
    if (PUBL(callout) != NULL)
1707
0
      {
1708
0
      PUBL(callout_block) cb;
1709
0
      cb.version          = 2;   /* Version 2 of the callout block */
1710
0
      cb.callout_number   = ecode[1];
1711
0
      cb.offset_vector    = md->offset_vector;
1712
0
#if defined COMPILE_PCRE8
1713
0
      cb.subject          = (PCRE_SPTR)md->start_subject;
1714
#elif defined COMPILE_PCRE16
1715
      cb.subject          = (PCRE_SPTR16)md->start_subject;
1716
#elif defined COMPILE_PCRE32
1717
      cb.subject          = (PCRE_SPTR32)md->start_subject;
1718
#endif
1719
0
      cb.subject_length   = (int)(md->end_subject - md->start_subject);
1720
0
      cb.start_match      = (int)(mstart - md->start_subject);
1721
0
      cb.current_position = (int)(eptr - md->start_subject);
1722
0
      cb.pattern_position = GET(ecode, 2);
1723
0
      cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1724
0
      cb.capture_top      = offset_top/2;
1725
0
      cb.capture_last     = md->capture_last & CAPLMASK;
1726
      /* Internal change requires this for API compatibility. */
1727
0
      if (cb.capture_last == 0) cb.capture_last = -1;
1728
0
      cb.callout_data     = md->callout_data;
1729
0
      cb.mark             = md->nomatch_mark;
1730
0
      if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1731
0
      if (rrc < 0) RRETURN(rrc);
1732
0
      }
1733
0
    ecode += 2 + 2*LINK_SIZE;
1734
0
    break;
1735
1736
    /* Recursion either matches the current regex, or some subexpression. The
1737
    offset data is the offset to the starting bracket from the start of the
1738
    whole pattern. (This is so that it works from duplicated subpatterns.)
1739
1740
    The state of the capturing groups is preserved over recursion, and
1741
    re-instated afterwards. We don't know how many are started and not yet
1742
    finished (offset_top records the completed total) so we just have to save
1743
    all the potential data. There may be up to 65535 such values, which is too
1744
    large to put on the stack, but using malloc for small numbers seems
1745
    expensive. As a compromise, the stack is used when there are no more than
1746
    REC_STACK_SAVE_MAX values to store; otherwise malloc is used.
1747
1748
    There are also other values that have to be saved. We use a chained
1749
    sequence of blocks that actually live on the stack. Thanks to Robin Houston
1750
    for the original version of this logic. It has, however, been hacked around
1751
    a lot, so he is not to blame for the current way it works. */
1752
1753
0
    case OP_RECURSE:
1754
0
      {
1755
0
      recursion_info *ri;
1756
0
      unsigned int recno;
1757
1758
0
      callpat = md->start_code + GET(ecode, 1);
1759
0
      recno = (callpat == md->start_code)? 0 :
1760
0
        GET2(callpat, 1 + LINK_SIZE);
1761
1762
      /* Check for repeating a recursion without advancing the subject pointer.
1763
      This should catch convoluted mutual recursions. (Some simple cases are
1764
      caught at compile time.) */
1765
1766
0
      for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
1767
0
        if (recno == ri->group_num && eptr == ri->subject_position)
1768
0
          RRETURN(PCRE_ERROR_RECURSELOOP);
1769
1770
      /* Add to "recursing stack" */
1771
1772
0
      new_recursive.group_num = recno;
1773
0
      new_recursive.saved_capture_last = md->capture_last;
1774
0
      new_recursive.subject_position = eptr;
1775
0
      new_recursive.prevrec = md->recursive;
1776
0
      md->recursive = &new_recursive;
1777
1778
      /* Where to continue from afterwards */
1779
1780
0
      ecode += 1 + LINK_SIZE;
1781
1782
      /* Now save the offset data */
1783
1784
0
      new_recursive.saved_max = md->offset_end;
1785
0
      if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1786
0
        new_recursive.offset_save = stacksave;
1787
0
      else
1788
0
        {
1789
0
        new_recursive.offset_save =
1790
0
          (int *)(PUBL(malloc))(new_recursive.saved_max * sizeof(int));
1791
0
        if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1792
0
        }
1793
0
      memcpy(new_recursive.offset_save, md->offset_vector,
1794
0
            new_recursive.saved_max * sizeof(int));
1795
1796
      /* OK, now we can do the recursion. After processing each alternative,
1797
      restore the offset data and the last captured value. If there were nested
1798
      recursions, md->recursive might be changed, so reset it before looping.
1799
      */
1800
1801
0
      DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1802
0
      cbegroup = (*callpat >= OP_SBRA);
1803
0
      do
1804
0
        {
1805
0
        if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
1806
0
        RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
1807
0
          md, eptrb, RM6);
1808
0
        memcpy(md->offset_vector, new_recursive.offset_save,
1809
0
            new_recursive.saved_max * sizeof(int));
1810
0
        md->capture_last = new_recursive.saved_capture_last;
1811
0
        md->recursive = new_recursive.prevrec;
1812
0
        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1813
0
          {
1814
0
          DPRINTF(("Recursion matched\n"));
1815
0
          if (new_recursive.offset_save != stacksave)
1816
0
            (PUBL(free))(new_recursive.offset_save);
1817
1818
          /* Set where we got to in the subject, and reset the start in case
1819
          it was changed by \K. This *is* propagated back out of a recursion,
1820
          for Perl compatibility. */
1821
1822
0
          eptr = md->end_match_ptr;
1823
0
          mstart = md->start_match_ptr;
1824
0
          goto RECURSION_MATCHED;        /* Exit loop; end processing */
1825
0
          }
1826
1827
        /* PCRE does not allow THEN, SKIP, PRUNE or COMMIT to escape beyond a
1828
        recursion; they cause a NOMATCH for the entire recursion. These codes
1829
        are defined in a range that can be tested for. */
1830
1831
0
        if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX)
1832
0
          {
1833
0
          if (new_recursive.offset_save != stacksave)
1834
0
            (PUBL(free))(new_recursive.offset_save);
1835
0
          RRETURN(MATCH_NOMATCH);
1836
0
          }
1837
1838
        /* Any return code other than NOMATCH is an error. */
1839
1840
0
        if (rrc != MATCH_NOMATCH)
1841
0
          {
1842
0
          DPRINTF(("Recursion gave error %d\n", rrc));
1843
0
          if (new_recursive.offset_save != stacksave)
1844
0
            (PUBL(free))(new_recursive.offset_save);
1845
0
          RRETURN(rrc);
1846
0
          }
1847
1848
0
        md->recursive = &new_recursive;
1849
0
        callpat += GET(callpat, 1);
1850
0
        }
1851
0
      while (*callpat == OP_ALT);
1852
1853
0
      DPRINTF(("Recursion didn't match\n"));
1854
0
      md->recursive = new_recursive.prevrec;
1855
0
      if (new_recursive.offset_save != stacksave)
1856
0
        (PUBL(free))(new_recursive.offset_save);
1857
0
      RRETURN(MATCH_NOMATCH);
1858
0
      }
1859
1860
0
    RECURSION_MATCHED:
1861
0
    break;
1862
1863
    /* An alternation is the end of a branch; scan along to find the end of the
1864
    bracketed group and go to there. */
1865
1866
0
    case OP_ALT:
1867
0
    do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1868
0
    break;
1869
1870
    /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1871
    indicating that it may occur zero times. It may repeat infinitely, or not
1872
    at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1873
    with fixed upper repeat limits are compiled as a number of copies, with the
1874
    optional ones preceded by BRAZERO or BRAMINZERO. */
1875
1876
0
    case OP_BRAZERO:
1877
0
    next = ecode + 1;
1878
0
    RMATCH(eptr, next, offset_top, md, eptrb, RM10);
1879
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1880
0
    do next += GET(next, 1); while (*next == OP_ALT);
1881
0
    ecode = next + 1 + LINK_SIZE;
1882
0
    break;
1883
1884
0
    case OP_BRAMINZERO:
1885
0
    next = ecode + 1;
1886
0
    do next += GET(next, 1); while (*next == OP_ALT);
1887
0
    RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, eptrb, RM11);
1888
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1889
0
    ecode++;
1890
0
    break;
1891
1892
0
    case OP_SKIPZERO:
1893
0
    next = ecode+1;
1894
0
    do next += GET(next,1); while (*next == OP_ALT);
1895
0
    ecode = next + 1 + LINK_SIZE;
1896
0
    break;
1897
1898
    /* BRAPOSZERO occurs before a possessive bracket group. Don't do anything
1899
    here; just jump to the group, with allow_zero set TRUE. */
1900
1901
0
    case OP_BRAPOSZERO:
1902
0
    op = *(++ecode);
1903
0
    allow_zero = TRUE;
1904
0
    if (op == OP_CBRAPOS || op == OP_SCBRAPOS) goto POSSESSIVE_CAPTURE;
1905
0
      goto POSSESSIVE_NON_CAPTURE;
1906
1907
    /* End of a group, repeated or non-repeating. */
1908
1909
0
    case OP_KET:
1910
0
    case OP_KETRMIN:
1911
0
    case OP_KETRMAX:
1912
0
    case OP_KETRPOS:
1913
0
    prev = ecode - GET(ecode, 1);
1914
1915
    /* If this was a group that remembered the subject start, in order to break
1916
    infinite repeats of empty string matches, retrieve the subject start from
1917
    the chain. Otherwise, set it NULL. */
1918
1919
0
    if (*prev >= OP_SBRA || *prev == OP_ONCE)
1920
0
      {
1921
0
      saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1922
0
      eptrb = eptrb->epb_prev;              /* Backup to previous group */
1923
0
      }
1924
0
    else saved_eptr = NULL;
1925
1926
    /* If we are at the end of an assertion group or a non-capturing atomic
1927
    group, stop matching and return MATCH_MATCH, but record the current high
1928
    water mark for use by positive assertions. We also need to record the match
1929
    start in case it was changed by \K. */
1930
1931
0
    if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) ||
1932
0
         *prev == OP_ONCE_NC)
1933
0
      {
1934
0
      md->end_match_ptr = eptr;      /* For ONCE_NC */
1935
0
      md->end_offset_top = offset_top;
1936
0
      md->start_match_ptr = mstart;
1937
0
      RRETURN(MATCH_MATCH);         /* Sets md->mark */
1938
0
      }
1939
1940
    /* For capturing groups we have to check the group number back at the start
1941
    and if necessary complete handling an extraction by setting the offsets and
1942
    bumping the high water mark. Whole-pattern recursion is coded as a recurse
1943
    into group 0, so it won't be picked up here. Instead, we catch it when the
1944
    OP_END is reached. Other recursion is handled here. We just have to record
1945
    the current subject position and start match pointer and give a MATCH
1946
    return. */
1947
1948
0
    if (*prev == OP_CBRA || *prev == OP_SCBRA ||
1949
0
        *prev == OP_CBRAPOS || *prev == OP_SCBRAPOS)
1950
0
      {
1951
0
      number = GET2(prev, 1+LINK_SIZE);
1952
0
      offset = number << 1;
1953
1954
#ifdef PCRE_DEBUG
1955
      printf("end bracket %d", number);
1956
      printf("\n");
1957
#endif
1958
1959
      /* Handle a recursively called group. */
1960
1961
0
      if (md->recursive != NULL && md->recursive->group_num == number)
1962
0
        {
1963
0
        md->end_match_ptr = eptr;
1964
0
        md->start_match_ptr = mstart;
1965
0
        RRETURN(MATCH_MATCH);
1966
0
        }
1967
1968
      /* Deal with capturing */
1969
1970
0
      md->capture_last = (md->capture_last & OVFLMASK) | number;
1971
0
      if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else
1972
0
        {
1973
        /* If offset is greater than offset_top, it means that we are
1974
        "skipping" a capturing group, and that group's offsets must be marked
1975
        unset. In earlier versions of PCRE, all the offsets were unset at the
1976
        start of matching, but this doesn't work because atomic groups and
1977
        assertions can cause a value to be set that should later be unset.
1978
        Example: matching /(?>(a))b|(a)c/ against "ac". This sets group 1 as
1979
        part of the atomic group, but this is not on the final matching path,
1980
        so must be unset when 2 is set. (If there is no group 2, there is no
1981
        problem, because offset_top will then be 2, indicating no capture.) */
1982
1983
0
        if (offset > offset_top)
1984
0
          {
1985
0
          register int *iptr = md->offset_vector + offset_top;
1986
0
          register int *iend = md->offset_vector + offset;
1987
0
          while (iptr < iend) *iptr++ = -1;
1988
0
          }
1989
1990
        /* Now make the extraction */
1991
1992
0
        md->offset_vector[offset] =
1993
0
          md->offset_vector[md->offset_end - number];
1994
0
        md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1995
0
        if (offset_top <= offset) offset_top = offset + 2;
1996
0
        }
1997
0
      }
1998
1999
    /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
2000
    and return the MATCH_KETRPOS. This makes it possible to do the repeats one
2001
    at a time from the outer level, thus saving stack. This must precede the
2002
    empty string test - in this case that test is done at the outer level. */
2003
2004
0
    if (*ecode == OP_KETRPOS)
2005
0
      {
2006
0
      md->start_match_ptr = mstart;    /* In case \K reset it */
2007
0
      md->end_match_ptr = eptr;
2008
0
      md->end_offset_top = offset_top;
2009
0
      RRETURN(MATCH_KETRPOS);
2010
0
      }
2011
2012
    /* For an ordinary non-repeating ket, just continue at this level. This
2013
    also happens for a repeating ket if no characters were matched in the
2014
    group. This is the forcible breaking of infinite loops as implemented in
2015
    Perl 5.005. For a non-repeating atomic group that includes captures,
2016
    establish a backup point by processing the rest of the pattern at a lower
2017
    level. If this results in a NOMATCH return, pass MATCH_ONCE back to the
2018
    original OP_ONCE level, thereby bypassing intermediate backup points, but
2019
    resetting any captures that happened along the way. */
2020
2021
0
    if (*ecode == OP_KET || eptr == saved_eptr)
2022
0
      {
2023
0
      if (*prev == OP_ONCE)
2024
0
        {
2025
0
        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM12);
2026
0
        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2027
0
        md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
2028
0
        RRETURN(MATCH_ONCE);
2029
0
        }
2030
0
      ecode += 1 + LINK_SIZE;    /* Carry on at this level */
2031
0
      break;
2032
0
      }
2033
2034
    /* The normal repeating kets try the rest of the pattern or restart from
2035
    the preceding bracket, in the appropriate order. In the second case, we can
2036
    use tail recursion to avoid using another stack frame, unless we have
2037
    an atomic group or an unlimited repeat of a group that can match an empty
2038
    string. */
2039
2040
0
    if (*ecode == OP_KETRMIN)
2041
0
      {
2042
0
      RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM7);
2043
0
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2044
0
      if (*prev == OP_ONCE)
2045
0
        {
2046
0
        RMATCH(eptr, prev, offset_top, md, eptrb, RM8);
2047
0
        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2048
0
        md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
2049
0
        RRETURN(MATCH_ONCE);
2050
0
        }
2051
0
      if (*prev >= OP_SBRA)    /* Could match an empty string */
2052
0
        {
2053
0
        RMATCH(eptr, prev, offset_top, md, eptrb, RM50);
2054
0
        RRETURN(rrc);
2055
0
        }
2056
0
      ecode = prev;
2057
0
      goto TAIL_RECURSE;
2058
0
      }
2059
0
    else  /* OP_KETRMAX */
2060
0
      {
2061
0
      RMATCH(eptr, prev, offset_top, md, eptrb, RM13);
2062
0
      if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH;
2063
0
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2064
0
      if (*prev == OP_ONCE)
2065
0
        {
2066
0
        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM9);
2067
0
        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2068
0
        md->once_target = prev;
2069
0
        RRETURN(MATCH_ONCE);
2070
0
        }
2071
0
      ecode += 1 + LINK_SIZE;
2072
0
      goto TAIL_RECURSE;
2073
0
      }
2074
    /* Control never gets here */
2075
2076
    /* Not multiline mode: start of subject assertion, unless notbol. */
2077
2078
0
    case OP_CIRC:
2079
0
    if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
2080
2081
    /* Start of subject assertion */
2082
2083
0
    case OP_SOD:
2084
0
    if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
2085
0
    ecode++;
2086
0
    break;
2087
2088
    /* Multiline mode: start of subject unless notbol, or after any newline. */
2089
2090
0
    case OP_CIRCM:
2091
0
    if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
2092
0
    if (eptr != md->start_subject &&
2093
0
        (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
2094
0
      RRETURN(MATCH_NOMATCH);
2095
0
    ecode++;
2096
0
    break;
2097
2098
    /* Start of match assertion */
2099
2100
0
    case OP_SOM:
2101
0
    if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
2102
0
    ecode++;
2103
0
    break;
2104
2105
    /* Reset the start of match point */
2106
2107
0
    case OP_SET_SOM:
2108
0
    mstart = eptr;
2109
0
    ecode++;
2110
0
    break;
2111
2112
    /* Multiline mode: assert before any newline, or before end of subject
2113
    unless noteol is set. */
2114
2115
0
    case OP_DOLLM:
2116
0
    if (eptr < md->end_subject)
2117
0
      {
2118
0
      if (!IS_NEWLINE(eptr))
2119
0
        {
2120
0
        if (md->partial != 0 &&
2121
0
            eptr + 1 >= md->end_subject &&
2122
0
            NLBLOCK->nltype == NLTYPE_FIXED &&
2123
0
            NLBLOCK->nllen == 2 &&
2124
0
            UCHAR21TEST(eptr) == NLBLOCK->nl[0])
2125
0
          {
2126
0
          md->hitend = TRUE;
2127
0
          if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2128
0
          }
2129
0
        RRETURN(MATCH_NOMATCH);
2130
0
        }
2131
0
      }
2132
0
    else
2133
0
      {
2134
0
      if (md->noteol) RRETURN(MATCH_NOMATCH);
2135
0
      SCHECK_PARTIAL();
2136
0
      }
2137
0
    ecode++;
2138
0
    break;
2139
2140
    /* Not multiline mode: assert before a terminating newline or before end of
2141
    subject unless noteol is set. */
2142
2143
0
    case OP_DOLL:
2144
0
    if (md->noteol) RRETURN(MATCH_NOMATCH);
2145
0
    if (!md->endonly) goto ASSERT_NL_OR_EOS;
2146
2147
    /* ... else fall through for endonly */
2148
2149
    /* End of subject assertion (\z) */
2150
2151
0
    case OP_EOD:
2152
0
    if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
2153
0
    SCHECK_PARTIAL();
2154
0
    ecode++;
2155
0
    break;
2156
2157
    /* End of subject or ending \n assertion (\Z) */
2158
2159
0
    case OP_EODN:
2160
0
    ASSERT_NL_OR_EOS:
2161
0
    if (eptr < md->end_subject &&
2162
0
        (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
2163
0
      {
2164
0
      if (md->partial != 0 &&
2165
0
          eptr + 1 >= md->end_subject &&
2166
0
          NLBLOCK->nltype == NLTYPE_FIXED &&
2167
0
          NLBLOCK->nllen == 2 &&
2168
0
          UCHAR21TEST(eptr) == NLBLOCK->nl[0])
2169
0
        {
2170
0
        md->hitend = TRUE;
2171
0
        if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2172
0
        }
2173
0
      RRETURN(MATCH_NOMATCH);
2174
0
      }
2175
2176
    /* Either at end of string or \n before end. */
2177
2178
0
    SCHECK_PARTIAL();
2179
0
    ecode++;
2180
0
    break;
2181
2182
    /* Word boundary assertions */
2183
2184
0
    case OP_NOT_WORD_BOUNDARY:
2185
0
    case OP_WORD_BOUNDARY:
2186
0
      {
2187
2188
      /* Find out if the previous and current characters are "word" characters.
2189
      It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
2190
      be "non-word" characters. Remember the earliest consulted character for
2191
      partial matching. */
2192
2193
#ifdef SUPPORT_UTF
2194
      if (utf)
2195
        {
2196
        /* Get status of previous character */
2197
2198
        if (eptr == md->start_subject) prev_is_word = FALSE; else
2199
          {
2200
          PCRE_PUCHAR lastptr = eptr - 1;
2201
          BACKCHAR(lastptr);
2202
          if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
2203
          GETCHAR(c, lastptr);
2204
#ifdef SUPPORT_UCP
2205
          if (md->use_ucp)
2206
            {
2207
            if (c == '_') prev_is_word = TRUE; else
2208
              {
2209
              int cat = UCD_CATEGORY(c);
2210
              prev_is_word = (cat == ucp_L || cat == ucp_N);
2211
              }
2212
            }
2213
          else
2214
#endif
2215
          prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
2216
          }
2217
2218
        /* Get status of next character */
2219
2220
        if (eptr >= md->end_subject)
2221
          {
2222
          SCHECK_PARTIAL();
2223
          cur_is_word = FALSE;
2224
          }
2225
        else
2226
          {
2227
          GETCHAR(c, eptr);
2228
#ifdef SUPPORT_UCP
2229
          if (md->use_ucp)
2230
            {
2231
            if (c == '_') cur_is_word = TRUE; else
2232
              {
2233
              int cat = UCD_CATEGORY(c);
2234
              cur_is_word = (cat == ucp_L || cat == ucp_N);
2235
              }
2236
            }
2237
          else
2238
#endif
2239
          cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
2240
          }
2241
        }
2242
      else
2243
#endif
2244
2245
      /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
2246
      consistency with the behaviour of \w we do use it in this case. */
2247
2248
0
        {
2249
        /* Get status of previous character */
2250
2251
0
        if (eptr == md->start_subject) prev_is_word = FALSE; else
2252
0
          {
2253
0
          if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
2254
#ifdef SUPPORT_UCP
2255
          if (md->use_ucp)
2256
            {
2257
            c = eptr[-1];
2258
            if (c == '_') prev_is_word = TRUE; else
2259
              {
2260
              int cat = UCD_CATEGORY(c);
2261
              prev_is_word = (cat == ucp_L || cat == ucp_N);
2262
              }
2263
            }
2264
          else
2265
#endif
2266
0
          prev_is_word = MAX_255(eptr[-1])
2267
0
            && ((md->ctypes[eptr[-1]] & ctype_word) != 0);
2268
0
          }
2269
2270
        /* Get status of next character */
2271
2272
0
        if (eptr >= md->end_subject)
2273
0
          {
2274
0
          SCHECK_PARTIAL();
2275
0
          cur_is_word = FALSE;
2276
0
          }
2277
0
        else
2278
#ifdef SUPPORT_UCP
2279
        if (md->use_ucp)
2280
          {
2281
          c = *eptr;
2282
          if (c == '_') cur_is_word = TRUE; else
2283
            {
2284
            int cat = UCD_CATEGORY(c);
2285
            cur_is_word = (cat == ucp_L || cat == ucp_N);
2286
            }
2287
          }
2288
        else
2289
#endif
2290
0
        cur_is_word = MAX_255(*eptr)
2291
0
          && ((md->ctypes[*eptr] & ctype_word) != 0);
2292
0
        }
2293
2294
      /* Now see if the situation is what we want */
2295
2296
0
      if ((*ecode++ == OP_WORD_BOUNDARY)?
2297
0
           cur_is_word == prev_is_word : cur_is_word != prev_is_word)
2298
0
        RRETURN(MATCH_NOMATCH);
2299
0
      }
2300
0
    break;
2301
2302
    /* Match any single character type except newline; have to take care with
2303
    CRLF newlines and partial matching. */
2304
2305
0
    case OP_ANY:
2306
0
    if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
2307
0
    if (md->partial != 0 &&
2308
0
        eptr == md->end_subject - 1 &&
2309
0
        NLBLOCK->nltype == NLTYPE_FIXED &&
2310
0
        NLBLOCK->nllen == 2 &&
2311
0
        UCHAR21TEST(eptr) == NLBLOCK->nl[0])
2312
0
      {
2313
0
      md->hitend = TRUE;
2314
0
      if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2315
0
      }
2316
2317
    /* Fall through */
2318
2319
    /* Match any single character whatsoever. */
2320
2321
0
    case OP_ALLANY:
2322
0
    if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2323
0
      {                            /* not be updated before SCHECK_PARTIAL. */
2324
0
      SCHECK_PARTIAL();
2325
0
      RRETURN(MATCH_NOMATCH);
2326
0
      }
2327
0
    eptr++;
2328
#ifdef SUPPORT_UTF
2329
    if (utf) ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
2330
#endif
2331
0
    ecode++;
2332
0
    break;
2333
2334
    /* Match a single byte, even in UTF-8 mode. This opcode really does match
2335
    any byte, even newline, independent of the setting of PCRE_DOTALL. */
2336
2337
0
    case OP_ANYBYTE:
2338
0
    if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2339
0
      {                            /* not be updated before SCHECK_PARTIAL. */
2340
0
      SCHECK_PARTIAL();
2341
0
      RRETURN(MATCH_NOMATCH);
2342
0
      }
2343
0
    eptr++;
2344
0
    ecode++;
2345
0
    break;
2346
2347
0
    case OP_NOT_DIGIT:
2348
0
    if (eptr >= md->end_subject)
2349
0
      {
2350
0
      SCHECK_PARTIAL();
2351
0
      RRETURN(MATCH_NOMATCH);
2352
0
      }
2353
0
    GETCHARINCTEST(c, eptr);
2354
0
    if (
2355
#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2356
       c < 256 &&
2357
#endif
2358
0
       (md->ctypes[c] & ctype_digit) != 0
2359
0
       )
2360
0
      RRETURN(MATCH_NOMATCH);
2361
0
    ecode++;
2362
0
    break;
2363
2364
0
    case OP_DIGIT:
2365
0
    if (eptr >= md->end_subject)
2366
0
      {
2367
0
      SCHECK_PARTIAL();
2368
0
      RRETURN(MATCH_NOMATCH);
2369
0
      }
2370
0
    GETCHARINCTEST(c, eptr);
2371
0
    if (
2372
#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2373
       c > 255 ||
2374
#endif
2375
0
       (md->ctypes[c] & ctype_digit) == 0
2376
0
       )
2377
0
      RRETURN(MATCH_NOMATCH);
2378
0
    ecode++;
2379
0
    break;
2380
2381
0
    case OP_NOT_WHITESPACE:
2382
0
    if (eptr >= md->end_subject)
2383
0
      {
2384
0
      SCHECK_PARTIAL();
2385
0
      RRETURN(MATCH_NOMATCH);
2386
0
      }
2387
0
    GETCHARINCTEST(c, eptr);
2388
0
    if (
2389
#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2390
       c < 256 &&
2391
#endif
2392
0
       (md->ctypes[c] & ctype_space) != 0
2393
0
       )
2394
0
      RRETURN(MATCH_NOMATCH);
2395
0
    ecode++;
2396
0
    break;
2397
2398
0
    case OP_WHITESPACE:
2399
0
    if (eptr >= md->end_subject)
2400
0
      {
2401
0
      SCHECK_PARTIAL();
2402
0
      RRETURN(MATCH_NOMATCH);
2403
0
      }
2404
0
    GETCHARINCTEST(c, eptr);
2405
0
    if (
2406
#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2407
       c > 255 ||
2408
#endif
2409
0
       (md->ctypes[c] & ctype_space) == 0
2410
0
       )
2411
0
      RRETURN(MATCH_NOMATCH);
2412
0
    ecode++;
2413
0
    break;
2414
2415
0
    case OP_NOT_WORDCHAR:
2416
0
    if (eptr >= md->end_subject)
2417
0
      {
2418
0
      SCHECK_PARTIAL();
2419
0
      RRETURN(MATCH_NOMATCH);
2420
0
      }
2421
0
    GETCHARINCTEST(c, eptr);
2422
0
    if (
2423
#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2424
       c < 256 &&
2425
#endif
2426
0
       (md->ctypes[c] & ctype_word) != 0
2427
0
       )
2428
0
      RRETURN(MATCH_NOMATCH);
2429
0
    ecode++;
2430
0
    break;
2431
2432
0
    case OP_WORDCHAR:
2433
0
    if (eptr >= md->end_subject)
2434
0
      {
2435
0
      SCHECK_PARTIAL();
2436
0
      RRETURN(MATCH_NOMATCH);
2437
0
      }
2438
0
    GETCHARINCTEST(c, eptr);
2439
0
    if (
2440
#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2441
       c > 255 ||
2442
#endif
2443
0
       (md->ctypes[c] & ctype_word) == 0
2444
0
       )
2445
0
      RRETURN(MATCH_NOMATCH);
2446
0
    ecode++;
2447
0
    break;
2448
2449
0
    case OP_ANYNL:
2450
0
    if (eptr >= md->end_subject)
2451
0
      {
2452
0
      SCHECK_PARTIAL();
2453
0
      RRETURN(MATCH_NOMATCH);
2454
0
      }
2455
0
    GETCHARINCTEST(c, eptr);
2456
0
    switch(c)
2457
0
      {
2458
0
      default: RRETURN(MATCH_NOMATCH);
2459
2460
0
      case CHAR_CR:
2461
0
      if (eptr >= md->end_subject)
2462
0
        {
2463
0
        SCHECK_PARTIAL();
2464
0
        }
2465
0
      else if (UCHAR21TEST(eptr) == CHAR_LF) eptr++;
2466
0
      break;
2467
2468
0
      case CHAR_LF:
2469
0
      break;
2470
2471
0
      case CHAR_VT:
2472
0
      case CHAR_FF:
2473
0
      case CHAR_NEL:
2474
0
#ifndef EBCDIC
2475
0
      case 0x2028:
2476
0
      case 0x2029:
2477
0
#endif  /* Not EBCDIC */
2478
0
      if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
2479
0
      break;
2480
0
      }
2481
0
    ecode++;
2482
0
    break;
2483
2484
0
    case OP_NOT_HSPACE:
2485
0
    if (eptr >= md->end_subject)
2486
0
      {
2487
0
      SCHECK_PARTIAL();
2488
0
      RRETURN(MATCH_NOMATCH);
2489
0
      }
2490
0
    GETCHARINCTEST(c, eptr);
2491
0
    switch(c)
2492
0
      {
2493
0
      HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
2494
0
      default: break;
2495
0
      }
2496
0
    ecode++;
2497
0
    break;
2498
2499
0
    case OP_HSPACE:
2500
0
    if (eptr >= md->end_subject)
2501
0
      {
2502
0
      SCHECK_PARTIAL();
2503
0
      RRETURN(MATCH_NOMATCH);
2504
0
      }
2505
0
    GETCHARINCTEST(c, eptr);
2506
0
    switch(c)
2507
0
      {
2508
0
      HSPACE_CASES: break;  /* Byte and multibyte cases */
2509
0
      default: RRETURN(MATCH_NOMATCH);
2510
0
      }
2511
0
    ecode++;
2512
0
    break;
2513
2514
0
    case OP_NOT_VSPACE:
2515
0
    if (eptr >= md->end_subject)
2516
0
      {
2517
0
      SCHECK_PARTIAL();
2518
0
      RRETURN(MATCH_NOMATCH);
2519
0
      }
2520
0
    GETCHARINCTEST(c, eptr);
2521
0
    switch(c)
2522
0
      {
2523
0
      VSPACE_CASES: RRETURN(MATCH_NOMATCH);
2524
0
      default: break;
2525
0
      }
2526
0
    ecode++;
2527
0
    break;
2528
2529
0
    case OP_VSPACE:
2530
0
    if (eptr >= md->end_subject)
2531
0
      {
2532
0
      SCHECK_PARTIAL();
2533
0
      RRETURN(MATCH_NOMATCH);
2534
0
      }
2535
0
    GETCHARINCTEST(c, eptr);
2536
0
    switch(c)
2537
0
      {
2538
0
      VSPACE_CASES: break;
2539
0
      default: RRETURN(MATCH_NOMATCH);
2540
0
      }
2541
0
    ecode++;
2542
0
    break;
2543
2544
#ifdef SUPPORT_UCP
2545
    /* Check the next character by Unicode property. We will get here only
2546
    if the support is in the binary; otherwise a compile-time error occurs. */
2547
2548
    case OP_PROP:
2549
    case OP_NOTPROP:
2550
    if (eptr >= md->end_subject)
2551
      {
2552
      SCHECK_PARTIAL();
2553
      RRETURN(MATCH_NOMATCH);
2554
      }
2555
    GETCHARINCTEST(c, eptr);
2556
      {
2557
      const pcre_uint32 *cp;
2558
      const ucd_record *prop = GET_UCD(c);
2559
2560
      switch(ecode[1])
2561
        {
2562
        case PT_ANY:
2563
        if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2564
        break;
2565
2566
        case PT_LAMP:
2567
        if ((prop->chartype == ucp_Lu ||
2568
             prop->chartype == ucp_Ll ||
2569
             prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2570
          RRETURN(MATCH_NOMATCH);
2571
        break;
2572
2573
        case PT_GC:
2574
        if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
2575
          RRETURN(MATCH_NOMATCH);
2576
        break;
2577
2578
        case PT_PC:
2579
        if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2580
          RRETURN(MATCH_NOMATCH);
2581
        break;
2582
2583
        case PT_SC:
2584
        if ((ecode[2] != prop->script) == (op == OP_PROP))
2585
          RRETURN(MATCH_NOMATCH);
2586
        break;
2587
2588
        /* These are specials */
2589
2590
        case PT_ALNUM:
2591
        if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2592
             PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2593
          RRETURN(MATCH_NOMATCH);
2594
        break;
2595
2596
        /* Perl space used to exclude VT, but from Perl 5.18 it is included,
2597
        which means that Perl space and POSIX space are now identical. PCRE
2598
        was changed at release 8.34. */
2599
2600
        case PT_SPACE:    /* Perl space */
2601
        case PT_PXSPACE:  /* POSIX space */
2602
        switch(c)
2603
          {
2604
          HSPACE_CASES:
2605
          VSPACE_CASES:
2606
          if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2607
          break;
2608
2609
          default:
2610
          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) ==
2611
            (op == OP_NOTPROP)) RRETURN(MATCH_NOMATCH);
2612
          break;
2613
          }
2614
        break;
2615
2616
        case PT_WORD:
2617
        if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2618
             PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
2619
             c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2620
          RRETURN(MATCH_NOMATCH);
2621
        break;
2622
2623
        case PT_CLIST:
2624
        cp = PRIV(ucd_caseless_sets) + ecode[2];
2625
        for (;;)
2626
          {
2627
          if (c < *cp)
2628
            { if (op == OP_PROP) { RRETURN(MATCH_NOMATCH); } else break; }
2629
          if (c == *cp++)
2630
            { if (op == OP_PROP) break; else { RRETURN(MATCH_NOMATCH); } }
2631
          }
2632
        break;
2633
2634
        case PT_UCNC:
2635
        if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
2636
             c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
2637
             c >= 0xe000) == (op == OP_NOTPROP))
2638
          RRETURN(MATCH_NOMATCH);
2639
        break;
2640
2641
        /* This should never occur */
2642
2643
        default:
2644
        RRETURN(PCRE_ERROR_INTERNAL);
2645
        }
2646
2647
      ecode += 3;
2648
      }
2649
    break;
2650
2651
    /* Match an extended Unicode sequence. We will get here only if the support
2652
    is in the binary; otherwise a compile-time error occurs. */
2653
2654
    case OP_EXTUNI:
2655
    if (eptr >= md->end_subject)
2656
      {
2657
      SCHECK_PARTIAL();
2658
      RRETURN(MATCH_NOMATCH);
2659
      }
2660
    else
2661
      {
2662
      int lgb, rgb;
2663
      GETCHARINCTEST(c, eptr);
2664
      lgb = UCD_GRAPHBREAK(c);
2665
      while (eptr < md->end_subject)
2666
        {
2667
        int len = 1;
2668
        if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
2669
        rgb = UCD_GRAPHBREAK(c);
2670
        if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
2671
        lgb = rgb;
2672
        eptr += len;
2673
        }
2674
      }
2675
    CHECK_PARTIAL();
2676
    ecode++;
2677
    break;
2678
#endif  /* SUPPORT_UCP */
2679
2680
2681
    /* Match a back reference, possibly repeatedly. Look past the end of the
2682
    item to see if there is repeat information following. The code is similar
2683
    to that for character classes, but repeated for efficiency. Then obey
2684
    similar code to character type repeats - written out again for speed.
2685
    However, if the referenced string is the empty string, always treat
2686
    it as matched, any number of times (otherwise there could be infinite
2687
    loops). If the reference is unset, there are two possibilities:
2688
2689
    (a) In the default, Perl-compatible state, set the length negative;
2690
    this ensures that every attempt at a match fails. We can't just fail
2691
    here, because of the possibility of quantifiers with zero minima.
2692
2693
    (b) If the JavaScript compatibility flag is set, set the length to zero
2694
    so that the back reference matches an empty string.
2695
2696
    Otherwise, set the length to the length of what was matched by the
2697
    referenced subpattern.
2698
2699
    The OP_REF and OP_REFI opcodes are used for a reference to a numbered group
2700
    or to a non-duplicated named group. For a duplicated named group, OP_DNREF
2701
    and OP_DNREFI are used. In this case we must scan the list of groups to
2702
    which the name refers, and use the first one that is set. */
2703
2704
0
    case OP_DNREF:
2705
0
    case OP_DNREFI:
2706
0
    caseless = op == OP_DNREFI;
2707
0
      {
2708
0
      int count = GET2(ecode, 1+IMM2_SIZE);
2709
0
      pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
2710
0
      ecode += 1 + 2*IMM2_SIZE;
2711
2712
      /* Setting the default length first and initializing 'offset' avoids
2713
      compiler warnings in the REF_REPEAT code. */
2714
2715
0
      length = (md->jscript_compat)? 0 : -1;
2716
0
      offset = 0;
2717
2718
0
      while (count-- > 0)
2719
0
        {
2720
0
        offset = GET2(slot, 0) << 1;
2721
0
        if (offset < offset_top && md->offset_vector[offset] >= 0)
2722
0
          {
2723
0
          length = md->offset_vector[offset+1] - md->offset_vector[offset];
2724
0
          break;
2725
0
          }
2726
0
        slot += md->name_entry_size;
2727
0
        }
2728
0
      }
2729
0
    goto REF_REPEAT;
2730
2731
0
    case OP_REF:
2732
0
    case OP_REFI:
2733
0
    caseless = op == OP_REFI;
2734
0
    offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2735
0
    ecode += 1 + IMM2_SIZE;
2736
0
    if (offset >= offset_top || md->offset_vector[offset] < 0)
2737
0
      length = (md->jscript_compat)? 0 : -1;
2738
0
    else
2739
0
      length = md->offset_vector[offset+1] - md->offset_vector[offset];
2740
2741
    /* Set up for repetition, or handle the non-repeated case */
2742
2743
0
    REF_REPEAT:
2744
0
    switch (*ecode)
2745
0
      {
2746
0
      case OP_CRSTAR:
2747
0
      case OP_CRMINSTAR:
2748
0
      case OP_CRPLUS:
2749
0
      case OP_CRMINPLUS:
2750
0
      case OP_CRQUERY:
2751
0
      case OP_CRMINQUERY:
2752
0
      c = *ecode++ - OP_CRSTAR;
2753
0
      minimize = (c & 1) != 0;
2754
0
      min = rep_min[c];                 /* Pick up values from tables; */
2755
0
      max = rep_max[c];                 /* zero for max => infinity */
2756
0
      if (max == 0) max = INT_MAX;
2757
0
      break;
2758
2759
0
      case OP_CRRANGE:
2760
0
      case OP_CRMINRANGE:
2761
0
      minimize = (*ecode == OP_CRMINRANGE);
2762
0
      min = GET2(ecode, 1);
2763
0
      max = GET2(ecode, 1 + IMM2_SIZE);
2764
0
      if (max == 0) max = INT_MAX;
2765
0
      ecode += 1 + 2 * IMM2_SIZE;
2766
0
      break;
2767
2768
0
      default:               /* No repeat follows */
2769
0
      if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
2770
0
        {
2771
0
        if (length == -2) eptr = md->end_subject;   /* Partial match */
2772
0
        CHECK_PARTIAL();
2773
0
        RRETURN(MATCH_NOMATCH);
2774
0
        }
2775
0
      eptr += length;
2776
0
      continue;              /* With the main loop */
2777
0
      }
2778
2779
    /* Handle repeated back references. If the length of the reference is
2780
    zero, just continue with the main loop. If the length is negative, it
2781
    means the reference is unset in non-JavaScript-compatible mode. If the minimum is
2782
    zero, we can continue at the same level without recursion. For any other
2783
    minimum, carrying on will result in NOMATCH. */
2784
2785
0
    if (length == 0) continue;
2786
0
    if (length < 0 && min == 0) continue;
2787
2788
    /* First, ensure the minimum number of matches are present. We get back
2789
    the length of the reference string explicitly rather than passing the
2790
    address of eptr, so that eptr can be a register variable. */
2791
2792
0
    for (i = 1; i <= min; i++)
2793
0
      {
2794
0
      int slength;
2795
0
      if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2796
0
        {
2797
0
        if (slength == -2) eptr = md->end_subject;   /* Partial match */
2798
0
        CHECK_PARTIAL();
2799
0
        RRETURN(MATCH_NOMATCH);
2800
0
        }
2801
0
      eptr += slength;
2802
0
      }
2803
2804
    /* If min = max, continue at the same level without recursion.
2805
    They are not both allowed to be zero. */
2806
2807
0
    if (min == max) continue;
2808
2809
    /* If minimizing, keep trying and advancing the pointer */
2810
2811
0
    if (minimize)
2812
0
      {
2813
0
      for (fi = min;; fi++)
2814
0
        {
2815
0
        int slength;
2816
0
        RMATCH(eptr, ecode, offset_top, md, eptrb, RM14);
2817
0
        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2818
0
        if (fi >= max) RRETURN(MATCH_NOMATCH);
2819
0
        if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2820
0
          {
2821
0
          if (slength == -2) eptr = md->end_subject;   /* Partial match */
2822
0
          CHECK_PARTIAL();
2823
0
          RRETURN(MATCH_NOMATCH);
2824
0
          }
2825
0
        eptr += slength;
2826
0
        }
2827
      /* Control never gets here */
2828
0
      }
2829
2830
    /* If maximizing, find the longest string and work backwards */
2831
2832
0
    else
2833
0
      {
2834
0
      pp = eptr;
2835
0
      for (i = min; i < max; i++)
2836
0
        {
2837
0
        int slength;
2838
0
        if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2839
0
          {
2840
          /* Can't use CHECK_PARTIAL because we don't want to update eptr in
2841
          the soft partial matching case. */
2842
2843
0
          if (slength == -2 && md->partial != 0 &&
2844
0
              md->end_subject > md->start_used_ptr)
2845
0
            {
2846
0
            md->hitend = TRUE;
2847
0
            if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2848
0
            }
2849
0
          break;
2850
0
          }
2851
0
        eptr += slength;
2852
0
        }
2853
2854
0
      while (eptr >= pp)
2855
0
        {
2856
0
        RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);
2857
0
        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2858
0
        eptr -= length;
2859
0
        }
2860
0
      RRETURN(MATCH_NOMATCH);
2861
0
      }
2862
    /* Control never gets here */
2863
2864
    /* Match a bit-mapped character class, possibly repeatedly. This op code is
2865
    used when all the characters in the class have values in the range 0-255,
2866
    and either the matching is caseful, or the characters are in the range
2867
    0-127 when UTF-8 processing is enabled. The only difference between
2868
    OP_CLASS and OP_NCLASS occurs when a data character outside the range is
2869
    encountered.
2870
2871
    First, look past the end of the item to see if there is repeat information
2872
    following. Then obey similar code to character type repeats - written out
2873
    again for speed. */
2874
2875
0
    case OP_NCLASS:
2876
0
    case OP_CLASS:
2877
0
      {
2878
      /* The data variable is saved across frames, so the byte map needs to
2879
      be stored there. */
2880
0
#define BYTE_MAP ((pcre_uint8 *)data)
2881
0
      data = ecode + 1;                /* Save for matching */
2882
0
      ecode += 1 + (32 / sizeof(pcre_uchar)); /* Advance past the item */
2883
2884
0
      switch (*ecode)
2885
0
        {
2886
0
        case OP_CRSTAR:
2887
0
        case OP_CRMINSTAR:
2888
0
        case OP_CRPLUS:
2889
0
        case OP_CRMINPLUS:
2890
0
        case OP_CRQUERY:
2891
0
        case OP_CRMINQUERY:
2892
0
        case OP_CRPOSSTAR:
2893
0
        case OP_CRPOSPLUS:
2894
0
        case OP_CRPOSQUERY:
2895
0
        c = *ecode++ - OP_CRSTAR;
2896
0
        if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & 1) != 0;
2897
0
        else possessive = TRUE;
2898
0
        min = rep_min[c];                 /* Pick up values from tables; */
2899
0
        max = rep_max[c];                 /* zero for max => infinity */
2900
0
        if (max == 0) max = INT_MAX;
2901
0
        break;
2902
2903
0
        case OP_CRRANGE:
2904
0
        case OP_CRMINRANGE:
2905
0
        case OP_CRPOSRANGE:
2906
0
        minimize = (*ecode == OP_CRMINRANGE);
2907
0
        possessive = (*ecode == OP_CRPOSRANGE);
2908
0
        min = GET2(ecode, 1);
2909
0
        max = GET2(ecode, 1 + IMM2_SIZE);
2910
0
        if (max == 0) max = INT_MAX;
2911
0
        ecode += 1 + 2 * IMM2_SIZE;
2912
0
        break;
2913
2914
0
        default:               /* No repeat follows */
2915
0
        min = max = 1;
2916
0
        break;
2917
0
        }
2918
2919
      /* First, ensure the minimum number of matches are present. */
2920
2921
#ifdef SUPPORT_UTF
2922
      if (utf)
2923
        {
2924
        for (i = 1; i <= min; i++)
2925
          {
2926
          if (eptr >= md->end_subject)
2927
            {
2928
            SCHECK_PARTIAL();
2929
            RRETURN(MATCH_NOMATCH);
2930
            }
2931
          GETCHARINC(c, eptr);
2932
          if (c > 255)
2933
            {
2934
            if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2935
            }
2936
          else
2937
            if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2938
          }
2939
        }
2940
      else
2941
#endif
2942
      /* Not UTF mode */
2943
0
        {
2944
0
        for (i = 1; i <= min; i++)
2945
0
          {
2946
0
          if (eptr >= md->end_subject)
2947
0
            {
2948
0
            SCHECK_PARTIAL();
2949
0
            RRETURN(MATCH_NOMATCH);
2950
0
            }
2951
0
          c = *eptr++;
2952
#ifndef COMPILE_PCRE8
2953
          if (c > 255)
2954
            {
2955
            if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2956
            }
2957
          else
2958
#endif
2959
0
            if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2960
0
          }
2961
0
        }
2962
2963
      /* If max == min we can continue with the main loop without the
2964
      need to recurse. */
2965
2966
0
      if (min == max) continue;
2967
2968
      /* If minimizing, keep testing the rest of the expression and advancing
2969
      the pointer while it matches the class. */
2970
2971
0
      if (minimize)
2972
0
        {
2973
#ifdef SUPPORT_UTF
2974
        if (utf)
2975
          {
2976
          for (fi = min;; fi++)
2977
            {
2978
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM16);
2979
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2980
            if (fi >= max) RRETURN(MATCH_NOMATCH);
2981
            if (eptr >= md->end_subject)
2982
              {
2983
              SCHECK_PARTIAL();
2984
              RRETURN(MATCH_NOMATCH);
2985
              }
2986
            GETCHARINC(c, eptr);
2987
            if (c > 255)
2988
              {
2989
              if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2990
              }
2991
            else
2992
              if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2993
            }
2994
          }
2995
        else
2996
#endif
2997
        /* Not UTF mode */
2998
0
          {
2999
0
          for (fi = min;; fi++)
3000
0
            {
3001
0
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM17);
3002
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3003
0
            if (fi >= max) RRETURN(MATCH_NOMATCH);
3004
0
            if (eptr >= md->end_subject)
3005
0
              {
3006
0
              SCHECK_PARTIAL();
3007
0
              RRETURN(MATCH_NOMATCH);
3008
0
              }
3009
0
            c = *eptr++;
3010
#ifndef COMPILE_PCRE8
3011
            if (c > 255)
3012
              {
3013
              if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
3014
              }
3015
            else
3016
#endif
3017
0
              if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
3018
0
            }
3019
0
          }
3020
        /* Control never gets here */
3021
0
        }
3022
3023
      /* If maximizing, find the longest possible run, then work backwards. */
3024
3025
0
      else
3026
0
        {
3027
0
        pp = eptr;
3028
3029
#ifdef SUPPORT_UTF
3030
        if (utf)
3031
          {
3032
          for (i = min; i < max; i++)
3033
            {
3034
            int len = 1;
3035
            if (eptr >= md->end_subject)
3036
              {
3037
              SCHECK_PARTIAL();
3038
              break;
3039
              }
3040
            GETCHARLEN(c, eptr, len);
3041
            if (c > 255)
3042
              {
3043
              if (op == OP_CLASS) break;
3044
              }
3045
            else
3046
              if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
3047
            eptr += len;
3048
            }
3049
3050
          if (possessive) continue;    /* No backtracking */
3051
3052
          for (;;)
3053
            {
3054
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM18);
3055
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3056
            if (eptr-- <= pp) break;        /* Stop if tried at original pos */
3057
            BACKCHAR(eptr);
3058
            }
3059
          }
3060
        else
3061
#endif
3062
          /* Not UTF mode */
3063
0
          {
3064
0
          for (i = min; i < max; i++)
3065
0
            {
3066
0
            if (eptr >= md->end_subject)
3067
0
              {
3068
0
              SCHECK_PARTIAL();
3069
0
              break;
3070
0
              }
3071
0
            c = *eptr;
3072
#ifndef COMPILE_PCRE8
3073
            if (c > 255)
3074
              {
3075
              if (op == OP_CLASS) break;
3076
              }
3077
            else
3078
#endif
3079
0
              if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
3080
0
            eptr++;
3081
0
            }
3082
3083
0
          if (possessive) continue;    /* No backtracking */
3084
3085
0
          while (eptr >= pp)
3086
0
            {
3087
0
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM19);
3088
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3089
0
            eptr--;
3090
0
            }
3091
0
          }
3092
3093
0
        RRETURN(MATCH_NOMATCH);
3094
0
        }
3095
0
#undef BYTE_MAP
3096
0
      }
3097
    /* Control never gets here */
3098
3099
3100
    /* Match an extended character class. In the 8-bit library, this opcode is
3101
    encountered only when UTF-8 mode is supported. In the 16-bit and
3102
    32-bit libraries, codepoints greater than 255 may be encountered even when
3103
    UTF is not supported. */
3104
3105
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3106
    case OP_XCLASS:
3107
      {
3108
      data = ecode + 1 + LINK_SIZE;                /* Save for matching */
3109
      ecode += GET(ecode, 1);                      /* Advance past the item */
3110
3111
      switch (*ecode)
3112
        {
3113
        case OP_CRSTAR:
3114
        case OP_CRMINSTAR:
3115
        case OP_CRPLUS:
3116
        case OP_CRMINPLUS:
3117
        case OP_CRQUERY:
3118
        case OP_CRMINQUERY:
3119
        case OP_CRPOSSTAR:
3120
        case OP_CRPOSPLUS:
3121
        case OP_CRPOSQUERY:
3122
        c = *ecode++ - OP_CRSTAR;
3123
        if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & 1) != 0;
3124
        else possessive = TRUE;
3125
        min = rep_min[c];                 /* Pick up values from tables; */
3126
        max = rep_max[c];                 /* zero for max => infinity */
3127
        if (max == 0) max = INT_MAX;
3128
        break;
3129
3130
        case OP_CRRANGE:
3131
        case OP_CRMINRANGE:
3132
        case OP_CRPOSRANGE:
3133
        minimize = (*ecode == OP_CRMINRANGE);
3134
        possessive = (*ecode == OP_CRPOSRANGE);
3135
        min = GET2(ecode, 1);
3136
        max = GET2(ecode, 1 + IMM2_SIZE);
3137
        if (max == 0) max = INT_MAX;
3138
        ecode += 1 + 2 * IMM2_SIZE;
3139
        break;
3140
3141
        default:               /* No repeat follows */
3142
        min = max = 1;
3143
        break;
3144
        }
3145
3146
      /* First, ensure the minimum number of matches are present. */
3147
3148
      for (i = 1; i <= min; i++)
3149
        {
3150
        if (eptr >= md->end_subject)
3151
          {
3152
          SCHECK_PARTIAL();
3153
          RRETURN(MATCH_NOMATCH);
3154
          }
3155
        GETCHARINCTEST(c, eptr);
3156
        if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3157
        }
3158
3159
      /* If max == min we can continue with the main loop without the
3160
      need to recurse. */
3161
3162
      if (min == max) continue;
3163
3164
      /* If minimizing, keep testing the rest of the expression and advancing
3165
      the pointer while it matches the class. */
3166
3167
      if (minimize)
3168
        {
3169
        for (fi = min;; fi++)
3170
          {
3171
          RMATCH(eptr, ecode, offset_top, md, eptrb, RM20);
3172
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3173
          if (fi >= max) RRETURN(MATCH_NOMATCH);
3174
          if (eptr >= md->end_subject)
3175
            {
3176
            SCHECK_PARTIAL();
3177
            RRETURN(MATCH_NOMATCH);
3178
            }
3179
          GETCHARINCTEST(c, eptr);
3180
          if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3181
          }
3182
        /* Control never gets here */
3183
        }
3184
3185
      /* If maximizing, find the longest possible run, then work backwards. */
3186
3187
      else
3188
        {
3189
        pp = eptr;
3190
        for (i = min; i < max; i++)
3191
          {
3192
          int len = 1;
3193
          if (eptr >= md->end_subject)
3194
            {
3195
            SCHECK_PARTIAL();
3196
            break;
3197
            }
3198
#ifdef SUPPORT_UTF
3199
          GETCHARLENTEST(c, eptr, len);
3200
#else
3201
          c = *eptr;
3202
#endif
3203
          if (!PRIV(xclass)(c, data, utf)) break;
3204
          eptr += len;
3205
          }
3206
3207
        if (possessive) continue;    /* No backtracking */
3208
3209
        for(;;)
3210
          {
3211
          RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
3212
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3213
          if (eptr-- <= pp) break;        /* Stop if tried at original pos */
3214
#ifdef SUPPORT_UTF
3215
          if (utf) BACKCHAR(eptr);
3216
#endif
3217
          }
3218
        RRETURN(MATCH_NOMATCH);
3219
        }
3220
3221
      /* Control never gets here */
3222
      }
3223
#endif    /* End of XCLASS */
3224
3225
    /* Match a single character, casefully */
3226
3227
0
    case OP_CHAR:
3228
#ifdef SUPPORT_UTF
3229
    if (utf)
3230
      {
3231
      length = 1;
3232
      ecode++;
3233
      GETCHARLEN(fc, ecode, length);
3234
      if (length > md->end_subject - eptr)
3235
        {
3236
        CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
3237
        RRETURN(MATCH_NOMATCH);
3238
        }
3239
      while (length-- > 0) if (*ecode++ != UCHAR21INC(eptr)) RRETURN(MATCH_NOMATCH);
3240
      }
3241
    else
3242
#endif
3243
    /* Not UTF mode */
3244
0
      {
3245
0
      if (md->end_subject - eptr < 1)
3246
0
        {
3247
0
        SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
3248
0
        RRETURN(MATCH_NOMATCH);
3249
0
        }
3250
0
      if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
3251
0
      ecode += 2;
3252
0
      }
3253
0
    break;
3254
3255
    /* Match a single character, caselessly. If we are at the end of the
3256
    subject, give up immediately. */
3257
3258
0
    case OP_CHARI:
3259
0
    if (eptr >= md->end_subject)
3260
0
      {
3261
0
      SCHECK_PARTIAL();
3262
0
      RRETURN(MATCH_NOMATCH);
3263
0
      }
3264
3265
#ifdef SUPPORT_UTF
3266
    if (utf)
3267
      {
3268
      length = 1;
3269
      ecode++;
3270
      GETCHARLEN(fc, ecode, length);
3271
3272
      /* If the pattern character's value is < 128, we have only one byte, and
3273
      we know that its other case must also be one byte long, so we can use the
3274
      fast lookup table. We know that there is at least one byte left in the
3275
      subject. */
3276
3277
      if (fc < 128)
3278
        {
3279
        pcre_uint32 cc = UCHAR21(eptr);
3280
        if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH);
3281
        ecode++;
3282
        eptr++;
3283
        }
3284
3285
      /* Otherwise we must pick up the subject character. Note that we cannot
3286
      use the value of "length" to check for sufficient bytes left, because the
3287
      other case of the character may have more or fewer bytes.  */
3288
3289
      else
3290
        {
3291
        pcre_uint32 dc;
3292
        GETCHARINC(dc, eptr);
3293
        ecode += length;
3294
3295
        /* If we have Unicode property support, we can use it to test the other
3296
        case of the character, if there is one. */
3297
3298
        if (fc != dc)
3299
          {
3300
#ifdef SUPPORT_UCP
3301
          if (dc != UCD_OTHERCASE(fc))
3302
#endif
3303
            RRETURN(MATCH_NOMATCH);
3304
          }
3305
        }
3306
      }
3307
    else
3308
#endif   /* SUPPORT_UTF */
3309
3310
    /* Not UTF mode */
3311
0
      {
3312
0
      if (TABLE_GET(ecode[1], md->lcc, ecode[1])
3313
0
          != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
3314
0
      eptr++;
3315
0
      ecode += 2;
3316
0
      }
3317
0
    break;
3318
3319
    /* Match a single character repeatedly. */
3320
3321
0
    case OP_EXACT:
3322
0
    case OP_EXACTI:
3323
0
    min = max = GET2(ecode, 1);
3324
0
    ecode += 1 + IMM2_SIZE;
3325
0
    goto REPEATCHAR;
3326
3327
0
    case OP_POSUPTO:
3328
0
    case OP_POSUPTOI:
3329
0
    possessive = TRUE;
3330
    /* Fall through */
3331
3332
0
    case OP_UPTO:
3333
0
    case OP_UPTOI:
3334
0
    case OP_MINUPTO:
3335
0
    case OP_MINUPTOI:
3336
0
    min = 0;
3337
0
    max = GET2(ecode, 1);
3338
0
    minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
3339
0
    ecode += 1 + IMM2_SIZE;
3340
0
    goto REPEATCHAR;
3341
3342
0
    case OP_POSSTAR:
3343
0
    case OP_POSSTARI:
3344
0
    possessive = TRUE;
3345
0
    min = 0;
3346
0
    max = INT_MAX;
3347
0
    ecode++;
3348
0
    goto REPEATCHAR;
3349
3350
0
    case OP_POSPLUS:
3351
0
    case OP_POSPLUSI:
3352
0
    possessive = TRUE;
3353
0
    min = 1;
3354
0
    max = INT_MAX;
3355
0
    ecode++;
3356
0
    goto REPEATCHAR;
3357
3358
0
    case OP_POSQUERY:
3359
0
    case OP_POSQUERYI:
3360
0
    possessive = TRUE;
3361
0
    min = 0;
3362
0
    max = 1;
3363
0
    ecode++;
3364
0
    goto REPEATCHAR;
3365
3366
0
    case OP_STAR:
3367
0
    case OP_STARI:
3368
0
    case OP_MINSTAR:
3369
0
    case OP_MINSTARI:
3370
0
    case OP_PLUS:
3371
0
    case OP_PLUSI:
3372
0
    case OP_MINPLUS:
3373
0
    case OP_MINPLUSI:
3374
0
    case OP_QUERY:
3375
0
    case OP_QUERYI:
3376
0
    case OP_MINQUERY:
3377
0
    case OP_MINQUERYI:
3378
0
    c = *ecode++ - ((op < OP_STARI)? OP_STAR : OP_STARI);
3379
0
    minimize = (c & 1) != 0;
3380
0
    min = rep_min[c];                 /* Pick up values from tables; */
3381
0
    max = rep_max[c];                 /* zero for max => infinity */
3382
0
    if (max == 0) max = INT_MAX;
3383
3384
    /* Common code for all repeated single-character matches. We first check
3385
    for the minimum number of characters. If the minimum equals the maximum, we
3386
    are done. Otherwise, if minimizing, check the rest of the pattern for a
3387
    match; if there isn't one, advance up to the maximum, one character at a
3388
    time.
3389
3390
    If maximizing, advance up to the maximum number of matching characters,
3391
    until eptr is past the end of the maximum run. If possessive, we are
3392
    then done (no backing up). Otherwise, match at this position; anything
3393
    other than no match is immediately returned. For nomatch, back up one
3394
    character, unless we are matching \R and the last thing matched was
3395
    \r\n, in which case, back up two bytes. When we reach the first optional
3396
    character position, we can save stack by doing a tail recurse.
3397
3398
    The various UTF/non-UTF and caseful/caseless cases are handled separately,
3399
    for speed. */
3400
3401
0
    REPEATCHAR:
3402
#ifdef SUPPORT_UTF
3403
    if (utf)
3404
      {
3405
      length = 1;
3406
      charptr = ecode;
3407
      GETCHARLEN(fc, ecode, length);
3408
      ecode += length;
3409
3410
      /* Handle multibyte character matching specially here. There is
3411
      support for caseless matching if UCP support is present. */
3412
3413
      if (length > 1)
3414
        {
3415
#ifdef SUPPORT_UCP
3416
        pcre_uint32 othercase;
3417
        if (op >= OP_STARI &&     /* Caseless */
3418
            (othercase = UCD_OTHERCASE(fc)) != fc)
3419
          oclength = PRIV(ord2utf)(othercase, occhars);
3420
        else oclength = 0;
3421
#endif  /* SUPPORT_UCP */
3422
3423
        for (i = 1; i <= min; i++)
3424
          {
3425
          if (eptr <= md->end_subject - length &&
3426
            memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3427
#ifdef SUPPORT_UCP
3428
          else if (oclength > 0 &&
3429
                   eptr <= md->end_subject - oclength &&
3430
                   memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3431
#endif  /* SUPPORT_UCP */
3432
          else
3433
            {
3434
            CHECK_PARTIAL();
3435
            RRETURN(MATCH_NOMATCH);
3436
            }
3437
          }
3438
3439
        if (min == max) continue;
3440
3441
        if (minimize)
3442
          {
3443
          for (fi = min;; fi++)
3444
            {
3445
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM22);
3446
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3447
            if (fi >= max) RRETURN(MATCH_NOMATCH);
3448
            if (eptr <= md->end_subject - length &&
3449
              memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3450
#ifdef SUPPORT_UCP
3451
            else if (oclength > 0 &&
3452
                     eptr <= md->end_subject - oclength &&
3453
                     memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3454
#endif  /* SUPPORT_UCP */
3455
            else
3456
              {
3457
              CHECK_PARTIAL();
3458
              RRETURN(MATCH_NOMATCH);
3459
              }
3460
            }
3461
          /* Control never gets here */
3462
          }
3463
3464
        else  /* Maximize */
3465
          {
3466
          pp = eptr;
3467
          for (i = min; i < max; i++)
3468
            {
3469
            if (eptr <= md->end_subject - length &&
3470
                memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3471
#ifdef SUPPORT_UCP
3472
            else if (oclength > 0 &&
3473
                     eptr <= md->end_subject - oclength &&
3474
                     memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3475
#endif  /* SUPPORT_UCP */
3476
            else
3477
              {
3478
              CHECK_PARTIAL();
3479
              break;
3480
              }
3481
            }
3482
3483
          if (possessive) continue;    /* No backtracking */
3484
          for(;;)
3485
            {
3486
            if (eptr <= pp) goto TAIL_RECURSE;
3487
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
3488
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3489
#ifdef SUPPORT_UCP
3490
            eptr--;
3491
            BACKCHAR(eptr);
3492
#else   /* without SUPPORT_UCP */
3493
            eptr -= length;
3494
#endif  /* SUPPORT_UCP */
3495
            }
3496
          }
3497
        /* Control never gets here */
3498
        }
3499
3500
      /* If the length of a UTF-8 character is 1, we fall through here, and
3501
      obey the code as for non-UTF-8 characters below, though in this case the
3502
      value of fc will always be < 128. */
3503
      }
3504
    else
3505
#endif  /* SUPPORT_UTF */
3506
      /* When not in UTF-8 mode, load a single-byte character. */
3507
0
      fc = *ecode++;
3508
3509
    /* The value of fc at this point is always one character, though we may
3510
    or may not be in UTF mode. The code is duplicated for the caseless and
3511
    caseful cases, for speed, since matching characters is likely to be quite
3512
    common. First, ensure the minimum number of matches are present. If min =
3513
    max, continue at the same level without recursing. Otherwise, if
3514
    minimizing, keep trying the rest of the expression and advancing one
3515
    matching character if failing, up to the maximum. Alternatively, if
3516
    maximizing, find the maximum number of characters and work backwards. */
3517
3518
0
    DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3519
0
      max, (char *)eptr));
3520
3521
0
    if (op >= OP_STARI)  /* Caseless */
3522
0
      {
3523
0
#ifdef COMPILE_PCRE8
3524
      /* fc must be < 128 if UTF is enabled. */
3525
0
      foc = md->fcc[fc];
3526
#else
3527
#ifdef SUPPORT_UTF
3528
#ifdef SUPPORT_UCP
3529
      if (utf && fc > 127)
3530
        foc = UCD_OTHERCASE(fc);
3531
#else
3532
      if (utf && fc > 127)
3533
        foc = fc;
3534
#endif /* SUPPORT_UCP */
3535
      else
3536
#endif /* SUPPORT_UTF */
3537
        foc = TABLE_GET(fc, md->fcc, fc);
3538
#endif /* COMPILE_PCRE8 */
3539
3540
0
      for (i = 1; i <= min; i++)
3541
0
        {
3542
0
        pcre_uint32 cc;                 /* Faster than pcre_uchar */
3543
0
        if (eptr >= md->end_subject)
3544
0
          {
3545
0
          SCHECK_PARTIAL();
3546
0
          RRETURN(MATCH_NOMATCH);
3547
0
          }
3548
0
        cc = UCHAR21TEST(eptr);
3549
0
        if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
3550
0
        eptr++;
3551
0
        }
3552
0
      if (min == max) continue;
3553
0
      if (minimize)
3554
0
        {
3555
0
        for (fi = min;; fi++)
3556
0
          {
3557
0
          pcre_uint32 cc;               /* Faster than pcre_uchar */
3558
0
          RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
3559
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3560
0
          if (fi >= max) RRETURN(MATCH_NOMATCH);
3561
0
          if (eptr >= md->end_subject)
3562
0
            {
3563
0
            SCHECK_PARTIAL();
3564
0
            RRETURN(MATCH_NOMATCH);
3565
0
            }
3566
0
          cc = UCHAR21TEST(eptr);
3567
0
          if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
3568
0
          eptr++;
3569
0
          }
3570
        /* Control never gets here */
3571
0
        }
3572
0
      else  /* Maximize */
3573
0
        {
3574
0
        pp = eptr;
3575
0
        for (i = min; i < max; i++)
3576
0
          {
3577
0
          pcre_uint32 cc;               /* Faster than pcre_uchar */
3578
0
          if (eptr >= md->end_subject)
3579
0
            {
3580
0
            SCHECK_PARTIAL();
3581
0
            break;
3582
0
            }
3583
0
          cc = UCHAR21TEST(eptr);
3584
0
          if (fc != cc && foc != cc) break;
3585
0
          eptr++;
3586
0
          }
3587
0
        if (possessive) continue;       /* No backtracking */
3588
0
        for (;;)
3589
0
          {
3590
0
          if (eptr == pp) goto TAIL_RECURSE;
3591
0
          RMATCH(eptr, ecode, offset_top, md, eptrb, RM25);
3592
0
          eptr--;
3593
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3594
0
          }
3595
        /* Control never gets here */
3596
0
        }
3597
0
      }
3598
3599
    /* Caseful comparisons (includes all multi-byte characters) */
3600
3601
0
    else
3602
0
      {
3603
0
      for (i = 1; i <= min; i++)
3604
0
        {
3605
0
        if (eptr >= md->end_subject)
3606
0
          {
3607
0
          SCHECK_PARTIAL();
3608
0
          RRETURN(MATCH_NOMATCH);
3609
0
          }
3610
0
        if (fc != UCHAR21INCTEST(eptr)) RRETURN(MATCH_NOMATCH);
3611
0
        }
3612
3613
0
      if (min == max) continue;
3614
3615
0
      if (minimize)
3616
0
        {
3617
0
        for (fi = min;; fi++)
3618
0
          {
3619
0
          RMATCH(eptr, ecode, offset_top, md, eptrb, RM26);
3620
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3621
0
          if (fi >= max) RRETURN(MATCH_NOMATCH);
3622
0
          if (eptr >= md->end_subject)
3623
0
            {
3624
0
            SCHECK_PARTIAL();
3625
0
            RRETURN(MATCH_NOMATCH);
3626
0
            }
3627
0
          if (fc != UCHAR21INCTEST(eptr)) RRETURN(MATCH_NOMATCH);
3628
0
          }
3629
        /* Control never gets here */
3630
0
        }
3631
0
      else  /* Maximize */
3632
0
        {
3633
0
        pp = eptr;
3634
0
        for (i = min; i < max; i++)
3635
0
          {
3636
0
          if (eptr >= md->end_subject)
3637
0
            {
3638
0
            SCHECK_PARTIAL();
3639
0
            break;
3640
0
            }
3641
0
          if (fc != UCHAR21TEST(eptr)) break;
3642
0
          eptr++;
3643
0
          }
3644
0
        if (possessive) continue;    /* No backtracking */
3645
0
        for (;;)
3646
0
          {
3647
0
          if (eptr == pp) goto TAIL_RECURSE;
3648
0
          RMATCH(eptr, ecode, offset_top, md, eptrb, RM27);
3649
0
          eptr--;
3650
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3651
0
          }
3652
        /* Control never gets here */
3653
0
        }
3654
0
      }
3655
    /* Control never gets here */
3656
3657
    /* Match a negated single one-byte character. The character we are
3658
    checking can be multibyte. */
3659
3660
0
    case OP_NOT:
3661
0
    case OP_NOTI:
3662
0
    if (eptr >= md->end_subject)
3663
0
      {
3664
0
      SCHECK_PARTIAL();
3665
0
      RRETURN(MATCH_NOMATCH);
3666
0
      }
3667
#ifdef SUPPORT_UTF
3668
    if (utf)
3669
      {
3670
      register pcre_uint32 ch, och;
3671
3672
      ecode++;
3673
      GETCHARINC(ch, ecode);
3674
      GETCHARINC(c, eptr);
3675
3676
      if (op == OP_NOT)
3677
        {
3678
        if (ch == c) RRETURN(MATCH_NOMATCH);
3679
        }
3680
      else
3681
        {
3682
#ifdef SUPPORT_UCP
3683
        if (ch > 127)
3684
          och = UCD_OTHERCASE(ch);
3685
#else
3686
        if (ch > 127)
3687
          och = ch;
3688
#endif /* SUPPORT_UCP */
3689
        else
3690
          och = TABLE_GET(ch, md->fcc, ch);
3691
        if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
3692
        }
3693
      }
3694
    else
3695
#endif
3696
0
      {
3697
0
      register pcre_uint32 ch = ecode[1];
3698
0
      c = *eptr++;
3699
0
      if (ch == c || (op == OP_NOTI && TABLE_GET(ch, md->fcc, ch) == c))
3700
0
        RRETURN(MATCH_NOMATCH);
3701
0
      ecode += 2;
3702
0
      }
3703
0
    break;
3704
3705
    /* Match a negated single one-byte character repeatedly. This is almost a
3706
    repeat of the code for a repeated single character, but I haven't found a
3707
    nice way of commoning these up that doesn't require a test of the
3708
    positive/negative option for each character match. Maybe that wouldn't add
3709
    very much to the time taken, but character matching *is* what this is all
3710
    about... */
3711
3712
0
    case OP_NOTEXACT:
3713
0
    case OP_NOTEXACTI:
3714
0
    min = max = GET2(ecode, 1);
3715
0
    ecode += 1 + IMM2_SIZE;
3716
0
    goto REPEATNOTCHAR;
3717
3718
0
    case OP_NOTUPTO:
3719
0
    case OP_NOTUPTOI:
3720
0
    case OP_NOTMINUPTO:
3721
0
    case OP_NOTMINUPTOI:
3722
0
    min = 0;
3723
0
    max = GET2(ecode, 1);
3724
0
    minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
3725
0
    ecode += 1 + IMM2_SIZE;
3726
0
    goto REPEATNOTCHAR;
3727
3728
0
    case OP_NOTPOSSTAR:
3729
0
    case OP_NOTPOSSTARI:
3730
0
    possessive = TRUE;
3731
0
    min = 0;
3732
0
    max = INT_MAX;
3733
0
    ecode++;
3734
0
    goto REPEATNOTCHAR;
3735
3736
0
    case OP_NOTPOSPLUS:
3737
0
    case OP_NOTPOSPLUSI:
3738
0
    possessive = TRUE;
3739
0
    min = 1;
3740
0
    max = INT_MAX;
3741
0
    ecode++;
3742
0
    goto REPEATNOTCHAR;
3743
3744
0
    case OP_NOTPOSQUERY:
3745
0
    case OP_NOTPOSQUERYI:
3746
0
    possessive = TRUE;
3747
0
    min = 0;
3748
0
    max = 1;
3749
0
    ecode++;
3750
0
    goto REPEATNOTCHAR;
3751
3752
0
    case OP_NOTPOSUPTO:
3753
0
    case OP_NOTPOSUPTOI:
3754
0
    possessive = TRUE;
3755
0
    min = 0;
3756
0
    max = GET2(ecode, 1);
3757
0
    ecode += 1 + IMM2_SIZE;
3758
0
    goto REPEATNOTCHAR;
3759
3760
0
    case OP_NOTSTAR:
3761
0
    case OP_NOTSTARI:
3762
0
    case OP_NOTMINSTAR:
3763
0
    case OP_NOTMINSTARI:
3764
0
    case OP_NOTPLUS:
3765
0
    case OP_NOTPLUSI:
3766
0
    case OP_NOTMINPLUS:
3767
0
    case OP_NOTMINPLUSI:
3768
0
    case OP_NOTQUERY:
3769
0
    case OP_NOTQUERYI:
3770
0
    case OP_NOTMINQUERY:
3771
0
    case OP_NOTMINQUERYI:
3772
0
    c = *ecode++ - ((op >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
3773
0
    minimize = (c & 1) != 0;
3774
0
    min = rep_min[c];                 /* Pick up values from tables; */
3775
0
    max = rep_max[c];                 /* zero for max => infinity */
3776
0
    if (max == 0) max = INT_MAX;
3777
3778
    /* Common code for all repeated single-byte matches. */
3779
3780
0
    REPEATNOTCHAR:
3781
0
    GETCHARINCTEST(fc, ecode);
3782
3783
    /* The code is duplicated for the caseless and caseful cases, for speed,
3784
    since matching characters is likely to be quite common. First, ensure the
3785
    minimum number of matches are present. If min = max, continue at the same
3786
    level without recursing. Otherwise, if minimizing, keep trying the rest of
3787
    the expression and advancing one matching character if failing, up to the
3788
    maximum. Alternatively, if maximizing, find the maximum number of
3789
    characters and work backwards. */
3790
3791
0
    DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3792
0
      max, (char *)eptr));
3793
3794
0
    if (op >= OP_NOTSTARI)     /* Caseless */
3795
0
      {
3796
#ifdef SUPPORT_UTF
3797
#ifdef SUPPORT_UCP
3798
      if (utf && fc > 127)
3799
        foc = UCD_OTHERCASE(fc);
3800
#else
3801
      if (utf && fc > 127)
3802
        foc = fc;
3803
#endif /* SUPPORT_UCP */
3804
      else
3805
#endif /* SUPPORT_UTF */
3806
0
        foc = TABLE_GET(fc, md->fcc, fc);
3807
3808
#ifdef SUPPORT_UTF
3809
      if (utf)
3810
        {
3811
        register pcre_uint32 d;
3812
        for (i = 1; i <= min; i++)
3813
          {
3814
          if (eptr >= md->end_subject)
3815
            {
3816
            SCHECK_PARTIAL();
3817
            RRETURN(MATCH_NOMATCH);
3818
            }
3819
          GETCHARINC(d, eptr);
3820
          if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
3821
          }
3822
        }
3823
      else
3824
#endif  /* SUPPORT_UTF */
3825
      /* Not UTF mode */
3826
0
        {
3827
0
        for (i = 1; i <= min; i++)
3828
0
          {
3829
0
          if (eptr >= md->end_subject)
3830
0
            {
3831
0
            SCHECK_PARTIAL();
3832
0
            RRETURN(MATCH_NOMATCH);
3833
0
            }
3834
0
          if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3835
0
          eptr++;
3836
0
          }
3837
0
        }
3838
3839
0
      if (min == max) continue;
3840
3841
0
      if (minimize)
3842
0
        {
3843
#ifdef SUPPORT_UTF
3844
        if (utf)
3845
          {
3846
          register pcre_uint32 d;
3847
          for (fi = min;; fi++)
3848
            {
3849
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM28);
3850
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3851
            if (fi >= max) RRETURN(MATCH_NOMATCH);
3852
            if (eptr >= md->end_subject)
3853
              {
3854
              SCHECK_PARTIAL();
3855
              RRETURN(MATCH_NOMATCH);
3856
              }
3857
            GETCHARINC(d, eptr);
3858
            if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
3859
            }
3860
          }
3861
        else
3862
#endif  /*SUPPORT_UTF */
3863
        /* Not UTF mode */
3864
0
          {
3865
0
          for (fi = min;; fi++)
3866
0
            {
3867
0
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM29);
3868
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3869
0
            if (fi >= max) RRETURN(MATCH_NOMATCH);
3870
0
            if (eptr >= md->end_subject)
3871
0
              {
3872
0
              SCHECK_PARTIAL();
3873
0
              RRETURN(MATCH_NOMATCH);
3874
0
              }
3875
0
            if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3876
0
            eptr++;
3877
0
            }
3878
0
          }
3879
        /* Control never gets here */
3880
0
        }
3881
3882
      /* Maximize case */
3883
3884
0
      else
3885
0
        {
3886
0
        pp = eptr;
3887
3888
#ifdef SUPPORT_UTF
3889
        if (utf)
3890
          {
3891
          register pcre_uint32 d;
3892
          for (i = min; i < max; i++)
3893
            {
3894
            int len = 1;
3895
            if (eptr >= md->end_subject)
3896
              {
3897
              SCHECK_PARTIAL();
3898
              break;
3899
              }
3900
            GETCHARLEN(d, eptr, len);
3901
            if (fc == d || (unsigned int)foc == d) break;
3902
            eptr += len;
3903
            }
3904
          if (possessive) continue;    /* No backtracking */
3905
          for(;;)
3906
            {
3907
            if (eptr <= pp) goto TAIL_RECURSE;
3908
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
3909
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3910
            eptr--;
3911
            BACKCHAR(eptr);
3912
            }
3913
          }
3914
        else
3915
#endif  /* SUPPORT_UTF */
3916
        /* Not UTF mode */
3917
0
          {
3918
0
          for (i = min; i < max; i++)
3919
0
            {
3920
0
            if (eptr >= md->end_subject)
3921
0
              {
3922
0
              SCHECK_PARTIAL();
3923
0
              break;
3924
0
              }
3925
0
            if (fc == *eptr || foc == *eptr) break;
3926
0
            eptr++;
3927
0
            }
3928
0
          if (possessive) continue;    /* No backtracking */
3929
0
          for (;;)
3930
0
            {
3931
0
            if (eptr == pp) goto TAIL_RECURSE;
3932
0
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM31);
3933
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3934
0
            eptr--;
3935
0
            }
3936
0
          }
3937
        /* Control never gets here */
3938
0
        }
3939
0
      }
3940
3941
    /* Caseful comparisons */
3942
3943
0
    else
3944
0
      {
3945
#ifdef SUPPORT_UTF
3946
      if (utf)
3947
        {
3948
        register pcre_uint32 d;
3949
        for (i = 1; i <= min; i++)
3950
          {
3951
          if (eptr >= md->end_subject)
3952
            {
3953
            SCHECK_PARTIAL();
3954
            RRETURN(MATCH_NOMATCH);
3955
            }
3956
          GETCHARINC(d, eptr);
3957
          if (fc == d) RRETURN(MATCH_NOMATCH);
3958
          }
3959
        }
3960
      else
3961
#endif
3962
      /* Not UTF mode */
3963
0
        {
3964
0
        for (i = 1; i <= min; i++)
3965
0
          {
3966
0
          if (eptr >= md->end_subject)
3967
0
            {
3968
0
            SCHECK_PARTIAL();
3969
0
            RRETURN(MATCH_NOMATCH);
3970
0
            }
3971
0
          if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3972
0
          }
3973
0
        }
3974
3975
0
      if (min == max) continue;
3976
3977
0
      if (minimize)
3978
0
        {
3979
#ifdef SUPPORT_UTF
3980
        if (utf)
3981
          {
3982
          register pcre_uint32 d;
3983
          for (fi = min;; fi++)
3984
            {
3985
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM32);
3986
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3987
            if (fi >= max) RRETURN(MATCH_NOMATCH);
3988
            if (eptr >= md->end_subject)
3989
              {
3990
              SCHECK_PARTIAL();
3991
              RRETURN(MATCH_NOMATCH);
3992
              }
3993
            GETCHARINC(d, eptr);
3994
            if (fc == d) RRETURN(MATCH_NOMATCH);
3995
            }
3996
          }
3997
        else
3998
#endif
3999
        /* Not UTF mode */
4000
0
          {
4001
0
          for (fi = min;; fi++)
4002
0
            {
4003
0
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM33);
4004
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4005
0
            if (fi >= max) RRETURN(MATCH_NOMATCH);
4006
0
            if (eptr >= md->end_subject)
4007
0
              {
4008
0
              SCHECK_PARTIAL();
4009
0
              RRETURN(MATCH_NOMATCH);
4010
0
              }
4011
0
            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
4012
0
            }
4013
0
          }
4014
        /* Control never gets here */
4015
0
        }
4016
4017
      /* Maximize case */
4018
4019
0
      else
4020
0
        {
4021
0
        pp = eptr;
4022
4023
#ifdef SUPPORT_UTF
4024
        if (utf)
4025
          {
4026
          register pcre_uint32 d;
4027
          for (i = min; i < max; i++)
4028
            {
4029
            int len = 1;
4030
            if (eptr >= md->end_subject)
4031
              {
4032
              SCHECK_PARTIAL();
4033
              break;
4034
              }
4035
            GETCHARLEN(d, eptr, len);
4036
            if (fc == d) break;
4037
            eptr += len;
4038
            }
4039
          if (possessive) continue;    /* No backtracking */
4040
          for(;;)
4041
            {
4042
            if (eptr <= pp) goto TAIL_RECURSE;
4043
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);
4044
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4045
            eptr--;
4046
            BACKCHAR(eptr);
4047
            }
4048
          }
4049
        else
4050
#endif
4051
        /* Not UTF mode */
4052
0
          {
4053
0
          for (i = min; i < max; i++)
4054
0
            {
4055
0
            if (eptr >= md->end_subject)
4056
0
              {
4057
0
              SCHECK_PARTIAL();
4058
0
              break;
4059
0
              }
4060
0
            if (fc == *eptr) break;
4061
0
            eptr++;
4062
0
            }
4063
0
          if (possessive) continue;    /* No backtracking */
4064
0
          for (;;)
4065
0
            {
4066
0
            if (eptr == pp) goto TAIL_RECURSE;
4067
0
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM35);
4068
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4069
0
            eptr--;
4070
0
            }
4071
0
          }
4072
        /* Control never gets here */
4073
0
        }
4074
0
      }
4075
    /* Control never gets here */
4076
4077
    /* Match a single character type repeatedly; several different opcodes
4078
    share code. This is very similar to the code for single characters, but we
4079
    repeat it in the interests of efficiency. */
4080
4081
0
    case OP_TYPEEXACT:
4082
0
    min = max = GET2(ecode, 1);
4083
0
    minimize = TRUE;
4084
0
    ecode += 1 + IMM2_SIZE;
4085
0
    goto REPEATTYPE;
4086
4087
0
    case OP_TYPEUPTO:
4088
0
    case OP_TYPEMINUPTO:
4089
0
    min = 0;
4090
0
    max = GET2(ecode, 1);
4091
0
    minimize = *ecode == OP_TYPEMINUPTO;
4092
0
    ecode += 1 + IMM2_SIZE;
4093
0
    goto REPEATTYPE;
4094
4095
0
    case OP_TYPEPOSSTAR:
4096
0
    possessive = TRUE;
4097
0
    min = 0;
4098
0
    max = INT_MAX;
4099
0
    ecode++;
4100
0
    goto REPEATTYPE;
4101
4102
0
    case OP_TYPEPOSPLUS:
4103
0
    possessive = TRUE;
4104
0
    min = 1;
4105
0
    max = INT_MAX;
4106
0
    ecode++;
4107
0
    goto REPEATTYPE;
4108
4109
0
    case OP_TYPEPOSQUERY:
4110
0
    possessive = TRUE;
4111
0
    min = 0;
4112
0
    max = 1;
4113
0
    ecode++;
4114
0
    goto REPEATTYPE;
4115
4116
0
    case OP_TYPEPOSUPTO:
4117
0
    possessive = TRUE;
4118
0
    min = 0;
4119
0
    max = GET2(ecode, 1);
4120
0
    ecode += 1 + IMM2_SIZE;
4121
0
    goto REPEATTYPE;
4122
4123
0
    case OP_TYPESTAR:
4124
0
    case OP_TYPEMINSTAR:
4125
0
    case OP_TYPEPLUS:
4126
0
    case OP_TYPEMINPLUS:
4127
0
    case OP_TYPEQUERY:
4128
0
    case OP_TYPEMINQUERY:
4129
0
    c = *ecode++ - OP_TYPESTAR;
4130
0
    minimize = (c & 1) != 0;
4131
0
    min = rep_min[c];                 /* Pick up values from tables; */
4132
0
    max = rep_max[c];                 /* zero for max => infinity */
4133
0
    if (max == 0) max = INT_MAX;
4134
4135
    /* Common code for all repeated single character type matches. Note that
4136
    in UTF-8 mode, '.' matches a character of any length, but for the other
4137
    character types, the valid characters are all one-byte long. */
4138
4139
0
    REPEATTYPE:
4140
0
    ctype = *ecode++;      /* Code for the character type */
4141
4142
#ifdef SUPPORT_UCP
4143
    if (ctype == OP_PROP || ctype == OP_NOTPROP)
4144
      {
4145
      prop_fail_result = ctype == OP_NOTPROP;
4146
      prop_type = *ecode++;
4147
      prop_value = *ecode++;
4148
      }
4149
    else prop_type = -1;
4150
#endif
4151
4152
    /* First, ensure the minimum number of matches are present. Use inline
4153
    code for maximizing the speed, and do the type test once at the start
4154
    (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
4155
    is tidier. Also separate the UCP code, which can be the same for both UTF-8
4156
    and single-bytes. */
4157
4158
0
    if (min > 0)
4159
0
      {
4160
#ifdef SUPPORT_UCP
4161
      if (prop_type >= 0)
4162
        {
4163
        switch(prop_type)
4164
          {
4165
          case PT_ANY:
4166
          if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4167
          for (i = 1; i <= min; i++)
4168
            {
4169
            if (eptr >= md->end_subject)
4170
              {
4171
              SCHECK_PARTIAL();
4172
              RRETURN(MATCH_NOMATCH);
4173
              }
4174
            GETCHARINCTEST(c, eptr);
4175
            }
4176
          break;
4177
4178
          case PT_LAMP:
4179
          for (i = 1; i <= min; i++)
4180
            {
4181
            int chartype;
4182
            if (eptr >= md->end_subject)
4183
              {
4184
              SCHECK_PARTIAL();
4185
              RRETURN(MATCH_NOMATCH);
4186
              }
4187
            GETCHARINCTEST(c, eptr);
4188
            chartype = UCD_CHARTYPE(c);
4189
            if ((chartype == ucp_Lu ||
4190
                 chartype == ucp_Ll ||
4191
                 chartype == ucp_Lt) == prop_fail_result)
4192
              RRETURN(MATCH_NOMATCH);
4193
            }
4194
          break;
4195
4196
          case PT_GC:
4197
          for (i = 1; i <= min; i++)
4198
            {
4199
            if (eptr >= md->end_subject)
4200
              {
4201
              SCHECK_PARTIAL();
4202
              RRETURN(MATCH_NOMATCH);
4203
              }
4204
            GETCHARINCTEST(c, eptr);
4205
            if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
4206
              RRETURN(MATCH_NOMATCH);
4207
            }
4208
          break;
4209
4210
          case PT_PC:
4211
          for (i = 1; i <= min; i++)
4212
            {
4213
            if (eptr >= md->end_subject)
4214
              {
4215
              SCHECK_PARTIAL();
4216
              RRETURN(MATCH_NOMATCH);
4217
              }
4218
            GETCHARINCTEST(c, eptr);
4219
            if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
4220
              RRETURN(MATCH_NOMATCH);
4221
            }
4222
          break;
4223
4224
          case PT_SC:
4225
          for (i = 1; i <= min; i++)
4226
            {
4227
            if (eptr >= md->end_subject)
4228
              {
4229
              SCHECK_PARTIAL();
4230
              RRETURN(MATCH_NOMATCH);
4231
              }
4232
            GETCHARINCTEST(c, eptr);
4233
            if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
4234
              RRETURN(MATCH_NOMATCH);
4235
            }
4236
          break;
4237
4238
          case PT_ALNUM:
4239
          for (i = 1; i <= min; i++)
4240
            {
4241
            int category;
4242
            if (eptr >= md->end_subject)
4243
              {
4244
              SCHECK_PARTIAL();
4245
              RRETURN(MATCH_NOMATCH);
4246
              }
4247
            GETCHARINCTEST(c, eptr);
4248
            category = UCD_CATEGORY(c);
4249
            if ((category == ucp_L || category == ucp_N) == prop_fail_result)
4250
              RRETURN(MATCH_NOMATCH);
4251
            }
4252
          break;
4253
4254
          /* Perl space used to exclude VT, but from Perl 5.18 it is included,
4255
          which means that Perl space and POSIX space are now identical. PCRE
4256
          was changed at release 8.34. */
4257
4258
          case PT_SPACE:    /* Perl space */
4259
          case PT_PXSPACE:  /* POSIX space */
4260
          for (i = 1; i <= min; i++)
4261
            {
4262
            if (eptr >= md->end_subject)
4263
              {
4264
              SCHECK_PARTIAL();
4265
              RRETURN(MATCH_NOMATCH);
4266
              }
4267
            GETCHARINCTEST(c, eptr);
4268
            switch(c)
4269
              {
4270
              HSPACE_CASES:
4271
              VSPACE_CASES:
4272
              if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4273
              break;
4274
4275
              default:
4276
              if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
4277
                RRETURN(MATCH_NOMATCH);
4278
              break;
4279
              }
4280
            }
4281
          break;
4282
4283
          case PT_WORD:
4284
          for (i = 1; i <= min; i++)
4285
            {
4286
            int category;
4287
            if (eptr >= md->end_subject)
4288
              {
4289
              SCHECK_PARTIAL();
4290
              RRETURN(MATCH_NOMATCH);
4291
              }
4292
            GETCHARINCTEST(c, eptr);
4293
            category = UCD_CATEGORY(c);
4294
            if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE)
4295
                   == prop_fail_result)
4296
              RRETURN(MATCH_NOMATCH);
4297
            }
4298
          break;
4299
4300
          case PT_CLIST:
4301
          for (i = 1; i <= min; i++)
4302
            {
4303
            const pcre_uint32 *cp;
4304
            if (eptr >= md->end_subject)
4305
              {
4306
              SCHECK_PARTIAL();
4307
              RRETURN(MATCH_NOMATCH);
4308
              }
4309
            GETCHARINCTEST(c, eptr);
4310
            cp = PRIV(ucd_caseless_sets) + prop_value;
4311
            for (;;)
4312
              {
4313
              if (c < *cp)
4314
                { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
4315
              if (c == *cp++)
4316
                { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
4317
              }
4318
            }
4319
          break;
4320
4321
          case PT_UCNC:
4322
          for (i = 1; i <= min; i++)
4323
            {
4324
            if (eptr >= md->end_subject)
4325
              {
4326
              SCHECK_PARTIAL();
4327
              RRETURN(MATCH_NOMATCH);
4328
              }
4329
            GETCHARINCTEST(c, eptr);
4330
            if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
4331
                 c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
4332
                 c >= 0xe000) == prop_fail_result)
4333
              RRETURN(MATCH_NOMATCH);
4334
            }
4335
          break;
4336
4337
          /* This should not occur */
4338
4339
          default:
4340
          RRETURN(PCRE_ERROR_INTERNAL);
4341
          }
4342
        }
4343
4344
      /* Match extended Unicode sequences. We will get here only if the
4345
      support is in the binary; otherwise a compile-time error occurs. */
4346
4347
      else if (ctype == OP_EXTUNI)
4348
        {
4349
        for (i = 1; i <= min; i++)
4350
          {
4351
          if (eptr >= md->end_subject)
4352
            {
4353
            SCHECK_PARTIAL();
4354
            RRETURN(MATCH_NOMATCH);
4355
            }
4356
          else
4357
            {
4358
            int lgb, rgb;
4359
            GETCHARINCTEST(c, eptr);
4360
            lgb = UCD_GRAPHBREAK(c);
4361
           while (eptr < md->end_subject)
4362
              {
4363
              int len = 1;
4364
              if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
4365
              rgb = UCD_GRAPHBREAK(c);
4366
              if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
4367
              lgb = rgb;
4368
              eptr += len;
4369
              }
4370
            }
4371
          CHECK_PARTIAL();
4372
          }
4373
        }
4374
4375
      else
4376
#endif     /* SUPPORT_UCP */
4377
4378
/* Handle all other cases when the coding is UTF-8 */
4379
4380
#ifdef SUPPORT_UTF
4381
      if (utf) switch(ctype)
4382
        {
4383
        case OP_ANY:
4384
        for (i = 1; i <= min; i++)
4385
          {
4386
          if (eptr >= md->end_subject)
4387
            {
4388
            SCHECK_PARTIAL();
4389
            RRETURN(MATCH_NOMATCH);
4390
            }
4391
          if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4392
          if (md->partial != 0 &&
4393
              eptr + 1 >= md->end_subject &&
4394
              NLBLOCK->nltype == NLTYPE_FIXED &&
4395
              NLBLOCK->nllen == 2 &&
4396
              UCHAR21(eptr) == NLBLOCK->nl[0])
4397
            {
4398
            md->hitend = TRUE;
4399
            if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
4400
            }
4401
          eptr++;
4402
          ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4403
          }
4404
        break;
4405
4406
        case OP_ALLANY:
4407
        for (i = 1; i <= min; i++)
4408
          {
4409
          if (eptr >= md->end_subject)
4410
            {
4411
            SCHECK_PARTIAL();
4412
            RRETURN(MATCH_NOMATCH);
4413
            }
4414
          eptr++;
4415
          ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4416
          }
4417
        break;
4418
4419
        case OP_ANYBYTE:
4420
        if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
4421
        eptr += min;
4422
        break;
4423
4424
        case OP_ANYNL:
4425
        for (i = 1; i <= min; i++)
4426
          {
4427
          if (eptr >= md->end_subject)
4428
            {
4429
            SCHECK_PARTIAL();
4430
            RRETURN(MATCH_NOMATCH);
4431
            }
4432
          GETCHARINC(c, eptr);
4433
          switch(c)
4434
            {
4435
            default: RRETURN(MATCH_NOMATCH);
4436
4437
            case CHAR_CR:
4438
            if (eptr < md->end_subject && UCHAR21(eptr) == CHAR_LF) eptr++;
4439
            break;
4440
4441
            case CHAR_LF:
4442
            break;
4443
4444
            case CHAR_VT:
4445
            case CHAR_FF:
4446
            case CHAR_NEL:
4447
#ifndef EBCDIC
4448
            case 0x2028:
4449
            case 0x2029:
4450
#endif  /* Not EBCDIC */
4451
            if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4452
            break;
4453
            }
4454
          }
4455
        break;
4456
4457
        case OP_NOT_HSPACE:
4458
        for (i = 1; i <= min; i++)
4459
          {
4460
          if (eptr >= md->end_subject)
4461
            {
4462
            SCHECK_PARTIAL();
4463
            RRETURN(MATCH_NOMATCH);
4464
            }
4465
          GETCHARINC(c, eptr);
4466
          switch(c)
4467
            {
4468
            HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
4469
            default: break;
4470
            }
4471
          }
4472
        break;
4473
4474
        case OP_HSPACE:
4475
        for (i = 1; i <= min; i++)
4476
          {
4477
          if (eptr >= md->end_subject)
4478
            {
4479
            SCHECK_PARTIAL();
4480
            RRETURN(MATCH_NOMATCH);
4481
            }
4482
          GETCHARINC(c, eptr);
4483
          switch(c)
4484
            {
4485
            HSPACE_CASES: break;  /* Byte and multibyte cases */
4486
            default: RRETURN(MATCH_NOMATCH);
4487
            }
4488
          }
4489
        break;
4490
4491
        case OP_NOT_VSPACE:
4492
        for (i = 1; i <= min; i++)
4493
          {
4494
          if (eptr >= md->end_subject)
4495
            {
4496
            SCHECK_PARTIAL();
4497
            RRETURN(MATCH_NOMATCH);
4498
            }
4499
          GETCHARINC(c, eptr);
4500
          switch(c)
4501
            {
4502
            VSPACE_CASES: RRETURN(MATCH_NOMATCH);
4503
            default: break;
4504
            }
4505
          }
4506
        break;
4507
4508
        case OP_VSPACE:
4509
        for (i = 1; i <= min; i++)
4510
          {
4511
          if (eptr >= md->end_subject)
4512
            {
4513
            SCHECK_PARTIAL();
4514
            RRETURN(MATCH_NOMATCH);
4515
            }
4516
          GETCHARINC(c, eptr);
4517
          switch(c)
4518
            {
4519
            VSPACE_CASES: break;
4520
            default: RRETURN(MATCH_NOMATCH);
4521
            }
4522
          }
4523
        break;
4524
4525
        case OP_NOT_DIGIT:
4526
        for (i = 1; i <= min; i++)
4527
          {
4528
          if (eptr >= md->end_subject)
4529
            {
4530
            SCHECK_PARTIAL();
4531
            RRETURN(MATCH_NOMATCH);
4532
            }
4533
          GETCHARINC(c, eptr);
4534
          if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
4535
            RRETURN(MATCH_NOMATCH);
4536
          }
4537
        break;
4538
4539
        case OP_DIGIT:
4540
        for (i = 1; i <= min; i++)
4541
          {
4542
          pcre_uint32 cc;
4543
          if (eptr >= md->end_subject)
4544
            {
4545
            SCHECK_PARTIAL();
4546
            RRETURN(MATCH_NOMATCH);
4547
            }
4548
          cc = UCHAR21(eptr);
4549
          if (cc >= 128 || (md->ctypes[cc] & ctype_digit) == 0)
4550
            RRETURN(MATCH_NOMATCH);
4551
          eptr++;
4552
          /* No need to skip more bytes - we know it's a 1-byte character */
4553
          }
4554
        break;
4555
4556
        case OP_NOT_WHITESPACE:
4557
        for (i = 1; i <= min; i++)
4558
          {
4559
          pcre_uint32 cc;
4560
          if (eptr >= md->end_subject)
4561
            {
4562
            SCHECK_PARTIAL();
4563
            RRETURN(MATCH_NOMATCH);
4564
            }
4565
          cc = UCHAR21(eptr);
4566
          if (cc < 128 && (md->ctypes[cc] & ctype_space) != 0)
4567
            RRETURN(MATCH_NOMATCH);
4568
          eptr++;
4569
          ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4570
          }
4571
        break;
4572
4573
        case OP_WHITESPACE:
4574
        for (i = 1; i <= min; i++)
4575
          {
4576
          pcre_uint32 cc;
4577
          if (eptr >= md->end_subject)
4578
            {
4579
            SCHECK_PARTIAL();
4580
            RRETURN(MATCH_NOMATCH);
4581
            }
4582
          cc = UCHAR21(eptr);
4583
          if (cc >= 128 || (md->ctypes[cc] & ctype_space) == 0)
4584
            RRETURN(MATCH_NOMATCH);
4585
          eptr++;
4586
          /* No need to skip more bytes - we know it's a 1-byte character */
4587
          }
4588
        break;
4589
4590
        case OP_NOT_WORDCHAR:
4591
        for (i = 1; i <= min; i++)
4592
          {
4593
          pcre_uint32 cc;
4594
          if (eptr >= md->end_subject)
4595
            {
4596
            SCHECK_PARTIAL();
4597
            RRETURN(MATCH_NOMATCH);
4598
            }
4599
          cc = UCHAR21(eptr);
4600
          if (cc < 128 && (md->ctypes[cc] & ctype_word) != 0)
4601
            RRETURN(MATCH_NOMATCH);
4602
          eptr++;
4603
          ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4604
          }
4605
        break;
4606
4607
        case OP_WORDCHAR:
4608
        for (i = 1; i <= min; i++)
4609
          {
4610
          pcre_uint32 cc;
4611
          if (eptr >= md->end_subject)
4612
            {
4613
            SCHECK_PARTIAL();
4614
            RRETURN(MATCH_NOMATCH);
4615
            }
4616
          cc = UCHAR21(eptr);
4617
          if (cc >= 128 || (md->ctypes[cc] & ctype_word) == 0)
4618
            RRETURN(MATCH_NOMATCH);
4619
          eptr++;
4620
          /* No need to skip more bytes - we know it's a 1-byte character */
4621
          }
4622
        break;
4623
4624
        default:
4625
        RRETURN(PCRE_ERROR_INTERNAL);
4626
        }  /* End switch(ctype) */
4627
4628
      else
4629
#endif     /* SUPPORT_UTF */
4630
4631
      /* Code for the non-UTF-8 case for minimum matching of operators other
4632
      than OP_PROP and OP_NOTPROP. */
4633
4634
0
      switch(ctype)
4635
0
        {
4636
0
        case OP_ANY:
4637
0
        for (i = 1; i <= min; i++)
4638
0
          {
4639
0
          if (eptr >= md->end_subject)
4640
0
            {
4641
0
            SCHECK_PARTIAL();
4642
0
            RRETURN(MATCH_NOMATCH);
4643
0
            }
4644
0
          if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4645
0
          if (md->partial != 0 &&
4646
0
              eptr + 1 >= md->end_subject &&
4647
0
              NLBLOCK->nltype == NLTYPE_FIXED &&
4648
0
              NLBLOCK->nllen == 2 &&
4649
0
              *eptr == NLBLOCK->nl[0])
4650
0
            {
4651
0
            md->hitend = TRUE;
4652
0
            if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
4653
0
            }
4654
0
          eptr++;
4655
0
          }
4656
0
        break;
4657
4658
0
        case OP_ALLANY:
4659
0
        if (eptr > md->end_subject - min)
4660
0
          {
4661
0
          SCHECK_PARTIAL();
4662
0
          RRETURN(MATCH_NOMATCH);
4663
0
          }
4664
0
        eptr += min;
4665
0
        break;
4666
4667
0
        case OP_ANYBYTE:
4668
0
        if (eptr > md->end_subject - min)
4669
0
          {
4670
0
          SCHECK_PARTIAL();
4671
0
          RRETURN(MATCH_NOMATCH);
4672
0
          }
4673
0
        eptr += min;
4674
0
        break;
4675
4676
0
        case OP_ANYNL:
4677
0
        for (i = 1; i <= min; i++)
4678
0
          {
4679
0
          if (eptr >= md->end_subject)
4680
0
            {
4681
0
            SCHECK_PARTIAL();
4682
0
            RRETURN(MATCH_NOMATCH);
4683
0
            }
4684
0
          switch(*eptr++)
4685
0
            {
4686
0
            default: RRETURN(MATCH_NOMATCH);
4687
4688
0
            case CHAR_CR:
4689
0
            if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
4690
0
            break;
4691
4692
0
            case CHAR_LF:
4693
0
            break;
4694
4695
0
            case CHAR_VT:
4696
0
            case CHAR_FF:
4697
0
            case CHAR_NEL:
4698
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4699
            case 0x2028:
4700
            case 0x2029:
4701
#endif
4702
0
            if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4703
0
            break;
4704
0
            }
4705
0
          }
4706
0
        break;
4707
4708
0
        case OP_NOT_HSPACE:
4709
0
        for (i = 1; i <= min; i++)
4710
0
          {
4711
0
          if (eptr >= md->end_subject)
4712
0
            {
4713
0
            SCHECK_PARTIAL();
4714
0
            RRETURN(MATCH_NOMATCH);
4715
0
            }
4716
0
          switch(*eptr++)
4717
0
            {
4718
0
            default: break;
4719
0
            HSPACE_BYTE_CASES:
4720
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4721
            HSPACE_MULTIBYTE_CASES:
4722
#endif
4723
0
            RRETURN(MATCH_NOMATCH);
4724
0
            }
4725
0
          }
4726
0
        break;
4727
4728
0
        case OP_HSPACE:
4729
0
        for (i = 1; i <= min; i++)
4730
0
          {
4731
0
          if (eptr >= md->end_subject)
4732
0
            {
4733
0
            SCHECK_PARTIAL();
4734
0
            RRETURN(MATCH_NOMATCH);
4735
0
            }
4736
0
          switch(*eptr++)
4737
0
            {
4738
0
            default: RRETURN(MATCH_NOMATCH);
4739
0
            HSPACE_BYTE_CASES:
4740
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4741
            HSPACE_MULTIBYTE_CASES:
4742
#endif
4743
0
            break;
4744
0
            }
4745
0
          }
4746
0
        break;
4747
4748
0
        case OP_NOT_VSPACE:
4749
0
        for (i = 1; i <= min; i++)
4750
0
          {
4751
0
          if (eptr >= md->end_subject)
4752
0
            {
4753
0
            SCHECK_PARTIAL();
4754
0
            RRETURN(MATCH_NOMATCH);
4755
0
            }
4756
0
          switch(*eptr++)
4757
0
            {
4758
0
            VSPACE_BYTE_CASES:
4759
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4760
            VSPACE_MULTIBYTE_CASES:
4761
#endif
4762
0
            RRETURN(MATCH_NOMATCH);
4763
0
            default: break;
4764
0
            }
4765
0
          }
4766
0
        break;
4767
4768
0
        case OP_VSPACE:
4769
0
        for (i = 1; i <= min; i++)
4770
0
          {
4771
0
          if (eptr >= md->end_subject)
4772
0
            {
4773
0
            SCHECK_PARTIAL();
4774
0
            RRETURN(MATCH_NOMATCH);
4775
0
            }
4776
0
          switch(*eptr++)
4777
0
            {
4778
0
            default: RRETURN(MATCH_NOMATCH);
4779
0
            VSPACE_BYTE_CASES:
4780
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4781
            VSPACE_MULTIBYTE_CASES:
4782
#endif
4783
0
            break;
4784
0
            }
4785
0
          }
4786
0
        break;
4787
4788
0
        case OP_NOT_DIGIT:
4789
0
        for (i = 1; i <= min; i++)
4790
0
          {
4791
0
          if (eptr >= md->end_subject)
4792
0
            {
4793
0
            SCHECK_PARTIAL();
4794
0
            RRETURN(MATCH_NOMATCH);
4795
0
            }
4796
0
          if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0)
4797
0
            RRETURN(MATCH_NOMATCH);
4798
0
          eptr++;
4799
0
          }
4800
0
        break;
4801
4802
0
        case OP_DIGIT:
4803
0
        for (i = 1; i <= min; i++)
4804
0
          {
4805
0
          if (eptr >= md->end_subject)
4806
0
            {
4807
0
            SCHECK_PARTIAL();
4808
0
            RRETURN(MATCH_NOMATCH);
4809
0
            }
4810
0
          if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0)
4811
0
            RRETURN(MATCH_NOMATCH);
4812
0
          eptr++;
4813
0
          }
4814
0
        break;
4815
4816
0
        case OP_NOT_WHITESPACE:
4817
0
        for (i = 1; i <= min; i++)
4818
0
          {
4819
0
          if (eptr >= md->end_subject)
4820
0
            {
4821
0
            SCHECK_PARTIAL();
4822
0
            RRETURN(MATCH_NOMATCH);
4823
0
            }
4824
0
          if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0)
4825
0
            RRETURN(MATCH_NOMATCH);
4826
0
          eptr++;
4827
0
          }
4828
0
        break;
4829
4830
0
        case OP_WHITESPACE:
4831
0
        for (i = 1; i <= min; i++)
4832
0
          {
4833
0
          if (eptr >= md->end_subject)
4834
0
            {
4835
0
            SCHECK_PARTIAL();
4836
0
            RRETURN(MATCH_NOMATCH);
4837
0
            }
4838
0
          if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0)
4839
0
            RRETURN(MATCH_NOMATCH);
4840
0
          eptr++;
4841
0
          }
4842
0
        break;
4843
4844
0
        case OP_NOT_WORDCHAR:
4845
0
        for (i = 1; i <= min; i++)
4846
0
          {
4847
0
          if (eptr >= md->end_subject)
4848
0
            {
4849
0
            SCHECK_PARTIAL();
4850
0
            RRETURN(MATCH_NOMATCH);
4851
0
            }
4852
0
          if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0)
4853
0
            RRETURN(MATCH_NOMATCH);
4854
0
          eptr++;
4855
0
          }
4856
0
        break;
4857
4858
0
        case OP_WORDCHAR:
4859
0
        for (i = 1; i <= min; i++)
4860
0
          {
4861
0
          if (eptr >= md->end_subject)
4862
0
            {
4863
0
            SCHECK_PARTIAL();
4864
0
            RRETURN(MATCH_NOMATCH);
4865
0
            }
4866
0
          if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0)
4867
0
            RRETURN(MATCH_NOMATCH);
4868
0
          eptr++;
4869
0
          }
4870
0
        break;
4871
4872
0
        default:
4873
0
        RRETURN(PCRE_ERROR_INTERNAL);
4874
0
        }
4875
0
      }
4876
4877
    /* If min = max, continue at the same level without recursing */
4878
4879
0
    if (min == max) continue;
4880
4881
    /* If minimizing, we have to test the rest of the pattern before each
4882
    subsequent match. Again, separate the UTF-8 case for speed, and also
4883
    separate the UCP cases. */
4884
4885
0
    if (minimize)
4886
0
      {
4887
#ifdef SUPPORT_UCP
4888
      if (prop_type >= 0)
4889
        {
4890
        switch(prop_type)
4891
          {
4892
          case PT_ANY:
4893
          for (fi = min;; fi++)
4894
            {
4895
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM36);
4896
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4897
            if (fi >= max) RRETURN(MATCH_NOMATCH);
4898
            if (eptr >= md->end_subject)
4899
              {
4900
              SCHECK_PARTIAL();
4901
              RRETURN(MATCH_NOMATCH);
4902
              }
4903
            GETCHARINCTEST(c, eptr);
4904
            if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4905
            }
4906
          /* Control never gets here */
4907
4908
          case PT_LAMP:
4909
          for (fi = min;; fi++)
4910
            {
4911
            int chartype;
4912
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM37);
4913
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4914
            if (fi >= max) RRETURN(MATCH_NOMATCH);
4915
            if (eptr >= md->end_subject)
4916
              {
4917
              SCHECK_PARTIAL();
4918
              RRETURN(MATCH_NOMATCH);
4919
              }
4920
            GETCHARINCTEST(c, eptr);
4921
            chartype = UCD_CHARTYPE(c);
4922
            if ((chartype == ucp_Lu ||
4923
                 chartype == ucp_Ll ||
4924
                 chartype == ucp_Lt) == prop_fail_result)
4925
              RRETURN(MATCH_NOMATCH);
4926
            }
4927
          /* Control never gets here */
4928
4929
          case PT_GC:
4930
          for (fi = min;; fi++)
4931
            {
4932
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM38);
4933
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4934
            if (fi >= max) RRETURN(MATCH_NOMATCH);
4935
            if (eptr >= md->end_subject)
4936
              {
4937
              SCHECK_PARTIAL();
4938
              RRETURN(MATCH_NOMATCH);
4939
              }
4940
            GETCHARINCTEST(c, eptr);
4941
            if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
4942
              RRETURN(MATCH_NOMATCH);
4943
            }
4944
          /* Control never gets here */
4945
4946
          case PT_PC:
4947
          for (fi = min;; fi++)
4948
            {
4949
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM39);
4950
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4951
            if (fi >= max) RRETURN(MATCH_NOMATCH);
4952
            if (eptr >= md->end_subject)
4953
              {
4954
              SCHECK_PARTIAL();
4955
              RRETURN(MATCH_NOMATCH);
4956
              }
4957
            GETCHARINCTEST(c, eptr);
4958
            if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
4959
              RRETURN(MATCH_NOMATCH);
4960
            }
4961
          /* Control never gets here */
4962
4963
          case PT_SC:
4964
          for (fi = min;; fi++)
4965
            {
4966
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM40);
4967
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4968
            if (fi >= max) RRETURN(MATCH_NOMATCH);
4969
            if (eptr >= md->end_subject)
4970
              {
4971
              SCHECK_PARTIAL();
4972
              RRETURN(MATCH_NOMATCH);
4973
              }
4974
            GETCHARINCTEST(c, eptr);
4975
            if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
4976
              RRETURN(MATCH_NOMATCH);
4977
            }
4978
          /* Control never gets here */
4979
4980
          case PT_ALNUM:
4981
          for (fi = min;; fi++)
4982
            {
4983
            int category;
4984
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM59);
4985
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4986
            if (fi >= max) RRETURN(MATCH_NOMATCH);
4987
            if (eptr >= md->end_subject)
4988
              {
4989
              SCHECK_PARTIAL();
4990
              RRETURN(MATCH_NOMATCH);
4991
              }
4992
            GETCHARINCTEST(c, eptr);
4993
            category = UCD_CATEGORY(c);
4994
            if ((category == ucp_L || category == ucp_N) == prop_fail_result)
4995
              RRETURN(MATCH_NOMATCH);
4996
            }
4997
          /* Control never gets here */
4998
4999
          /* Perl space used to exclude VT, but from Perl 5.18 it is included,
5000
          which means that Perl space and POSIX space are now identical. PCRE
5001
          was changed at release 8.34. */
5002
5003
          case PT_SPACE:    /* Perl space */
5004
          case PT_PXSPACE:  /* POSIX space */
5005
          for (fi = min;; fi++)
5006
            {
5007
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM61);
5008
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5009
            if (fi >= max) RRETURN(MATCH_NOMATCH);
5010
            if (eptr >= md->end_subject)
5011
              {
5012
              SCHECK_PARTIAL();
5013
              RRETURN(MATCH_NOMATCH);
5014
              }
5015
            GETCHARINCTEST(c, eptr);
5016
            switch(c)
5017
              {
5018
              HSPACE_CASES:
5019
              VSPACE_CASES:
5020
              if (prop_fail_result) RRETURN(MATCH_NOMATCH);
5021
              break;
5022
5023
              default:
5024
              if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
5025
                RRETURN(MATCH_NOMATCH);
5026
              break;
5027
              }
5028
            }
5029
          /* Control never gets here */
5030
5031
          case PT_WORD:
5032
          for (fi = min;; fi++)
5033
            {
5034
            int category;
5035
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM62);
5036
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5037
            if (fi >= max) RRETURN(MATCH_NOMATCH);
5038
            if (eptr >= md->end_subject)
5039
              {
5040
              SCHECK_PARTIAL();
5041
              RRETURN(MATCH_NOMATCH);
5042
              }
5043
            GETCHARINCTEST(c, eptr);
5044
            category = UCD_CATEGORY(c);
5045
            if ((category == ucp_L ||
5046
                 category == ucp_N ||
5047
                 c == CHAR_UNDERSCORE)
5048
                   == prop_fail_result)
5049
              RRETURN(MATCH_NOMATCH);
5050
            }
5051
          /* Control never gets here */
5052
5053
          case PT_CLIST:
5054
          for (fi = min;; fi++)
5055
            {
5056
            const pcre_uint32 *cp;
5057
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM67);
5058
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5059
            if (fi >= max) RRETURN(MATCH_NOMATCH);
5060
            if (eptr >= md->end_subject)
5061
              {
5062
              SCHECK_PARTIAL();
5063
              RRETURN(MATCH_NOMATCH);
5064
              }
5065
            GETCHARINCTEST(c, eptr);
5066
            cp = PRIV(ucd_caseless_sets) + prop_value;
5067
            for (;;)
5068
              {
5069
              if (c < *cp)
5070
                { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
5071
              if (c == *cp++)
5072
                { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
5073
              }
5074
            }
5075
          /* Control never gets here */
5076
5077
          case PT_UCNC:
5078
          for (fi = min;; fi++)
5079
            {
5080
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM60);
5081
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5082
            if (fi >= max) RRETURN(MATCH_NOMATCH);
5083
            if (eptr >= md->end_subject)
5084
              {
5085
              SCHECK_PARTIAL();
5086
              RRETURN(MATCH_NOMATCH);
5087
              }
5088
            GETCHARINCTEST(c, eptr);
5089
            if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
5090
                 c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
5091
                 c >= 0xe000) == prop_fail_result)
5092
              RRETURN(MATCH_NOMATCH);
5093
            }
5094
          /* Control never gets here */
5095
5096
          /* This should never occur */
5097
          default:
5098
          RRETURN(PCRE_ERROR_INTERNAL);
5099
          }
5100
        }
5101
5102
      /* Match extended Unicode sequences. We will get here only if the
5103
      support is in the binary; otherwise a compile-time error occurs. */
5104
5105
      else if (ctype == OP_EXTUNI)
5106
        {
5107
        for (fi = min;; fi++)
5108
          {
5109
          RMATCH(eptr, ecode, offset_top, md, eptrb, RM41);
5110
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5111
          if (fi >= max) RRETURN(MATCH_NOMATCH);
5112
          if (eptr >= md->end_subject)
5113
            {
5114
            SCHECK_PARTIAL();
5115
            RRETURN(MATCH_NOMATCH);
5116
            }
5117
          else
5118
            {
5119
            int lgb, rgb;
5120
            GETCHARINCTEST(c, eptr);
5121
            lgb = UCD_GRAPHBREAK(c);
5122
            while (eptr < md->end_subject)
5123
              {
5124
              int len = 1;
5125
              if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5126
              rgb = UCD_GRAPHBREAK(c);
5127
              if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
5128
              lgb = rgb;
5129
              eptr += len;
5130
              }
5131
            }
5132
          CHECK_PARTIAL();
5133
          }
5134
        }
5135
      else
5136
#endif     /* SUPPORT_UCP */
5137
5138
#ifdef SUPPORT_UTF
5139
      if (utf)
5140
        {
5141
        for (fi = min;; fi++)
5142
          {
5143
          RMATCH(eptr, ecode, offset_top, md, eptrb, RM42);
5144
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5145
          if (fi >= max) RRETURN(MATCH_NOMATCH);
5146
          if (eptr >= md->end_subject)
5147
            {
5148
            SCHECK_PARTIAL();
5149
            RRETURN(MATCH_NOMATCH);
5150
            }
5151
          if (ctype == OP_ANY && IS_NEWLINE(eptr))
5152
            RRETURN(MATCH_NOMATCH);
5153
          GETCHARINC(c, eptr);
5154
          switch(ctype)
5155
            {
5156
            case OP_ANY:               /* This is the non-NL case */
5157
            if (md->partial != 0 &&    /* Take care with CRLF partial */
5158
                eptr >= md->end_subject &&
5159
                NLBLOCK->nltype == NLTYPE_FIXED &&
5160
                NLBLOCK->nllen == 2 &&
5161
                c == NLBLOCK->nl[0])
5162
              {
5163
              md->hitend = TRUE;
5164
              if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5165
              }
5166
            break;
5167
5168
            case OP_ALLANY:
5169
            case OP_ANYBYTE:
5170
            break;
5171
5172
            case OP_ANYNL:
5173
            switch(c)
5174
              {
5175
              default: RRETURN(MATCH_NOMATCH);
5176
              case CHAR_CR:
5177
              if (eptr < md->end_subject && UCHAR21(eptr) == CHAR_LF) eptr++;
5178
              break;
5179
5180
              case CHAR_LF:
5181
              break;
5182
5183
              case CHAR_VT:
5184
              case CHAR_FF:
5185
              case CHAR_NEL:
5186
#ifndef EBCDIC
5187
              case 0x2028:
5188
              case 0x2029:
5189
#endif  /* Not EBCDIC */
5190
              if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
5191
              break;
5192
              }
5193
            break;
5194
5195
            case OP_NOT_HSPACE:
5196
            switch(c)
5197
              {
5198
              HSPACE_CASES: RRETURN(MATCH_NOMATCH);
5199
              default: break;
5200
              }
5201
            break;
5202
5203
            case OP_HSPACE:
5204
            switch(c)
5205
              {
5206
              HSPACE_CASES: break;
5207
              default: RRETURN(MATCH_NOMATCH);
5208
              }
5209
            break;
5210
5211
            case OP_NOT_VSPACE:
5212
            switch(c)
5213
              {
5214
              VSPACE_CASES: RRETURN(MATCH_NOMATCH);
5215
              default: break;
5216
              }
5217
            break;
5218
5219
            case OP_VSPACE:
5220
            switch(c)
5221
              {
5222
              VSPACE_CASES: break;
5223
              default: RRETURN(MATCH_NOMATCH);
5224
              }
5225
            break;
5226
5227
            case OP_NOT_DIGIT:
5228
            if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
5229
              RRETURN(MATCH_NOMATCH);
5230
            break;
5231
5232
            case OP_DIGIT:
5233
            if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
5234
              RRETURN(MATCH_NOMATCH);
5235
            break;
5236
5237
            case OP_NOT_WHITESPACE:
5238
            if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
5239
              RRETURN(MATCH_NOMATCH);
5240
            break;
5241
5242
            case OP_WHITESPACE:
5243
            if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
5244
              RRETURN(MATCH_NOMATCH);
5245
            break;
5246
5247
            case OP_NOT_WORDCHAR:
5248
            if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
5249
              RRETURN(MATCH_NOMATCH);
5250
            break;
5251
5252
            case OP_WORDCHAR:
5253
            if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
5254
              RRETURN(MATCH_NOMATCH);
5255
            break;
5256
5257
            default:
5258
            RRETURN(PCRE_ERROR_INTERNAL);
5259
            }
5260
          }
5261
        }
5262
      else
5263
#endif
5264
      /* Not UTF mode */
5265
0
        {
5266
0
        for (fi = min;; fi++)
5267
0
          {
5268
0
          RMATCH(eptr, ecode, offset_top, md, eptrb, RM43);
5269
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5270
0
          if (fi >= max) RRETURN(MATCH_NOMATCH);
5271
0
          if (eptr >= md->end_subject)
5272
0
            {
5273
0
            SCHECK_PARTIAL();
5274
0
            RRETURN(MATCH_NOMATCH);
5275
0
            }
5276
0
          if (ctype == OP_ANY && IS_NEWLINE(eptr))
5277
0
            RRETURN(MATCH_NOMATCH);
5278
0
          c = *eptr++;
5279
0
          switch(ctype)
5280
0
            {
5281
0
            case OP_ANY:               /* This is the non-NL case */
5282
0
            if (md->partial != 0 &&    /* Take care with CRLF partial */
5283
0
                eptr >= md->end_subject &&
5284
0
                NLBLOCK->nltype == NLTYPE_FIXED &&
5285
0
                NLBLOCK->nllen == 2 &&
5286
0
                c == NLBLOCK->nl[0])
5287
0
              {
5288
0
              md->hitend = TRUE;
5289
0
              if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5290
0
              }
5291
0
            break;
5292
5293
0
            case OP_ALLANY:
5294
0
            case OP_ANYBYTE:
5295
0
            break;
5296
5297
0
            case OP_ANYNL:
5298
0
            switch(c)
5299
0
              {
5300
0
              default: RRETURN(MATCH_NOMATCH);
5301
0
              case CHAR_CR:
5302
0
              if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
5303
0
              break;
5304
5305
0
              case CHAR_LF:
5306
0
              break;
5307
5308
0
              case CHAR_VT:
5309
0
              case CHAR_FF:
5310
0
              case CHAR_NEL:
5311
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5312
              case 0x2028:
5313
              case 0x2029:
5314
#endif
5315
0
              if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
5316
0
              break;
5317
0
              }
5318
0
            break;
5319
5320
0
            case OP_NOT_HSPACE:
5321
0
            switch(c)
5322
0
              {
5323
0
              default: break;
5324
0
              HSPACE_BYTE_CASES:
5325
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5326
              HSPACE_MULTIBYTE_CASES:
5327
#endif
5328
0
              RRETURN(MATCH_NOMATCH);
5329
0
              }
5330
0
            break;
5331
5332
0
            case OP_HSPACE:
5333
0
            switch(c)
5334
0
              {
5335
0
              default: RRETURN(MATCH_NOMATCH);
5336
0
              HSPACE_BYTE_CASES:
5337
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5338
              HSPACE_MULTIBYTE_CASES:
5339
#endif
5340
0
              break;
5341
0
              }
5342
0
            break;
5343
5344
0
            case OP_NOT_VSPACE:
5345
0
            switch(c)
5346
0
              {
5347
0
              default: break;
5348
0
              VSPACE_BYTE_CASES:
5349
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5350
              VSPACE_MULTIBYTE_CASES:
5351
#endif
5352
0
              RRETURN(MATCH_NOMATCH);
5353
0
              }
5354
0
            break;
5355
5356
0
            case OP_VSPACE:
5357
0
            switch(c)
5358
0
              {
5359
0
              default: RRETURN(MATCH_NOMATCH);
5360
0
              VSPACE_BYTE_CASES:
5361
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5362
              VSPACE_MULTIBYTE_CASES:
5363
#endif
5364
0
              break;
5365
0
              }
5366
0
            break;
5367
5368
0
            case OP_NOT_DIGIT:
5369
0
            if (MAX_255(c) && (md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
5370
0
            break;
5371
5372
0
            case OP_DIGIT:
5373
0
            if (!MAX_255(c) || (md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
5374
0
            break;
5375
5376
0
            case OP_NOT_WHITESPACE:
5377
0
            if (MAX_255(c) && (md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
5378
0
            break;
5379
5380
0
            case OP_WHITESPACE:
5381
0
            if (!MAX_255(c) || (md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
5382
0
            break;
5383
5384
0
            case OP_NOT_WORDCHAR:
5385
0
            if (MAX_255(c) && (md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
5386
0
            break;
5387
5388
0
            case OP_WORDCHAR:
5389
0
            if (!MAX_255(c) || (md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
5390
0
            break;
5391
5392
0
            default:
5393
0
            RRETURN(PCRE_ERROR_INTERNAL);
5394
0
            }
5395
0
          }
5396
0
        }
5397
      /* Control never gets here */
5398
0
      }
5399
5400
    /* If maximizing, it is worth using inline code for speed, doing the type
5401
    test once at the start (i.e. keep it out of the loop). Again, keep the
5402
    UTF-8 and UCP stuff separate. */
5403
5404
0
    else
5405
0
      {
5406
0
      pp = eptr;  /* Remember where we started */
5407
5408
#ifdef SUPPORT_UCP
5409
      if (prop_type >= 0)
5410
        {
5411
        switch(prop_type)
5412
          {
5413
          case PT_ANY:
5414
          for (i = min; i < max; i++)
5415
            {
5416
            int len = 1;
5417
            if (eptr >= md->end_subject)
5418
              {
5419
              SCHECK_PARTIAL();
5420
              break;
5421
              }
5422
            GETCHARLENTEST(c, eptr, len);
5423
            if (prop_fail_result) break;
5424
            eptr+= len;
5425
            }
5426
          break;
5427
5428
          case PT_LAMP:
5429
          for (i = min; i < max; i++)
5430
            {
5431
            int chartype;
5432
            int len = 1;
5433
            if (eptr >= md->end_subject)
5434
              {
5435
              SCHECK_PARTIAL();
5436
              break;
5437
              }
5438
            GETCHARLENTEST(c, eptr, len);
5439
            chartype = UCD_CHARTYPE(c);
5440
            if ((chartype == ucp_Lu ||
5441
                 chartype == ucp_Ll ||
5442
                 chartype == ucp_Lt) == prop_fail_result)
5443
              break;
5444
            eptr+= len;
5445
            }
5446
          break;
5447
5448
          case PT_GC:
5449
          for (i = min; i < max; i++)
5450
            {
5451
            int len = 1;
5452
            if (eptr >= md->end_subject)
5453
              {
5454
              SCHECK_PARTIAL();
5455
              break;
5456
              }
5457
            GETCHARLENTEST(c, eptr, len);
5458
            if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) break;
5459
            eptr+= len;
5460
            }
5461
          break;
5462
5463
          case PT_PC:
5464
          for (i = min; i < max; i++)
5465
            {
5466
            int len = 1;
5467
            if (eptr >= md->end_subject)
5468
              {
5469
              SCHECK_PARTIAL();
5470
              break;
5471
              }
5472
            GETCHARLENTEST(c, eptr, len);
5473
            if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) break;
5474
            eptr+= len;
5475
            }
5476
          break;
5477
5478
          case PT_SC:
5479
          for (i = min; i < max; i++)
5480
            {
5481
            int len = 1;
5482
            if (eptr >= md->end_subject)
5483
              {
5484
              SCHECK_PARTIAL();
5485
              break;
5486
              }
5487
            GETCHARLENTEST(c, eptr, len);
5488
            if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) break;
5489
            eptr+= len;
5490
            }
5491
          break;
5492
5493
          case PT_ALNUM:
5494
          for (i = min; i < max; i++)
5495
            {
5496
            int category;
5497
            int len = 1;
5498
            if (eptr >= md->end_subject)
5499
              {
5500
              SCHECK_PARTIAL();
5501
              break;
5502
              }
5503
            GETCHARLENTEST(c, eptr, len);
5504
            category = UCD_CATEGORY(c);
5505
            if ((category == ucp_L || category == ucp_N) == prop_fail_result)
5506
              break;
5507
            eptr+= len;
5508
            }
5509
          break;
5510
5511
          /* Perl space used to exclude VT, but from Perl 5.18 it is included,
5512
          which means that Perl space and POSIX space are now identical. PCRE
5513
          was changed at release 8.34. */
5514
5515
          case PT_SPACE:    /* Perl space */
5516
          case PT_PXSPACE:  /* POSIX space */
5517
          for (i = min; i < max; i++)
5518
            {
5519
            int len = 1;
5520
            if (eptr >= md->end_subject)
5521
              {
5522
              SCHECK_PARTIAL();
5523
              break;
5524
              }
5525
            GETCHARLENTEST(c, eptr, len);
5526
            switch(c)
5527
              {
5528
              HSPACE_CASES:
5529
              VSPACE_CASES:
5530
              if (prop_fail_result) goto ENDLOOP99;  /* Break the loop */
5531
              break;
5532
5533
              default:
5534
              if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
5535
                goto ENDLOOP99;   /* Break the loop */
5536
              break;
5537
              }
5538
            eptr+= len;
5539
            }
5540
          ENDLOOP99:
5541
          break;
5542
5543
          case PT_WORD:
5544
          for (i = min; i < max; i++)
5545
            {
5546
            int category;
5547
            int len = 1;
5548
            if (eptr >= md->end_subject)
5549
              {
5550
              SCHECK_PARTIAL();
5551
              break;
5552
              }
5553
            GETCHARLENTEST(c, eptr, len);
5554
            category = UCD_CATEGORY(c);
5555
            if ((category == ucp_L || category == ucp_N ||
5556
                 c == CHAR_UNDERSCORE) == prop_fail_result)
5557
              break;
5558
            eptr+= len;
5559
            }
5560
          break;
5561
5562
          case PT_CLIST:
5563
          for (i = min; i < max; i++)
5564
            {
5565
            const pcre_uint32 *cp;
5566
            int len = 1;
5567
            if (eptr >= md->end_subject)
5568
              {
5569
              SCHECK_PARTIAL();
5570
              break;
5571
              }
5572
            GETCHARLENTEST(c, eptr, len);
5573
            cp = PRIV(ucd_caseless_sets) + prop_value;
5574
            for (;;)
5575
              {
5576
              if (c < *cp)
5577
                { if (prop_fail_result) break; else goto GOT_MAX; }
5578
              if (c == *cp++)
5579
                { if (prop_fail_result) goto GOT_MAX; else break; }
5580
              }
5581
            eptr += len;
5582
            }
5583
          GOT_MAX:
5584
          break;
5585
5586
          case PT_UCNC:
5587
          for (i = min; i < max; i++)
5588
            {
5589
            int len = 1;
5590
            if (eptr >= md->end_subject)
5591
              {
5592
              SCHECK_PARTIAL();
5593
              break;
5594
              }
5595
            GETCHARLENTEST(c, eptr, len);
5596
            if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
5597
                 c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
5598
                 c >= 0xe000) == prop_fail_result)
5599
              break;
5600
            eptr += len;
5601
            }
5602
          break;
5603
5604
          default:
5605
          RRETURN(PCRE_ERROR_INTERNAL);
5606
          }
5607
5608
        /* eptr is now past the end of the maximum run */
5609
5610
        if (possessive) continue;    /* No backtracking */
5611
        for(;;)
5612
          {
5613
          if (eptr <= pp) goto TAIL_RECURSE;
5614
          RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
5615
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5616
          eptr--;
5617
          if (utf) BACKCHAR(eptr);
5618
          }
5619
        }
5620
5621
      /* Match extended Unicode grapheme clusters. We will get here only if the
5622
      support is in the binary; otherwise a compile-time error occurs. */
5623
5624
      else if (ctype == OP_EXTUNI)
5625
        {
5626
        for (i = min; i < max; i++)
5627
          {
5628
          if (eptr >= md->end_subject)
5629
            {
5630
            SCHECK_PARTIAL();
5631
            break;
5632
            }
5633
          else
5634
            {
5635
            int lgb, rgb;
5636
            GETCHARINCTEST(c, eptr);
5637
            lgb = UCD_GRAPHBREAK(c);
5638
            while (eptr < md->end_subject)
5639
              {
5640
              int len = 1;
5641
              if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5642
              rgb = UCD_GRAPHBREAK(c);
5643
              if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
5644
              lgb = rgb;
5645
              eptr += len;
5646
              }
5647
            }
5648
          CHECK_PARTIAL();
5649
          }
5650
5651
        /* eptr is now past the end of the maximum run */
5652
5653
        if (possessive) continue;    /* No backtracking */
5654
5655
        /* We use <= pp rather than == pp to detect the start of the run while
5656
        backtracking because the use of \C in UTF mode can cause BACKCHAR to
5657
        move back past pp. This is just palliative; the use of \C in UTF mode
5658
        is fraught with danger. */
5659
5660
        for(;;)
5661
          {
5662
          int lgb, rgb;
5663
          PCRE_PUCHAR fptr;
5664
5665
          if (eptr <= pp) goto TAIL_RECURSE;   /* At start of char run */
5666
          RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
5667
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5668
5669
          /* Backtracking over an extended grapheme cluster involves inspecting
5670
          the previous two characters (if present) to see if a break is
5671
          permitted between them. */
5672
5673
          eptr--;
5674
          if (!utf) c = *eptr; else
5675
            {
5676
            BACKCHAR(eptr);
5677
            GETCHAR(c, eptr);
5678
            }
5679
          rgb = UCD_GRAPHBREAK(c);
5680
5681
          for (;;)
5682
            {
5683
            if (eptr <= pp) goto TAIL_RECURSE;   /* At start of char run */
5684
            fptr = eptr - 1;
5685
            if (!utf) c = *fptr; else
5686
              {
5687
              BACKCHAR(fptr);
5688
              GETCHAR(c, fptr);
5689
              }
5690
            lgb = UCD_GRAPHBREAK(c);
5691
            if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
5692
            eptr = fptr;
5693
            rgb = lgb;
5694
            }
5695
          }
5696
        }
5697
5698
      else
5699
#endif   /* SUPPORT_UCP */
5700
5701
#ifdef SUPPORT_UTF
5702
      if (utf)
5703
        {
5704
        switch(ctype)
5705
          {
5706
          case OP_ANY:
5707
          for (i = min; i < max; i++)
5708
            {
5709
            if (eptr >= md->end_subject)
5710
              {
5711
              SCHECK_PARTIAL();
5712
              break;
5713
              }
5714
            if (IS_NEWLINE(eptr)) break;
5715
            if (md->partial != 0 &&    /* Take care with CRLF partial */
5716
                eptr + 1 >= md->end_subject &&
5717
                NLBLOCK->nltype == NLTYPE_FIXED &&
5718
                NLBLOCK->nllen == 2 &&
5719
                UCHAR21(eptr) == NLBLOCK->nl[0])
5720
              {
5721
              md->hitend = TRUE;
5722
              if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5723
              }
5724
            eptr++;
5725
            ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5726
            }
5727
          break;
5728
5729
          case OP_ALLANY:
5730
          if (max < INT_MAX)
5731
            {
5732
            for (i = min; i < max; i++)
5733
              {
5734
              if (eptr >= md->end_subject)
5735
                {
5736
                SCHECK_PARTIAL();
5737
                break;
5738
                }
5739
              eptr++;
5740
              ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5741
              }
5742
            }
5743
          else
5744
            {
5745
            eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
5746
            SCHECK_PARTIAL();
5747
            }
5748
          break;
5749
5750
          /* The byte case is the same as non-UTF8 */
5751
5752
          case OP_ANYBYTE:
5753
          c = max - min;
5754
          if (c > (unsigned int)(md->end_subject - eptr))
5755
            {
5756
            eptr = md->end_subject;
5757
            SCHECK_PARTIAL();
5758
            }
5759
          else eptr += c;
5760
          break;
5761
5762
          case OP_ANYNL:
5763
          for (i = min; i < max; i++)
5764
            {
5765
            int len = 1;
5766
            if (eptr >= md->end_subject)
5767
              {
5768
              SCHECK_PARTIAL();
5769
              break;
5770
              }
5771
            GETCHARLEN(c, eptr, len);
5772
            if (c == CHAR_CR)
5773
              {
5774
              if (++eptr >= md->end_subject) break;
5775
              if (UCHAR21(eptr) == CHAR_LF) eptr++;
5776
              }
5777
            else
5778
              {
5779
              if (c != CHAR_LF &&
5780
                  (md->bsr_anycrlf ||
5781
                   (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
5782
#ifndef EBCDIC
5783
                    && c != 0x2028 && c != 0x2029
5784
#endif  /* Not EBCDIC */
5785
                    )))
5786
                break;
5787
              eptr += len;
5788
              }
5789
            }
5790
          break;
5791
5792
          case OP_NOT_HSPACE:
5793
          case OP_HSPACE:
5794
          for (i = min; i < max; i++)
5795
            {
5796
            BOOL gotspace;
5797
            int len = 1;
5798
            if (eptr >= md->end_subject)
5799
              {
5800
              SCHECK_PARTIAL();
5801
              break;
5802
              }
5803
            GETCHARLEN(c, eptr, len);
5804
            switch(c)
5805
              {
5806
              HSPACE_CASES: gotspace = TRUE; break;
5807
              default: gotspace = FALSE; break;
5808
              }
5809
            if (gotspace == (ctype == OP_NOT_HSPACE)) break;
5810
            eptr += len;
5811
            }
5812
          break;
5813
5814
          case OP_NOT_VSPACE:
5815
          case OP_VSPACE:
5816
          for (i = min; i < max; i++)
5817
            {
5818
            BOOL gotspace;
5819
            int len = 1;
5820
            if (eptr >= md->end_subject)
5821
              {
5822
              SCHECK_PARTIAL();
5823
              break;
5824
              }
5825
            GETCHARLEN(c, eptr, len);
5826
            switch(c)
5827
              {
5828
              VSPACE_CASES: gotspace = TRUE; break;
5829
              default: gotspace = FALSE; break;
5830
              }
5831
            if (gotspace == (ctype == OP_NOT_VSPACE)) break;
5832
            eptr += len;
5833
            }
5834
          break;
5835
5836
          case OP_NOT_DIGIT:
5837
          for (i = min; i < max; i++)
5838
            {
5839
            int len = 1;
5840
            if (eptr >= md->end_subject)
5841
              {
5842
              SCHECK_PARTIAL();
5843
              break;
5844
              }
5845
            GETCHARLEN(c, eptr, len);
5846
            if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
5847
            eptr+= len;
5848
            }
5849
          break;
5850
5851
          case OP_DIGIT:
5852
          for (i = min; i < max; i++)
5853
            {
5854
            int len = 1;
5855
            if (eptr >= md->end_subject)
5856
              {
5857
              SCHECK_PARTIAL();
5858
              break;
5859
              }
5860
            GETCHARLEN(c, eptr, len);
5861
            if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
5862
            eptr+= len;
5863
            }
5864
          break;
5865
5866
          case OP_NOT_WHITESPACE:
5867
          for (i = min; i < max; i++)
5868
            {
5869
            int len = 1;
5870
            if (eptr >= md->end_subject)
5871
              {
5872
              SCHECK_PARTIAL();
5873
              break;
5874
              }
5875
            GETCHARLEN(c, eptr, len);
5876
            if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
5877
            eptr+= len;
5878
            }
5879
          break;
5880
5881
          case OP_WHITESPACE:
5882
          for (i = min; i < max; i++)
5883
            {
5884
            int len = 1;
5885
            if (eptr >= md->end_subject)
5886
              {
5887
              SCHECK_PARTIAL();
5888
              break;
5889
              }
5890
            GETCHARLEN(c, eptr, len);
5891
            if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
5892
            eptr+= len;
5893
            }
5894
          break;
5895
5896
          case OP_NOT_WORDCHAR:
5897
          for (i = min; i < max; i++)
5898
            {
5899
            int len = 1;
5900
            if (eptr >= md->end_subject)
5901
              {
5902
              SCHECK_PARTIAL();
5903
              break;
5904
              }
5905
            GETCHARLEN(c, eptr, len);
5906
            if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
5907
            eptr+= len;
5908
            }
5909
          break;
5910
5911
          case OP_WORDCHAR:
5912
          for (i = min; i < max; i++)
5913
            {
5914
            int len = 1;
5915
            if (eptr >= md->end_subject)
5916
              {
5917
              SCHECK_PARTIAL();
5918
              break;
5919
              }
5920
            GETCHARLEN(c, eptr, len);
5921
            if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
5922
            eptr+= len;
5923
            }
5924
          break;
5925
5926
          default:
5927
          RRETURN(PCRE_ERROR_INTERNAL);
5928
          }
5929
5930
        if (possessive) continue;    /* No backtracking */
5931
        for(;;)
5932
          {
5933
          if (eptr <= pp) goto TAIL_RECURSE;
5934
          RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);
5935
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5936
          eptr--;
5937
          BACKCHAR(eptr);
5938
          if (ctype == OP_ANYNL && eptr > pp  && UCHAR21(eptr) == CHAR_NL &&
5939
              UCHAR21(eptr - 1) == CHAR_CR) eptr--;
5940
          }
5941
        }
5942
      else
5943
#endif  /* SUPPORT_UTF */
5944
      /* Not UTF mode */
5945
0
        {
5946
0
        switch(ctype)
5947
0
          {
5948
0
          case OP_ANY:
5949
0
          for (i = min; i < max; i++)
5950
0
            {
5951
0
            if (eptr >= md->end_subject)
5952
0
              {
5953
0
              SCHECK_PARTIAL();
5954
0
              break;
5955
0
              }
5956
0
            if (IS_NEWLINE(eptr)) break;
5957
0
            if (md->partial != 0 &&    /* Take care with CRLF partial */
5958
0
                eptr + 1 >= md->end_subject &&
5959
0
                NLBLOCK->nltype == NLTYPE_FIXED &&
5960
0
                NLBLOCK->nllen == 2 &&
5961
0
                *eptr == NLBLOCK->nl[0])
5962
0
              {
5963
0
              md->hitend = TRUE;
5964
0
              if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5965
0
              }
5966
0
            eptr++;
5967
0
            }
5968
0
          break;
5969
5970
0
          case OP_ALLANY:
5971
0
          case OP_ANYBYTE:
5972
0
          c = max - min;
5973
0
          if (c > (unsigned int)(md->end_subject - eptr))
5974
0
            {
5975
0
            eptr = md->end_subject;
5976
0
            SCHECK_PARTIAL();
5977
0
            }
5978
0
          else eptr += c;
5979
0
          break;
5980
5981
0
          case OP_ANYNL:
5982
0
          for (i = min; i < max; i++)
5983
0
            {
5984
0
            if (eptr >= md->end_subject)
5985
0
              {
5986
0
              SCHECK_PARTIAL();
5987
0
              break;
5988
0
              }
5989
0
            c = *eptr;
5990
0
            if (c == CHAR_CR)
5991
0
              {
5992
0
              if (++eptr >= md->end_subject) break;
5993
0
              if (*eptr == CHAR_LF) eptr++;
5994
0
              }
5995
0
            else
5996
0
              {
5997
0
              if (c != CHAR_LF && (md->bsr_anycrlf ||
5998
0
                 (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
5999
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6000
                 && c != 0x2028 && c != 0x2029
6001
#endif
6002
0
                 ))) break;
6003
0
              eptr++;
6004
0
              }
6005
0
            }
6006
0
          break;
6007
6008
0
          case OP_NOT_HSPACE:
6009
0
          for (i = min; i < max; i++)
6010
0
            {
6011
0
            if (eptr >= md->end_subject)
6012
0
              {
6013
0
              SCHECK_PARTIAL();
6014
0
              break;
6015
0
              }
6016
0
            switch(*eptr)
6017
0
              {
6018
0
              default: eptr++; break;
6019
0
              HSPACE_BYTE_CASES:
6020
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6021
              HSPACE_MULTIBYTE_CASES:
6022
#endif
6023
0
              goto ENDLOOP00;
6024
0
              }
6025
0
            }
6026
0
          ENDLOOP00:
6027
0
          break;
6028
6029
0
          case OP_HSPACE:
6030
0
          for (i = min; i < max; i++)
6031
0
            {
6032
0
            if (eptr >= md->end_subject)
6033
0
              {
6034
0
              SCHECK_PARTIAL();
6035
0
              break;
6036
0
              }
6037
0
            switch(*eptr)
6038
0
              {
6039
0
              default: goto ENDLOOP01;
6040
0
              HSPACE_BYTE_CASES:
6041
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6042
              HSPACE_MULTIBYTE_CASES:
6043
#endif
6044
0
              eptr++; break;
6045
0
              }
6046
0
            }
6047
0
          ENDLOOP01:
6048
0
          break;
6049
6050
0
          case OP_NOT_VSPACE:
6051
0
          for (i = min; i < max; i++)
6052
0
            {
6053
0
            if (eptr >= md->end_subject)
6054
0
              {
6055
0
              SCHECK_PARTIAL();
6056
0
              break;
6057
0
              }
6058
0
            switch(*eptr)
6059
0
              {
6060
0
              default: eptr++; break;
6061
0
              VSPACE_BYTE_CASES:
6062
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6063
              VSPACE_MULTIBYTE_CASES:
6064
#endif
6065
0
              goto ENDLOOP02;
6066
0
              }
6067
0
            }
6068
0
          ENDLOOP02:
6069
0
          break;
6070
6071
0
          case OP_VSPACE:
6072
0
          for (i = min; i < max; i++)
6073
0
            {
6074
0
            if (eptr >= md->end_subject)
6075
0
              {
6076
0
              SCHECK_PARTIAL();
6077
0
              break;
6078
0
              }
6079
0
            switch(*eptr)
6080
0
              {
6081
0
              default: goto ENDLOOP03;
6082
0
              VSPACE_BYTE_CASES:
6083
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6084
              VSPACE_MULTIBYTE_CASES:
6085
#endif
6086
0
              eptr++; break;
6087
0
              }
6088
0
            }
6089
0
          ENDLOOP03:
6090
0
          break;
6091
6092
0
          case OP_NOT_DIGIT:
6093
0
          for (i = min; i < max; i++)
6094
0
            {
6095
0
            if (eptr >= md->end_subject)
6096
0
              {
6097
0
              SCHECK_PARTIAL();
6098
0
              break;
6099
0
              }
6100
0
            if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0) break;
6101
0
            eptr++;
6102
0
            }
6103
0
          break;
6104
6105
0
          case OP_DIGIT:
6106
0
          for (i = min; i < max; i++)
6107
0
            {
6108
0
            if (eptr >= md->end_subject)
6109
0
              {
6110
0
              SCHECK_PARTIAL();
6111
0
              break;
6112
0
              }
6113
0
            if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0) break;
6114
0
            eptr++;
6115
0
            }
6116
0
          break;
6117
6118
0
          case OP_NOT_WHITESPACE:
6119
0
          for (i = min; i < max; i++)
6120
0
            {
6121
0
            if (eptr >= md->end_subject)
6122
0
              {
6123
0
              SCHECK_PARTIAL();
6124
0
              break;
6125
0
              }
6126
0
            if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0) break;
6127
0
            eptr++;
6128
0
            }
6129
0
          break;
6130
6131
0
          case OP_WHITESPACE:
6132
0
          for (i = min; i < max; i++)
6133
0
            {
6134
0
            if (eptr >= md->end_subject)
6135
0
              {
6136
0
              SCHECK_PARTIAL();
6137
0
              break;
6138
0
              }
6139
0
            if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0) break;
6140
0
            eptr++;
6141
0
            }
6142
0
          break;
6143
6144
0
          case OP_NOT_WORDCHAR:
6145
0
          for (i = min; i < max; i++)
6146
0
            {
6147
0
            if (eptr >= md->end_subject)
6148
0
              {
6149
0
              SCHECK_PARTIAL();
6150
0
              break;
6151
0
              }
6152
0
            if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0) break;
6153
0
            eptr++;
6154
0
            }
6155
0
          break;
6156
6157
0
          case OP_WORDCHAR:
6158
0
          for (i = min; i < max; i++)
6159
0
            {
6160
0
            if (eptr >= md->end_subject)
6161
0
              {
6162
0
              SCHECK_PARTIAL();
6163
0
              break;
6164
0
              }
6165
0
            if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0) break;
6166
0
            eptr++;
6167
0
            }
6168
0
          break;
6169
6170
0
          default:
6171
0
          RRETURN(PCRE_ERROR_INTERNAL);
6172
0
          }
6173
6174
0
        if (possessive) continue;    /* No backtracking */
6175
0
        for (;;)
6176
0
          {
6177
0
          if (eptr == pp) goto TAIL_RECURSE;
6178
0
          RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);
6179
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6180
0
          eptr--;
6181
0
          if (ctype == OP_ANYNL && eptr > pp  && *eptr == CHAR_LF &&
6182
0
              eptr[-1] == CHAR_CR) eptr--;
6183
0
          }
6184
0
        }
6185
6186
      /* Control never gets here */
6187
0
      }
6188
6189
    /* There's been some horrible disaster. Arrival here can only mean there is
6190
    something seriously wrong in the code above or the OP_xxx definitions. */
6191
6192
0
    default:
6193
0
    DPRINTF(("Unknown opcode %d\n", *ecode));
6194
0
    RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
6195
0
    }
6196
6197
  /* Do not stick any code in here without much thought; it is assumed
6198
  that "continue" in the code above comes out to here to repeat the main
6199
  loop. */
6200
6201
0
  }             /* End of main loop */
6202
/* Control never reaches here */
6203
6204
6205
/* When compiling to use the heap rather than the stack for recursive calls to
6206
match(), the RRETURN() macro jumps here. The number that is saved in
6207
frame->Xwhere indicates which label we actually want to return to. */
6208
6209
0
#ifdef NO_RECURSE
6210
0
#define LBL(val) case val: goto L_RM##val;
6211
0
HEAP_RETURN:
6212
0
switch (frame->Xwhere)
6213
0
  {
6214
0
  LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
6215
0
  LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
6216
0
  LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
6217
0
  LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
6218
0
  LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
6219
0
  LBL(65) LBL(66)
6220
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
6221
  LBL(20) LBL(21)
6222
#endif
6223
#ifdef SUPPORT_UTF
6224
  LBL(16) LBL(18)
6225
  LBL(22) LBL(23) LBL(28) LBL(30)
6226
  LBL(32) LBL(34) LBL(42) LBL(46)
6227
#ifdef SUPPORT_UCP
6228
  LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
6229
  LBL(59) LBL(60) LBL(61) LBL(62) LBL(67)
6230
#endif  /* SUPPORT_UCP */
6231
#endif  /* SUPPORT_UTF */
6232
0
  default:
6233
0
  DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
6234
0
  return PCRE_ERROR_INTERNAL;
6235
0
  }
6236
0
#undef LBL
6237
0
#endif  /* NO_RECURSE */
6238
0
}
6239
6240
6241
/***************************************************************************
6242
****************************************************************************
6243
                   RECURSION IN THE match() FUNCTION
6244
6245
Undefine all the macros that were defined above to handle this. */
6246
6247
#ifdef NO_RECURSE
6248
#undef eptr
6249
#undef ecode
6250
#undef mstart
6251
#undef offset_top
6252
#undef eptrb
6253
#undef flags
6254
6255
#undef callpat
6256
#undef charptr
6257
#undef data
6258
#undef next
6259
#undef pp
6260
#undef prev
6261
#undef saved_eptr
6262
6263
#undef new_recursive
6264
6265
#undef cur_is_word
6266
#undef condition
6267
#undef prev_is_word
6268
6269
#undef ctype
6270
#undef length
6271
#undef max
6272
#undef min
6273
#undef number
6274
#undef offset
6275
#undef op
6276
#undef save_capture_last
6277
#undef save_offset1
6278
#undef save_offset2
6279
#undef save_offset3
6280
#undef stacksave
6281
6282
#undef newptrb
6283
6284
#endif
6285
6286
/* These two are defined as macros in both cases */
6287
6288
#undef fc
6289
#undef fi
6290
6291
/***************************************************************************
6292
***************************************************************************/
6293
6294
6295
#ifdef NO_RECURSE
6296
/*************************************************
6297
*          Release allocated heap frames         *
6298
*************************************************/
6299
6300
/* This function releases all the allocated frames. The base frame is on the
6301
machine stack, and so must not be freed.
6302
6303
Argument: the address of the base frame
6304
Returns:  nothing
6305
*/
6306
6307
static void
6308
release_match_heapframes (heapframe *frame_base)
6309
28.4k
{
6310
28.4k
heapframe *nextframe = frame_base->Xnextframe;
6311
28.4k
while (nextframe != NULL)
6312
0
  {
6313
0
  heapframe *oldframe = nextframe;
6314
0
  nextframe = nextframe->Xnextframe;
6315
0
  (PUBL(stack_free))(oldframe);
6316
0
  }
6317
28.4k
}
6318
#endif
6319
6320
6321
/*************************************************
6322
*         Execute a Regular Expression           *
6323
*************************************************/
6324
6325
/* This function applies a compiled re to a subject string and picks out
6326
portions of the string if it matches. Two elements in the vector are set for
6327
each substring: the offsets to the start and end of the substring.
6328
6329
Arguments:
6330
  argument_re     points to the compiled expression
6331
  extra_data      points to extra data or is NULL
6332
  subject         points to the subject string
6333
  length          length of subject string (may contain binary zeros)
6334
  start_offset    where to start in the subject string
6335
  options         option bits
6336
  offsets         points to a vector of ints to be filled in with offsets
6337
  offsetcount     the number of elements in the vector
6338
6339
Returns:          > 0 => success; value is the number of elements filled in
6340
                  = 0 => success, but offsets is not big enough
6341
                   -1 => failed to match
6342
                 < -1 => some kind of unexpected problem
6343
*/
6344
6345
#if defined COMPILE_PCRE8
6346
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6347
pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
6348
  PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
6349
  int offsetcount)
6350
#elif defined COMPILE_PCRE16
6351
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6352
pcre16_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
6353
  PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
6354
  int offsetcount)
6355
#elif defined COMPILE_PCRE32
6356
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6357
pcre32_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
6358
  PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets,
6359
  int offsetcount)
6360
#endif
6361
28.4k
{
6362
28.4k
int rc, ocount, arg_offset_max;
6363
28.4k
int newline;
6364
28.4k
BOOL using_temporary_offsets = FALSE;
6365
28.4k
BOOL anchored;
6366
28.4k
BOOL startline;
6367
28.4k
BOOL firstline;
6368
28.4k
BOOL utf;
6369
28.4k
BOOL has_first_char = FALSE;
6370
28.4k
BOOL has_req_char = FALSE;
6371
28.4k
pcre_uchar first_char = 0;
6372
28.4k
pcre_uchar first_char2 = 0;
6373
28.4k
pcre_uchar req_char = 0;
6374
28.4k
pcre_uchar req_char2 = 0;
6375
28.4k
match_data match_block;
6376
28.4k
match_data *md = &match_block;
6377
28.4k
const pcre_uint8 *tables;
6378
28.4k
const pcre_uint8 *start_bits = NULL;
6379
28.4k
PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
6380
28.4k
PCRE_PUCHAR end_subject;
6381
28.4k
PCRE_PUCHAR start_partial = NULL;
6382
28.4k
PCRE_PUCHAR match_partial = NULL;
6383
28.4k
PCRE_PUCHAR req_char_ptr = start_match - 1;
6384
6385
28.4k
const pcre_study_data *study;
6386
28.4k
const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
6387
6388
28.4k
#ifdef NO_RECURSE
6389
28.4k
heapframe frame_zero;
6390
28.4k
frame_zero.Xprevframe = NULL;            /* Marks the top level */
6391
28.4k
frame_zero.Xnextframe = NULL;            /* None are allocated yet */
6392
28.4k
md->match_frames_base = &frame_zero;
6393
28.4k
#endif
6394
6395
/* Check for the special magic call that measures the size of the stack used
6396
per recursive call of match(). Without the funny casting for sizeof, a Windows
6397
compiler gave this error: "unary minus operator applied to unsigned type,
6398
result still unsigned". Hopefully the cast fixes that. */
6399
6400
28.4k
if (re == NULL && extra_data == NULL && subject == NULL && length == -999 &&
6401
28.4k
    start_offset == -999)
6402
0
#ifdef NO_RECURSE
6403
0
  return -((int)sizeof(heapframe));
6404
#else
6405
  return match(NULL, NULL, NULL, 0, NULL, NULL, 0);
6406
#endif
6407
6408
/* Plausibility checks */
6409
6410
28.4k
if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
6411
28.4k
if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))
6412
0
  return PCRE_ERROR_NULL;
6413
28.4k
if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
6414
28.4k
if (length < 0) return PCRE_ERROR_BADLENGTH;
6415
28.4k
if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
6416
6417
/* Check that the first field in the block is the magic number. If it is not,
6418
return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
6419
REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
6420
means that the pattern is likely compiled with different endianness. */
6421
6422
28.4k
if (re->magic_number != MAGIC_NUMBER)
6423
0
  return re->magic_number == REVERSED_MAGIC_NUMBER?
6424
0
    PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
6425
28.4k
if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
6426
6427
/* These two settings are used in the code for checking a UTF-8 string that
6428
follows immediately afterwards. Other values in the md block are used only
6429
during "normal" pcre_exec() processing, not when the JIT support is in use,
6430
so they are set up later. */
6431
6432
/* PCRE_UTF16 has the same value as PCRE_UTF8. */
6433
28.4k
utf = md->utf = (re->options & PCRE_UTF8) != 0;
6434
28.4k
md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
6435
28.4k
              ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
6436
6437
/* Check a UTF-8 string if required. Pass back the character offset and error
6438
code for an invalid string if a results vector is available. */
6439
6440
#ifdef SUPPORT_UTF
6441
if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
6442
  {
6443
  int erroroffset;
6444
  int errorcode = PRIV(valid_utf)((PCRE_PUCHAR)subject, length, &erroroffset);
6445
  if (errorcode != 0)
6446
    {
6447
    if (offsetcount >= 2)
6448
      {
6449
      offsets[0] = erroroffset;
6450
      offsets[1] = errorcode;
6451
      }
6452
#if defined COMPILE_PCRE8
6453
    return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
6454
      PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
6455
#elif defined COMPILE_PCRE16
6456
    return (errorcode <= PCRE_UTF16_ERR1 && md->partial > 1)?
6457
      PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
6458
#elif defined COMPILE_PCRE32
6459
    return PCRE_ERROR_BADUTF32;
6460
#endif
6461
    }
6462
#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
6463
  /* Check that a start_offset points to the start of a UTF character. */
6464
  if (start_offset > 0 && start_offset < length &&
6465
      NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
6466
    return PCRE_ERROR_BADUTF8_OFFSET;
6467
#endif
6468
  }
6469
#endif
6470
6471
/* If the pattern was successfully studied with JIT support, run the JIT
6472
executable instead of the rest of this function. Most options must be set at
6473
compile time for the JIT code to be usable. Fall back to the normal code path if
6474
an unsupported flag is set. */
6475
6476
#ifdef SUPPORT_JIT
6477
if (extra_data != NULL
6478
    && (extra_data->flags & (PCRE_EXTRA_EXECUTABLE_JIT |
6479
                             PCRE_EXTRA_TABLES)) == PCRE_EXTRA_EXECUTABLE_JIT
6480
    && extra_data->executable_jit != NULL
6481
    && (options & ~PUBLIC_JIT_EXEC_OPTIONS) == 0)
6482
  {
6483
  rc = PRIV(jit_exec)(extra_data, (const pcre_uchar *)subject, length,
6484
       start_offset, options, offsets, offsetcount);
6485
6486
  /* PCRE_ERROR_JIT_BADOPTION means that the selected normal or partial matching
6487
  mode is not compiled. In this case we simply fall back to the interpreter. */
6488
6489
  if (rc != PCRE_ERROR_JIT_BADOPTION) return rc;
6490
  }
6491
#endif
6492
6493
/* Carry on with non-JIT matching. This information is for finding all the
6494
numbers associated with a given name, for condition testing. */
6495
6496
28.4k
md->name_table = (pcre_uchar *)re + re->name_table_offset;
6497
28.4k
md->name_count = re->name_count;
6498
28.4k
md->name_entry_size = re->name_entry_size;
6499
6500
/* Fish out the optional data from the extra_data structure, first setting
6501
the default values. */
6502
6503
28.4k
study = NULL;
6504
28.4k
md->match_limit = MATCH_LIMIT;
6505
28.4k
md->match_limit_recursion = MATCH_LIMIT_RECURSION;
6506
28.4k
md->callout_data = NULL;
6507
6508
/* The table pointer is always in native byte order. */
6509
6510
28.4k
tables = re->tables;
6511
6512
/* The two limit values override the defaults, whatever their value. */
6513
6514
28.4k
if (extra_data != NULL)
6515
0
  {
6516
0
  unsigned long int flags = extra_data->flags;
6517
0
  if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
6518
0
    study = (const pcre_study_data *)extra_data->study_data;
6519
0
  if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
6520
0
    md->match_limit = extra_data->match_limit;
6521
0
  if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
6522
0
    md->match_limit_recursion = extra_data->match_limit_recursion;
6523
0
  if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
6524
0
    md->callout_data = extra_data->callout_data;
6525
0
  if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
6526
0
  }
6527
6528
/* Limits in the regex override only if they are smaller. */
6529
6530
28.4k
if ((re->flags & PCRE_MLSET) != 0 && re->limit_match < md->match_limit)
6531
0
  md->match_limit = re->limit_match;
6532
6533
28.4k
if ((re->flags & PCRE_RLSET) != 0 &&
6534
28.4k
    re->limit_recursion < md->match_limit_recursion)
6535
0
  md->match_limit_recursion = re->limit_recursion;
6536
6537
/* If the exec call supplied NULL for tables, use the inbuilt ones. This
6538
is a feature that makes it possible to save compiled regex and re-use them
6539
in other programs later. */
6540
6541
28.4k
if (tables == NULL) tables = PRIV(default_tables);
6542
6543
/* Set up other data */
6544
6545
28.4k
anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
6546
28.4k
startline = (re->flags & PCRE_STARTLINE) != 0;
6547
28.4k
firstline = (re->options & PCRE_FIRSTLINE) != 0;
6548
6549
/* The code starts after the real_pcre block and the capture name table. */
6550
6551
28.4k
md->start_code = (const pcre_uchar *)re + re->name_table_offset +
6552
28.4k
  re->name_count * re->name_entry_size;
6553
6554
28.4k
md->start_subject = (PCRE_PUCHAR)subject;
6555
28.4k
md->start_offset = start_offset;
6556
28.4k
md->end_subject = md->start_subject + length;
6557
28.4k
end_subject = md->end_subject;
6558
6559
28.4k
md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
6560
28.4k
md->use_ucp = (re->options & PCRE_UCP) != 0;
6561
28.4k
md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
6562
28.4k
md->ignore_skip_arg = 0;
6563
6564
/* Some options are unpacked into BOOL variables in the hope that testing
6565
them will be faster than individual option bits. */
6566
6567
28.4k
md->notbol = (options & PCRE_NOTBOL) != 0;
6568
28.4k
md->noteol = (options & PCRE_NOTEOL) != 0;
6569
28.4k
md->notempty = (options & PCRE_NOTEMPTY) != 0;
6570
28.4k
md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
6571
6572
28.4k
md->hitend = FALSE;
6573
28.4k
md->mark = md->nomatch_mark = NULL;     /* In case never set */
6574
6575
28.4k
md->recursive = NULL;                   /* No recursion at top level */
6576
28.4k
md->hasthen = (re->flags & PCRE_HASTHEN) != 0;
6577
6578
28.4k
md->lcc = tables + lcc_offset;
6579
28.4k
md->fcc = tables + fcc_offset;
6580
28.4k
md->ctypes = tables + ctypes_offset;
6581
6582
/* Handle different \R options. */
6583
6584
28.4k
switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
6585
28.4k
  {
6586
28.4k
  case 0:
6587
28.4k
  if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
6588
0
    md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
6589
28.4k
  else
6590
#ifdef BSR_ANYCRLF
6591
  md->bsr_anycrlf = TRUE;
6592
#else
6593
28.4k
  md->bsr_anycrlf = FALSE;
6594
28.4k
#endif
6595
28.4k
  break;
6596
6597
0
  case PCRE_BSR_ANYCRLF:
6598
0
  md->bsr_anycrlf = TRUE;
6599
0
  break;
6600
6601
0
  case PCRE_BSR_UNICODE:
6602
0
  md->bsr_anycrlf = FALSE;
6603
0
  break;
6604
6605
0
  default: return PCRE_ERROR_BADNEWLINE;
6606
28.4k
  }
6607
6608
/* Handle different types of newline. The three bits give eight cases. If
6609
nothing is set at run time, whatever was used at compile time applies. */
6610
6611
28.4k
switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
6612
28.4k
        (pcre_uint32)options) & PCRE_NEWLINE_BITS)
6613
28.4k
  {
6614
28.4k
  case 0: newline = NEWLINE; break;   /* Compile-time default */
6615
0
  case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
6616
0
  case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
6617
0
  case PCRE_NEWLINE_CR+
6618
0
       PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
6619
0
  case PCRE_NEWLINE_ANY: newline = -1; break;
6620
0
  case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
6621
0
  default: return PCRE_ERROR_BADNEWLINE;
6622
28.4k
  }
6623
6624
28.4k
if (newline == -2)
6625
0
  {
6626
0
  md->nltype = NLTYPE_ANYCRLF;
6627
0
  }
6628
28.4k
else if (newline < 0)
6629
0
  {
6630
0
  md->nltype = NLTYPE_ANY;
6631
0
  }
6632
28.4k
else
6633
28.4k
  {
6634
28.4k
  md->nltype = NLTYPE_FIXED;
6635
28.4k
  if (newline > 255)
6636
0
    {
6637
0
    md->nllen = 2;
6638
0
    md->nl[0] = (newline >> 8) & 255;
6639
0
    md->nl[1] = newline & 255;
6640
0
    }
6641
28.4k
  else
6642
28.4k
    {
6643
28.4k
    md->nllen = 1;
6644
28.4k
    md->nl[0] = newline;
6645
28.4k
    }
6646
28.4k
  }
6647
6648
/* Partial matching was originally supported only for a restricted set of
6649
regexes; from release 8.00 there are no restrictions, but the bits are still
6650
defined (though never set). So there's no harm in leaving this code. */
6651
6652
28.4k
if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
6653
0
  return PCRE_ERROR_BADPARTIAL;
6654
6655
/* If the expression has got more back references than the offsets supplied can
6656
hold, we get a temporary chunk of working store to use during the matching.
6657
Otherwise, we can use the vector supplied, rounding down its size to a multiple
6658
of 3. */
6659
6660
28.4k
ocount = offsetcount - (offsetcount % 3);
6661
28.4k
arg_offset_max = (2*ocount)/3;
6662
6663
28.4k
if (re->top_backref > 0 && re->top_backref >= ocount/3)
6664
0
  {
6665
0
  ocount = re->top_backref * 3 + 3;
6666
0
  md->offset_vector = (int *)(PUBL(malloc))(ocount * sizeof(int));
6667
0
  if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
6668
0
  using_temporary_offsets = TRUE;
6669
0
  DPRINTF(("Got memory to hold back references\n"));
6670
0
  }
6671
28.4k
else md->offset_vector = offsets;
6672
28.4k
md->offset_end = ocount;
6673
28.4k
md->offset_max = (2*ocount)/3;
6674
28.4k
md->capture_last = 0;
6675
6676
/* Reset the working variable associated with each extraction. These should
6677
never be used unless previously set, but they get saved and restored, and so we
6678
initialize them to avoid reading uninitialized locations. Also, unset the
6679
offsets for the matched string. This is really just for tidiness with callouts,
6680
in case they inspect these fields. */
6681
6682
28.4k
if (md->offset_vector != NULL)
6683
0
  {
6684
0
  register int *iptr = md->offset_vector + ocount;
6685
0
  register int *iend = iptr - re->top_bracket;
6686
0
  if (iend < md->offset_vector + 2) iend = md->offset_vector + 2;
6687
0
  while (--iptr >= iend) *iptr = -1;
6688
0
  if (offsetcount > 0) md->offset_vector[0] = -1;
6689
0
  if (offsetcount > 1) md->offset_vector[1] = -1;
6690
0
  }
6691
6692
/* Set up the first character to match, if available. The first_char value is
6693
never set for an anchored regular expression, but the anchoring may be forced
6694
at run time, so we have to test for anchoring. The first char may be unset for
6695
an unanchored pattern, of course. If there's no first char and the pattern was
6696
studied, there may be a bitmap of possible first characters. */
6697
6698
28.4k
if (!anchored)
6699
28.4k
  {
6700
28.4k
  if ((re->flags & PCRE_FIRSTSET) != 0)
6701
28.4k
    {
6702
28.4k
    has_first_char = TRUE;
6703
28.4k
    first_char = first_char2 = (pcre_uchar)(re->first_char);
6704
28.4k
    if ((re->flags & PCRE_FCH_CASELESS) != 0)
6705
0
      {
6706
0
      first_char2 = TABLE_GET(first_char, md->fcc, first_char);
6707
#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
6708
      if (utf && first_char > 127)
6709
        first_char2 = UCD_OTHERCASE(first_char);
6710
#endif
6711
0
      }
6712
28.4k
    }
6713
0
  else
6714
0
    if (!startline && study != NULL &&
6715
0
      (study->flags & PCRE_STUDY_MAPPED) != 0)
6716
0
        start_bits = study->start_bits;
6717
28.4k
  }
6718
6719
/* For anchored or unanchored matches, there may be a "last known required
6720
character" set. */
6721
6722
28.4k
if ((re->flags & PCRE_REQCHSET) != 0)
6723
28.4k
  {
6724
28.4k
  has_req_char = TRUE;
6725
28.4k
  req_char = req_char2 = (pcre_uchar)(re->req_char);
6726
28.4k
  if ((re->flags & PCRE_RCH_CASELESS) != 0)
6727
0
    {
6728
0
    req_char2 = TABLE_GET(req_char, md->fcc, req_char);
6729
#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
6730
    if (utf && req_char > 127)
6731
      req_char2 = UCD_OTHERCASE(req_char);
6732
#endif
6733
0
    }
6734
28.4k
  }
6735
6736
6737
/* ==========================================================================*/
6738
6739
/* Loop for handling unanchored repeated matching attempts; for anchored regexes
6740
the loop runs just once. */
6741
6742
28.4k
for(;;)
6743
28.4k
  {
6744
28.4k
  PCRE_PUCHAR save_end_subject = end_subject;
6745
28.4k
  PCRE_PUCHAR new_start_match;
6746
6747
  /* If firstline is TRUE, the start of the match is constrained to the first
6748
  line of a multiline string. That is, the match must be before or at the first
6749
  newline. Implement this by temporarily adjusting end_subject so that we stop
6750
  scanning at a newline. If the match fails at the newline, later code breaks
6751
  this loop. */
6752
6753
28.4k
  if (firstline)
6754
0
    {
6755
0
    PCRE_PUCHAR t = start_match;
6756
#ifdef SUPPORT_UTF
6757
    if (utf)
6758
      {
6759
      while (t < md->end_subject && !IS_NEWLINE(t))
6760
        {
6761
        t++;
6762
        ACROSSCHAR(t < end_subject, *t, t++);
6763
        }
6764
      }
6765
    else
6766
#endif
6767
0
    while (t < md->end_subject && !IS_NEWLINE(t)) t++;
6768
0
    end_subject = t;
6769
0
    }
6770
6771
  /* There are some optimizations that avoid running the match if a known
6772
  starting point is not found, or if a known later character is not present.
6773
  However, there is an option that disables these, for testing and for ensuring
6774
  that all callouts do actually occur. The option can be set in the regex by
6775
  (*NO_START_OPT) or passed in match-time options. */
6776
6777
28.4k
  if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
6778
28.4k
    {
6779
    /* Advance to a unique first char if there is one. */
6780
6781
28.4k
    if (has_first_char)
6782
28.4k
      {
6783
28.4k
      pcre_uchar smc;
6784
6785
28.4k
      if (first_char != first_char2)
6786
0
        while (start_match < end_subject &&
6787
0
          (smc = UCHAR21TEST(start_match)) != first_char && smc != first_char2)
6788
0
          start_match++;
6789
28.4k
      else
6790
502k
        while (start_match < end_subject && UCHAR21TEST(start_match) != first_char)
6791
473k
          start_match++;
6792
28.4k
      }
6793
6794
    /* Or to just after a linebreak for a multiline match */
6795
6796
0
    else if (startline)
6797
0
      {
6798
0
      if (start_match > md->start_subject + start_offset)
6799
0
        {
6800
#ifdef SUPPORT_UTF
6801
        if (utf)
6802
          {
6803
          while (start_match < end_subject && !WAS_NEWLINE(start_match))
6804
            {
6805
            start_match++;
6806
            ACROSSCHAR(start_match < end_subject, *start_match,
6807
              start_match++);
6808
            }
6809
          }
6810
        else
6811
#endif
6812
0
        while (start_match < end_subject && !WAS_NEWLINE(start_match))
6813
0
          start_match++;
6814
6815
        /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
6816
        and we are now at a LF, advance the match position by one more character.
6817
        */
6818
6819
0
        if (start_match[-1] == CHAR_CR &&
6820
0
             (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
6821
0
             start_match < end_subject &&
6822
0
             UCHAR21TEST(start_match) == CHAR_NL)
6823
0
          start_match++;
6824
0
        }
6825
0
      }
6826
6827
    /* Or to a non-unique first byte after study */
6828
6829
0
    else if (start_bits != NULL)
6830
0
      {
6831
0
      while (start_match < end_subject)
6832
0
        {
6833
0
        register pcre_uint32 c = UCHAR21TEST(start_match);
6834
#ifndef COMPILE_PCRE8
6835
        if (c > 255) c = 255;
6836
#endif
6837
0
        if ((start_bits[c/8] & (1 << (c&7))) != 0) break;
6838
0
        start_match++;
6839
0
        }
6840
0
      }
6841
28.4k
    }   /* Starting optimizations */
6842
6843
  /* Restore fudged end_subject */
6844
6845
28.4k
  end_subject = save_end_subject;
6846
6847
  /* The following two optimizations are disabled for partial matching or if
6848
  disabling is explicitly requested. */
6849
6850
28.4k
  if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
6851
28.4k
    {
6852
    /* If the pattern was studied, a minimum subject length may be set. This is
6853
    a lower bound; no actual string of that length may actually match the
6854
    pattern. Although the value is, strictly, in characters, we treat it as
6855
    bytes to avoid spending too much time in this optimization. */
6856
6857
28.4k
    if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
6858
28.4k
        (pcre_uint32)(end_subject - start_match) < study->minlength)
6859
0
      {
6860
0
      rc = MATCH_NOMATCH;
6861
0
      break;
6862
0
      }
6863
6864
    /* If req_char is set, we know that that character must appear in the
6865
    subject for the match to succeed. If the first character is set, req_char
6866
    must be later in the subject; otherwise the test starts at the match point.
6867
    This optimization can save a huge amount of backtracking in patterns with
6868
    nested unlimited repeats that aren't going to match. Writing separate code
6869
    for cased/caseless versions makes it go faster, as does using an
6870
    autoincrement and backing off on a match.
6871
6872
    HOWEVER: when the subject string is very, very long, searching to its end
6873
    can take a long time, and give bad performance on quite ordinary patterns.
6874
    This showed up when somebody was matching something like /^\d+C/ on a
6875
    32-megabyte string... so we don't do this when the string is sufficiently
6876
    long. */
6877
6878
28.4k
    if (has_req_char && end_subject - start_match < REQ_BYTE_MAX)
6879
28.4k
      {
6880
28.4k
      register PCRE_PUCHAR p = start_match + (has_first_char? 1:0);
6881
6882
      /* We don't need to repeat the search if we haven't yet reached the
6883
      place we found it at last time. */
6884
6885
28.4k
      if (p > req_char_ptr)
6886
28.4k
        {
6887
28.4k
        if (req_char != req_char2)
6888
0
          {
6889
0
          while (p < end_subject)
6890
0
            {
6891
0
            register pcre_uint32 pp = UCHAR21INCTEST(p);
6892
0
            if (pp == req_char || pp == req_char2) { p--; break; }
6893
0
            }
6894
0
          }
6895
28.4k
        else
6896
28.4k
          {
6897
28.4k
          while (p < end_subject)
6898
0
            {
6899
0
            if (UCHAR21INCTEST(p) == req_char) { p--; break; }
6900
0
            }
6901
28.4k
          }
6902
6903
        /* If we can't find the required character, break the matching loop,
6904
        forcing a match failure. */
6905
6906
28.4k
        if (p >= end_subject)
6907
28.4k
          {
6908
28.4k
          rc = MATCH_NOMATCH;
6909
28.4k
          break;
6910
28.4k
          }
6911
6912
        /* If we have found the required character, save the point where we
6913
        found it, so that we don't search again next time round the loop if
6914
        the start hasn't passed this character yet. */
6915
6916
0
        req_char_ptr = p;
6917
0
        }
6918
28.4k
      }
6919
28.4k
    }
6920
6921
#ifdef PCRE_DEBUG  /* Sigh. Some compilers never learn. */
6922
  printf(">>>> Match against: ");
6923
  pchars(start_match, end_subject - start_match, TRUE, md);
6924
  printf("\n");
6925
#endif
6926
6927
  /* OK, we can now run the match. If "hitend" is set afterwards, remember the
6928
  first starting point for which a partial match was found. */
6929
6930
0
  md->start_match_ptr = start_match;
6931
0
  md->start_used_ptr = start_match;
6932
0
  md->match_call_count = 0;
6933
0
  md->match_function_type = 0;
6934
0
  md->end_offset_top = 0;
6935
0
  md->skip_arg_count = 0;
6936
0
  rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);
6937
0
  if (md->hitend && start_partial == NULL)
6938
0
    {
6939
0
    start_partial = md->start_used_ptr;
6940
0
    match_partial = start_match;
6941
0
    }
6942
6943
0
  switch(rc)
6944
0
    {
6945
    /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
6946
    the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP
6947
    entirely. The only way we can do that is to re-do the match at the same
6948
    point, with a flag to force SKIP with an argument to be ignored. Just
6949
    treating this case as NOMATCH does not work because it does not check other
6950
    alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. */
6951
6952
0
    case MATCH_SKIP_ARG:
6953
0
    new_start_match = start_match;
6954
0
    md->ignore_skip_arg = md->skip_arg_count;
6955
0
    break;
6956
6957
    /* SKIP passes back the next starting point explicitly, but if it is no
6958
    greater than the match we have just done, treat it as NOMATCH. */
6959
6960
0
    case MATCH_SKIP:
6961
0
    if (md->start_match_ptr > start_match)
6962
0
      {
6963
0
      new_start_match = md->start_match_ptr;
6964
0
      break;
6965
0
      }
6966
    /* Fall through */
6967
6968
    /* NOMATCH and PRUNE advance by one character. THEN at this level acts
6969
    exactly like PRUNE. Unset ignore SKIP-with-argument. */
6970
6971
0
    case MATCH_NOMATCH:
6972
0
    case MATCH_PRUNE:
6973
0
    case MATCH_THEN:
6974
0
    md->ignore_skip_arg = 0;
6975
0
    new_start_match = start_match + 1;
6976
#ifdef SUPPORT_UTF
6977
    if (utf)
6978
      ACROSSCHAR(new_start_match < end_subject, *new_start_match,
6979
        new_start_match++);
6980
#endif
6981
0
    break;
6982
6983
    /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
6984
6985
0
    case MATCH_COMMIT:
6986
0
    rc = MATCH_NOMATCH;
6987
0
    goto ENDLOOP;
6988
6989
    /* Any other return is either a match, or some kind of error. */
6990
6991
0
    default:
6992
0
    goto ENDLOOP;
6993
0
    }
6994
6995
  /* Control reaches here for the various types of "no match at this point"
6996
  result. Reset the code to MATCH_NOMATCH for subsequent checking. */
6997
6998
0
  rc = MATCH_NOMATCH;
6999
7000
  /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
7001
  newline in the subject (though it may continue over the newline). Therefore,
7002
  if we have just failed to match, starting at a newline, do not continue. */
7003
7004
0
  if (firstline && IS_NEWLINE(start_match)) break;
7005
7006
  /* Advance to new matching position */
7007
7008
0
  start_match = new_start_match;
7009
7010
  /* Break the loop if the pattern is anchored or if we have passed the end of
7011
  the subject. */
7012
7013
0
  if (anchored || start_match > end_subject) break;
7014
7015
  /* If we have just passed a CR and we are now at a LF, and the pattern does
7016
  not contain any explicit matches for \r or \n, and the newline option is CRLF
7017
  or ANY or ANYCRLF, advance the match position by one more character. In
7018
  normal matching start_match will always be greater than the first position at
7019
  this stage, but a failed *SKIP can cause a return at the same point, which is
7020
  why the first test exists. */
7021
7022
0
  if (start_match > (PCRE_PUCHAR)subject + start_offset &&
7023
0
      start_match[-1] == CHAR_CR &&
7024
0
      start_match < end_subject &&
7025
0
      *start_match == CHAR_NL &&
7026
0
      (re->flags & PCRE_HASCRORLF) == 0 &&
7027
0
        (md->nltype == NLTYPE_ANY ||
7028
0
         md->nltype == NLTYPE_ANYCRLF ||
7029
0
         md->nllen == 2))
7030
0
    start_match++;
7031
7032
0
  md->mark = NULL;   /* Reset for start of next match attempt */
7033
0
  }                  /* End of for(;;) "bumpalong" loop */
7034
7035
/* ==========================================================================*/
7036
7037
/* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
7038
conditions is true:
7039
7040
(1) The pattern is anchored or the match was failed by (*COMMIT);
7041
7042
(2) We are past the end of the subject;
7043
7044
(3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
7045
    this option requests that a match occur at or before the first newline in
7046
    the subject.
7047
7048
When we have a match and the offset vector is big enough to deal with any
7049
backreferences, captured substring offsets will already be set up. In the case
7050
where we had to get some local store to hold offsets for backreference
7051
processing, copy those that we can. In this case there need not be overflow if
7052
certain parts of the pattern were not used, even though there are more
7053
capturing parentheses than vector slots. */
7054
7055
28.4k
ENDLOOP:
7056
7057
28.4k
if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
7058
0
  {
7059
0
  if (using_temporary_offsets)
7060
0
    {
7061
0
    if (arg_offset_max >= 4)
7062
0
      {
7063
0
      memcpy(offsets + 2, md->offset_vector + 2,
7064
0
        (arg_offset_max - 2) * sizeof(int));
7065
0
      DPRINTF(("Copied offsets from temporary memory\n"));
7066
0
      }
7067
0
    if (md->end_offset_top > arg_offset_max) md->capture_last |= OVFLBIT;
7068
0
    DPRINTF(("Freeing temporary memory\n"));
7069
0
    (PUBL(free))(md->offset_vector);
7070
0
    }
7071
7072
  /* Set the return code to the number of captured strings, or 0 if there were
7073
  too many to fit into the vector. */
7074
7075
0
  rc = ((md->capture_last & OVFLBIT) != 0 &&
7076
0
         md->end_offset_top >= arg_offset_max)?
7077
0
    0 : md->end_offset_top/2;
7078
7079
  /* If there is space in the offset vector, set any unused pairs at the end of
7080
  the pattern to -1 for backwards compatibility. It is documented that this
7081
  happens. In earlier versions, the whole set of potential capturing offsets
7082
  was set to -1 each time round the loop, but this is handled differently now.
7083
  "Gaps" are set to -1 dynamically instead (this fixes a bug). Thus, it is only
7084
  those at the end that need unsetting here. We can't just unset them all at
7085
  the start of the whole thing because they may get set in one branch that is
7086
  not the final matching branch. */
7087
7088
0
  if (md->end_offset_top/2 <= re->top_bracket && offsets != NULL)
7089
0
    {
7090
0
    register int *iptr, *iend;
7091
0
    int resetcount = 2 + re->top_bracket * 2;
7092
0
    if (resetcount > offsetcount) resetcount = offsetcount;
7093
0
    iptr = offsets + md->end_offset_top;
7094
0
    iend = offsets + resetcount;
7095
0
    while (iptr < iend) *iptr++ = -1;
7096
0
    }
7097
7098
  /* If there is space, set up the whole thing as substring 0. The value of
7099
  md->start_match_ptr might be modified if \K was encountered on the successful
7100
  matching path. */
7101
7102
0
  if (offsetcount < 2) rc = 0; else
7103
0
    {
7104
0
    offsets[0] = (int)(md->start_match_ptr - md->start_subject);
7105
0
    offsets[1] = (int)(md->end_match_ptr - md->start_subject);
7106
0
    }
7107
7108
  /* Return MARK data if requested */
7109
7110
0
  if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
7111
0
    *(extra_data->mark) = (pcre_uchar *)md->mark;
7112
0
  DPRINTF((">>>> returning %d\n", rc));
7113
0
#ifdef NO_RECURSE
7114
0
  release_match_heapframes(&frame_zero);
7115
0
#endif
7116
0
  return rc;
7117
0
  }
7118
7119
/* Control gets here if there has been an error, or if the overall match
7120
attempt has failed at all permitted starting positions. */
7121
7122
28.4k
if (using_temporary_offsets)
7123
0
  {
7124
0
  DPRINTF(("Freeing temporary memory\n"));
7125
0
  (PUBL(free))(md->offset_vector);
7126
0
  }
7127
7128
/* For anything other than nomatch or partial match, just return the code. */
7129
7130
28.4k
if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
7131
0
  {
7132
0
  DPRINTF((">>>> error: returning %d\n", rc));
7133
0
#ifdef NO_RECURSE
7134
0
  release_match_heapframes(&frame_zero);
7135
0
#endif
7136
0
  return rc;
7137
0
  }
7138
7139
/* Handle partial matches - disable any mark data */
7140
7141
28.4k
if (match_partial != NULL)
7142
0
  {
7143
0
  DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
7144
0
  md->mark = NULL;
7145
0
  if (offsetcount > 1)
7146
0
    {
7147
0
    offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);
7148
0
    offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
7149
0
    if (offsetcount > 2)
7150
0
      offsets[2] = (int)(match_partial - (PCRE_PUCHAR)subject);
7151
0
    }
7152
0
  rc = PCRE_ERROR_PARTIAL;
7153
0
  }
7154
7155
/* This is the classic nomatch case */
7156
7157
28.4k
else
7158
28.4k
  {
7159
28.4k
  DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
7160
28.4k
  rc = PCRE_ERROR_NOMATCH;
7161
28.4k
  }
7162
7163
/* Return the MARK data if it has been requested. */
7164
7165
28.4k
if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
7166
0
  *(extra_data->mark) = (pcre_uchar *)md->nomatch_mark;
7167
28.4k
#ifdef NO_RECURSE
7168
28.4k
  release_match_heapframes(&frame_zero);
7169
28.4k
#endif
7170
28.4k
return rc;
7171
28.4k
}
7172
7173
/* End of pcre_exec.c */