Coverage Report

Created: 2026-01-16 06:48

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/glib/glib/pcre/pcre_exec.c
Line
Count
Source
1
/*************************************************
2
*      Perl-Compatible Regular Expressions       *
3
*************************************************/
4
5
/* PCRE is a library of functions to support regular expressions whose syntax
6
and semantics are as close as possible to those of the Perl 5 language.
7
8
                       Written by Philip Hazel
9
           Copyright (c) 1997-2012 University of Cambridge
10
11
-----------------------------------------------------------------------------
12
Redistribution and use in source and binary forms, with or without
13
modification, are permitted provided that the following conditions are met:
14
15
    * Redistributions of source code must retain the above copyright notice,
16
      this list of conditions and the following disclaimer.
17
18
    * Redistributions in binary form must reproduce the above copyright
19
      notice, this list of conditions and the following disclaimer in the
20
      documentation and/or other materials provided with the distribution.
21
22
    * Neither the name of the University of Cambridge nor the names of its
23
      contributors may be used to endorse or promote products derived from
24
      this software without specific prior written permission.
25
26
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36
POSSIBILITY OF SUCH DAMAGE.
37
-----------------------------------------------------------------------------
38
*/
39
40
/* This module contains pcre_exec(), the externally visible function that does
41
pattern matching using an NFA algorithm, trying to mimic Perl as closely as
42
possible. There are also some static supporting functions. */
43
44
#include "config.h"
45
46
0
#define NLBLOCK md             /* Block containing newline information */
47
0
#define PSSTART start_subject  /* Field containing processed string start */
48
0
#define PSEND   end_subject    /* Field containing processed string end */
49
50
#include "pcre_internal.h"
51
52
/* Undefine some potentially clashing cpp symbols */
53
54
#undef min
55
#undef max
56
57
/* Values for setting in md->match_function_type to indicate two special types
58
of call to match(). We do it this way to save on using another stack variable,
59
as stack usage is to be discouraged. */
60
61
0
#define MATCH_CONDASSERT     1  /* Called to check a condition assertion */
62
0
#define MATCH_CBEGROUP       2  /* Could-be-empty unlimited repeat group */
63
64
/* Non-error returns from the match() function. Error returns are externally
65
defined PCRE_ERROR_xxx codes, which are all negative. */
66
67
0
#define MATCH_MATCH        1
68
0
#define MATCH_NOMATCH      0
69
70
/* Special internal returns from the match() function. Make them sufficiently
71
negative to avoid the external error codes. */
72
73
0
#define MATCH_ACCEPT       (-999)
74
0
#define MATCH_COMMIT       (-998)
75
0
#define MATCH_KETRPOS      (-997)
76
0
#define MATCH_ONCE         (-996)
77
0
#define MATCH_PRUNE        (-995)
78
0
#define MATCH_SKIP         (-994)
79
0
#define MATCH_SKIP_ARG     (-993)
80
0
#define MATCH_THEN         (-992)
81
82
/* Maximum number of ints of offset to save on the stack for recursive calls.
83
If the offset vector is bigger, malloc is used. This should be a multiple of 3,
84
because the offset vector is always a multiple of 3 long. */
85
86
0
#define REC_STACK_SAVE_MAX 30
87
88
/* Min and max values for the common repeats; for the maxima, 0 => infinity */
89
90
/* Lower bounds for the six common repeat kinds. The entries come in pairs
and line up index-for-index with the rep_max table. */
static const char rep_min[] = {
  0, 0,
  1, 1,
  0, 0
};
91
/* Upper bounds for the six common repeat kinds, index-for-index with
rep_min. A stored 0 stands for "no upper limit". */
static const char rep_max[] = {
  0, 0,
  0, 0,
  1, 1
};
92
93
94
95
#ifdef PCRE_DEBUG
96
/*************************************************
97
*        Debugging function to print chars       *
98
*************************************************/
99
100
/* Print a sequence of chars in printable format, stopping at the end of the
101
subject if requested.
102
103
Arguments:
104
  p           points to characters
105
  length      number to print
106
  is_subject  TRUE if printing from within md->start_subject
107
  md          pointer to matching data block, if is_subject is TRUE
108
109
Returns:     nothing
110
*/
111
112
static void
113
pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
114
{
115
unsigned int c;
116
if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
117
while (length-- > 0)
118
  if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
119
}
120
#endif
121
122
123
124
/*************************************************
125
*          Match a back-reference                *
126
*************************************************/
127
128
/* Normally, if a back reference hasn't been set, the length that is passed is
129
negative, so the match always fails. However, in JavaScript compatibility mode,
130
the length passed is zero. Note that in caseless UTF-8 mode, the number of
131
subject bytes matched may be different to the number of reference bytes.
132
133
Arguments:
134
  offset      index into the offset vector
135
  eptr        pointer into the subject
136
  length      length of reference to be matched (number of bytes)
137
  md          points to match data block
138
  caseless    TRUE if caseless
139
140
Returns:      >= 0 the number of subject bytes matched
141
              -1 no match
142
              -2 partial match; always given if at end subject
143
*/
144
145
static int
146
match_ref(int offset, PCRE_PUCHAR eptr, int length, match_data *md,
147
  BOOL caseless)
148
0
{
149
0
PCRE_PUCHAR eptr_start = eptr;
150
0
PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];
151
152
#ifdef PCRE_DEBUG
153
if (eptr >= md->end_subject)
154
  printf("matching subject <null>");
155
else
156
  {
157
  printf("matching subject ");
158
  pchars(eptr, length, TRUE, md);
159
  }
160
printf(" against backref ");
161
pchars(p, length, FALSE, md);
162
printf("\n");
163
#endif
164
165
/* Always fail if reference not set (and not JavaScript compatible - in that
166
case the length is passed as zero). */
167
168
0
if (length < 0) return -1;
169
170
/* Separate the caseless case for speed. In UTF-8 mode we can only do this
171
properly if Unicode properties are supported. Otherwise, we can check only
172
ASCII characters. */
173
174
0
if (caseless)
175
0
  {
176
0
#ifdef SUPPORT_UTF
177
0
#ifdef SUPPORT_UCP
178
0
  if (md->utf)
179
0
    {
180
    /* Match characters up to the end of the reference. NOTE: the number of
181
    bytes matched may differ, because there are some characters whose upper and
182
    lower case versions code as different numbers of bytes. For example, U+023A
183
    (2 bytes in UTF-8) is the upper case version of U+2C65 (3 bytes in UTF-8);
184
    a sequence of 3 of the former uses 6 bytes, as does a sequence of two of
185
    the latter. It is important, therefore, to check the length along the
186
    reference, not along the subject (earlier code did this wrong). */
187
188
0
    PCRE_PUCHAR endptr = p + length;
189
0
    while (p < endptr)
190
0
      {
191
0
      int c, d;
192
0
      if (eptr >= md->end_subject) return -2;   /* Partial match */
193
0
      GETCHARINC(c, eptr);
194
0
      GETCHARINC(d, p);
195
0
      if (c != d && c != UCD_OTHERCASE(d)) return -1;
196
0
      }
197
0
    }
198
0
  else
199
0
#endif
200
0
#endif
201
202
  /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
203
  is no UCP support. */
204
0
    {
205
0
    while (length-- > 0)
206
0
      {
207
0
      if (eptr >= md->end_subject) return -2;   /* Partial match */
208
0
      if (TABLE_GET(*p, md->lcc, *p) != TABLE_GET(*eptr, md->lcc, *eptr)) return -1;
209
0
      p++;
210
0
      eptr++;
211
0
      }
212
0
    }
213
0
  }
214
215
/* In the caseful case, we can just compare the bytes, whether or not we
216
are in UTF-8 mode. */
217
218
0
else
219
0
  {
220
0
  while (length-- > 0)
221
0
    {
222
0
    if (eptr >= md->end_subject) return -2;   /* Partial match */
223
0
    if (*p++ != *eptr++) return -1;
224
0
    }
225
0
  }
226
227
0
return (int)(eptr - eptr_start);
228
0
}
229
230
231
232
/***************************************************************************
233
****************************************************************************
234
                   RECURSION IN THE match() FUNCTION
235
236
The match() function is highly recursive, though not every recursive call
237
increases the recursive depth. Nevertheless, some regular expressions can cause
238
it to recurse to a great depth. I was writing for Unix, so I just let it call
239
itself recursively. This uses the stack for saving everything that has to be
240
saved for a recursive call. On Unix, the stack can be large, and this works
241
fine.
242
243
It turns out that on some non-Unix-like systems there are problems with
244
programs that use a lot of stack. (This despite the fact that every last chip
245
has oodles of memory these days, and techniques for extending the stack have
246
been known for decades.) So....
247
248
There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
249
calls by keeping local variables that need to be preserved in blocks of memory
250
obtained from malloc() instead of on the stack. Macros are used to
251
achieve this so that the actual code doesn't look very different to what it
252
always used to.
253
254
The original heap-recursive code used longjmp(). However, it seems that this
255
can be very slow on some operating systems. Following a suggestion from Stan
256
Switzer, the use of longjmp() has been abolished, at the cost of having to
257
provide a unique number for each call to RMATCH. There is no way of generating
258
a sequence of numbers at compile time in C. I have given them names, to make
259
them stand out more clearly.
260
261
Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
262
FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
263
tests. Furthermore, not using longjmp() means that local dynamic variables
264
don't have indeterminate values; this has meant that the frame size can be
265
reduced because the result can be "passed back" by straight setting of the
266
variable instead of being passed in the frame.
267
****************************************************************************
268
***************************************************************************/
269
270
/* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
271
below must be updated in sync.  */
272
273
/* One distinct tag per RMATCH call site. Numbering starts at 1 and runs
consecutively up to RM66. */
enum {
  RM1 = 1, RM2,  RM3,  RM4,  RM5,  RM6,
  RM7,     RM8,  RM9,  RM10, RM11, RM12,
  RM13,    RM14, RM15, RM16, RM17, RM18,
  RM19,    RM20, RM21, RM22, RM23, RM24,
  RM25,    RM26, RM27, RM28, RM29, RM30,
  RM31,    RM32, RM33, RM34, RM35, RM36,
  RM37,    RM38, RM39, RM40, RM41, RM42,
  RM43,    RM44, RM45, RM46, RM47, RM48,
  RM49,    RM50, RM51, RM52, RM53, RM54,
  RM55,    RM56, RM57, RM58, RM59, RM60,
  RM61,    RM62, RM63, RM64, RM65, RM66
};
280
281
/* These versions of the macros use the stack, as normal. There are debugging
282
versions and production versions. Note that the "rw" argument of RMATCH isn't
283
actually used in this definition. */
284
285
#ifndef NO_RECURSE
286
287
/* Stack-recursion flavour of RMATCH/RRETURN. Both variants rely on names from
the expanding scope: rrc receives the result, and mstart and rdepth are passed
through to the callee. */
#ifdef PCRE_DEBUG
288
/* Debug RMATCH: trace the call site, call match() one level deeper
(rdepth+1) storing the result in rrc, then trace the return. The rw argument
is not used. */
#define RMATCH(ra,rb,rc,rd,re,rw) \
289
  { \
290
  printf("match() called in line %d\n", __LINE__); \
291
  rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1); \
292
  printf("to line %d\n", __LINE__); \
293
  }
294
/* Debug RRETURN: trace the value and line, then return. Note that ra appears
twice in the expansion. */
#define RRETURN(ra) \
295
  { \
296
  printf("match() returned %d from line %d ", ra, __LINE__); \
297
  return ra; \
298
  }
299
#else
300
/* Production RMATCH: a plain recursive call to match() whose result is left
in rrc. The rw argument is not used. */
#define RMATCH(ra,rb,rc,rd,re,rw) \
301
0
  rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1)
302
0
/* Production RRETURN: an ordinary return statement. */
#define RRETURN(ra) return ra
303
#endif
304
305
#else
306
307
308
/* These versions of the macros manage a private stack on the heap. Note that
309
the "rd" argument of RMATCH isn't actually used in this definition. It's the md
310
argument of match(), which never changes. */
311
312
/* Heap flavour of RMATCH. Steps performed by the expansion:
  1. Take the frame already chained at frame->Xnextframe, or, if there is
     none, obtain a new heapframe through PUBL(stack_malloc) and link it in.
     An allocation failure leaves via RRETURN(PCRE_ERROR_NOMEMORY).
  2. Record rw in the current frame's Xwhere.
  3. Fill the new frame's argument slots: ra -> Xeptr, rb -> Xecode,
     mstart -> Xmstart, rc -> Xoffset_top, re -> Xeptrb, and the current
     Xrdepth plus one.
  4. Make the new frame current and jump to HEAP_RECURSE.
The label L_<rw> marks the point just after the simulated call; presumably it
is the jump target used by the code at HEAP_RETURN, which is not part of this
macro. The rd argument is not used. */
#define RMATCH(ra,rb,rc,rd,re,rw)\
313
  {\
314
  heapframe *newframe = frame->Xnextframe;\
315
  if (newframe == NULL)\
316
    {\
317
    newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\
318
    if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
319
    newframe->Xnextframe = NULL;\
320
    frame->Xnextframe = newframe;\
321
    }\
322
  frame->Xwhere = rw;\
323
  newframe->Xeptr = ra;\
324
  newframe->Xecode = rb;\
325
  newframe->Xmstart = mstart;\
326
  newframe->Xoffset_top = rc;\
327
  newframe->Xeptrb = re;\
328
  newframe->Xrdepth = frame->Xrdepth + 1;\
329
  newframe->Xprevframe = frame;\
330
  frame = newframe;\
331
  DPRINTF(("restarting from line %d\n", __LINE__));\
332
  goto HEAP_RECURSE;\
333
  L_##rw:\
334
  DPRINTF(("jumped back to line %d\n", __LINE__));\
335
  }
336
337
/* Heap flavour of RRETURN. Steps back to the previous frame; if there is one,
the result is handed over in rrc and control jumps to HEAP_RETURN. If there is
no previous frame this is the outermost level, so the function really returns.
The frame being left is not released here. */
#define RRETURN(ra)\
338
  {\
339
  heapframe *oldframe = frame;\
340
  frame = oldframe->Xprevframe;\
341
  if (frame != NULL)\
342
    {\
343
    rrc = ra;\
344
    goto HEAP_RETURN;\
345
    }\
346
  return ra;\
347
  }
348
349
350
/* Structure for remembering the local variables in a private frame */
351
352
typedef struct heapframe {
353
  struct heapframe *Xprevframe;
354
  struct heapframe *Xnextframe;
355
356
  /* Function arguments that may change */
357
358
  PCRE_PUCHAR Xeptr;
359
  const pcre_uchar *Xecode;
360
  PCRE_PUCHAR Xmstart;
361
  int Xoffset_top;
362
  eptrblock *Xeptrb;
363
  unsigned int Xrdepth;
364
365
  /* Function local variables */
366
367
  PCRE_PUCHAR Xcallpat;
368
#ifdef SUPPORT_UTF
369
  PCRE_PUCHAR Xcharptr;
370
#endif
371
  PCRE_PUCHAR Xdata;
372
  PCRE_PUCHAR Xnext;
373
  PCRE_PUCHAR Xpp;
374
  PCRE_PUCHAR Xprev;
375
  PCRE_PUCHAR Xsaved_eptr;
376
377
  recursion_info Xnew_recursive;
378
379
  BOOL Xcur_is_word;
380
  BOOL Xcondition;
381
  BOOL Xprev_is_word;
382
383
#ifdef SUPPORT_UCP
384
  int Xprop_type;
385
  int Xprop_value;
386
  int Xprop_fail_result;
387
  int Xoclength;
388
  pcre_uchar Xocchars[6];
389
#endif
390
391
  int Xcodelink;
392
  int Xctype;
393
  unsigned int Xfc;
394
  int Xfi;
395
  int Xlength;
396
  int Xmax;
397
  int Xmin;
398
  int Xnumber;
399
  int Xoffset;
400
  int Xop;
401
  int Xsave_capture_last;
402
  int Xsave_offset1, Xsave_offset2, Xsave_offset3;
403
  int Xstacksave[REC_STACK_SAVE_MAX];
404
405
  eptrblock Xnewptrb;
406
407
  /* Where to jump back to */
408
409
  int Xwhere;
410
411
} heapframe;
412
413
#endif
414
415
416
/***************************************************************************
417
***************************************************************************/
418
419
420
421
/*************************************************
422
*         Match from current position            *
423
*************************************************/
424
425
/* This function is called recursively in many circumstances. Whenever it
426
returns a negative (error) response, the outer incarnation must also return the
427
same response. */
428
429
/* These macros pack up tests that are used for partial matching, and which
430
appear several times in the code. We set the "hit end" flag if the pointer is
431
at the end of the subject and also past the start of the subject (i.e.
432
something has been matched). For hard partial matching, we then return
433
immediately. The second one is used when we already know we are past the end of
434
the subject. */
435
436
/* CHECK_PARTIAL: expands to a statement that uses md and eptr from the
enclosing scope. When partial matching is enabled (md->partial != 0) and eptr
is at or beyond md->end_subject and also beyond md->start_used_ptr, it sets
md->hitend; if md->partial is greater than 1 it then leaves via
RRETURN(PCRE_ERROR_PARTIAL). */
#define CHECK_PARTIAL()\
437
0
  if (md->partial != 0 && eptr >= md->end_subject && \
438
0
      eptr > md->start_used_ptr) \
439
0
    { \
440
0
    md->hitend = TRUE; \
441
0
    if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
442
0
    }
443
444
/* SCHECK_PARTIAL: same action as CHECK_PARTIAL but without the end-of-subject
comparison; only md->partial and eptr > md->start_used_ptr are tested. */
#define SCHECK_PARTIAL()\
445
0
  if (md->partial != 0 && eptr > md->start_used_ptr) \
446
0
    { \
447
0
    md->hitend = TRUE; \
448
0
    if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
449
0
    }
450
451
452
/* Performance note: It might be tempting to extract commonly used fields from
453
the md structure (e.g. utf, end_subject) into individual variables to improve
454
performance. Tests using gcc on a SPARC disproved this; in the first case, it
455
made performance worse.
456
457
Arguments:
458
   eptr        pointer to current character in subject
459
   ecode       pointer to current position in compiled code
460
   mstart      pointer to the current match start position (can be modified
461
                 by encountering \K)
462
   offset_top  current top pointer
463
   md          pointer to "static" info for the match
464
   eptrb       pointer to chain of blocks containing eptr at start of
465
                 brackets - for testing for empty matches
466
   rdepth      the recursion depth
467
468
Returns:       MATCH_MATCH if matched            )  these values are >= 0
469
               MATCH_NOMATCH if failed to match  )
470
               a negative MATCH_xxx value for PRUNE, SKIP, etc
471
               a negative PCRE_ERROR_xxx value if aborted by an error condition
472
                 (e.g. stopped by repeated call or recursion limit)
473
*/
474
475
static int
476
match(PCRE_PUCHAR eptr, const pcre_uchar *ecode,
477
  PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb,
478
  unsigned int rdepth)
479
0
{
480
/* These variables do not need to be preserved over recursion in this function,
481
so they can be ordinary variables in all cases. Mark some of them with
482
"register" because they are used a lot in loops. */
483
484
0
int  rrc;         /* Returns from recursive calls */
485
0
int  i;           /* Used for loops not involving calls to RMATCH() */
486
0
unsigned int c;   /* Character values not kept over RMATCH() calls */
487
0
BOOL utf;         /* Local copy of UTF flag for speed */
488
489
0
BOOL minimize, possessive; /* Quantifier options */
490
0
BOOL caseless;
491
0
int condcode;
492
493
/* When recursion is not being used, all "local" variables that have to be
494
preserved over calls to RMATCH() are part of a "frame". We set up the top-level
495
frame on the stack here; subsequent instantiations are obtained from the heap
496
whenever RMATCH() does a "recursion". See the macro definitions above. Putting
497
the top-level on the stack rather than malloc-ing them all gives a performance
498
boost in many cases where there is not much "recursion". */
499
500
#ifdef NO_RECURSE
501
heapframe *frame = (heapframe *)md->match_frames_base;
502
503
/* Copy in the original argument variables */
504
505
frame->Xeptr = eptr;
506
frame->Xecode = ecode;
507
frame->Xmstart = mstart;
508
frame->Xoffset_top = offset_top;
509
frame->Xeptrb = eptrb;
510
frame->Xrdepth = rdepth;
511
512
/* This is where control jumps back to to effect "recursion" */
513
514
HEAP_RECURSE:
515
516
/* Macros make the argument variables come from the current frame */
517
518
#define eptr               frame->Xeptr
519
#define ecode              frame->Xecode
520
#define mstart             frame->Xmstart
521
#define offset_top         frame->Xoffset_top
522
#define eptrb              frame->Xeptrb
523
#define rdepth             frame->Xrdepth
524
525
/* Ditto for the local variables */
526
527
#ifdef SUPPORT_UTF
528
#define charptr            frame->Xcharptr
529
#endif
530
#define callpat            frame->Xcallpat
531
#define codelink           frame->Xcodelink
532
#define data               frame->Xdata
533
#define next               frame->Xnext
534
#define pp                 frame->Xpp
535
#define prev               frame->Xprev
536
#define saved_eptr         frame->Xsaved_eptr
537
538
#define new_recursive      frame->Xnew_recursive
539
540
#define cur_is_word        frame->Xcur_is_word
541
#define condition          frame->Xcondition
542
#define prev_is_word       frame->Xprev_is_word
543
544
#ifdef SUPPORT_UCP
545
#define prop_type          frame->Xprop_type
546
#define prop_value         frame->Xprop_value
547
#define prop_fail_result   frame->Xprop_fail_result
548
#define oclength           frame->Xoclength
549
#define occhars            frame->Xocchars
550
#endif
551
552
#define ctype              frame->Xctype
553
#define fc                 frame->Xfc
554
#define fi                 frame->Xfi
555
#define length             frame->Xlength
556
#define max                frame->Xmax
557
#define min                frame->Xmin
558
#define number             frame->Xnumber
559
#define offset             frame->Xoffset
560
#define op                 frame->Xop
561
#define save_capture_last  frame->Xsave_capture_last
562
#define save_offset1       frame->Xsave_offset1
563
#define save_offset2       frame->Xsave_offset2
564
#define save_offset3       frame->Xsave_offset3
565
#define stacksave          frame->Xstacksave
566
567
#define newptrb            frame->Xnewptrb
568
569
/* When recursion is being used, local variables are allocated on the stack and
570
get preserved during recursion in the normal way. In this environment, fi and
571
i, and fc and c, can be the same variables. */
572
573
#else         /* NO_RECURSE not defined */
574
0
#define fi i
575
0
#define fc c
576
577
/* Many of the following variables are used only in small blocks of the code.
578
My normal style of coding would have declared them within each of those blocks.
579
However, in order to accommodate the version of this code that uses an external
580
"stack" implemented on the heap, it is easier to declare them all here, so the
581
declarations can be cut out in a block. The only declarations within blocks
582
below are for variables that do not have to be preserved over a recursive call
583
to RMATCH(). */
584
585
0
#ifdef SUPPORT_UTF
586
0
const pcre_uchar *charptr;
587
0
#endif
588
0
const pcre_uchar *callpat;
589
0
const pcre_uchar *data;
590
0
const pcre_uchar *next;
591
0
PCRE_PUCHAR       pp;
592
0
const pcre_uchar *prev;
593
0
PCRE_PUCHAR       saved_eptr;
594
595
0
recursion_info new_recursive;
596
597
0
BOOL cur_is_word;
598
0
BOOL condition;
599
0
BOOL prev_is_word;
600
601
0
#ifdef SUPPORT_UCP
602
0
int prop_type;
603
0
int prop_value;
604
0
int prop_fail_result;
605
0
int oclength;
606
0
pcre_uchar occhars[6];
607
0
#endif
608
609
0
int codelink;
610
0
int ctype;
611
0
int length;
612
0
int max;
613
0
int min;
614
0
int number;
615
0
int offset;
616
0
int op;
617
0
int save_capture_last;
618
0
int save_offset1, save_offset2, save_offset3;
619
0
int stacksave[REC_STACK_SAVE_MAX];
620
621
0
eptrblock newptrb;
622
623
/* There is a special fudge for calling match() in a way that causes it to
624
measure the size of its basic stack frame when the stack is being used for
625
recursion. The second argument (ecode) being NULL triggers this behaviour. It
626
cannot normally ever be NULL. The return is the negated value of the frame
627
size. */
628
629
0
if (ecode == NULL)
630
0
  {
631
0
  if (rdepth == 0)
632
0
    return match((PCRE_PUCHAR)&rdepth, NULL, NULL, 0, NULL, NULL, 1);
633
0
  else
634
0
    {
635
0
    int len = (char *)&rdepth - (char *)eptr;
636
0
    return (len > 0)? -len : len;
637
0
    }
638
0
  }
639
0
#endif     /* NO_RECURSE */
640
641
/* To save space on the stack and in the heap frame, I have doubled up on some
642
of the local variables that are used only in localised parts of the code, but
643
still need to be preserved over recursive calls of match(). These macros define
644
the alternative names that are used. */
645
646
0
#define allow_zero    cur_is_word
647
0
#define cbegroup      condition
648
0
#define code_offset   codelink
649
0
#define condassert    condition
650
0
#define matched_once  prev_is_word
651
0
#define foc           number
652
0
#define save_mark     data
653
654
/* These statements are here to stop the compiler complaining about uninitialized
655
variables. */
656
657
0
#ifdef SUPPORT_UCP
658
0
prop_value = 0;
659
0
prop_fail_result = 0;
660
0
#endif
661
662
663
/* This label is used for tail recursion, which is used in a few cases even
664
when NO_RECURSE is not defined, in order to reduce the amount of stack that is
665
used. Thanks to Ian Taylor for noticing this possibility and sending the
666
original patch. */
667
668
0
TAIL_RECURSE:
669
670
/* OK, now we can get on with the real code of the function. Recursive calls
671
are specified by the macro RMATCH and RRETURN is used to return. When
672
NO_RECURSE is *not* defined, these just turn into a recursive call to match()
673
and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
674
defined). However, RMATCH isn't like a function call because it's quite a
675
complicated macro. It has to be used in one particular way. This shouldn't,
676
however, impact performance when true recursion is being used. */
677
678
0
#ifdef SUPPORT_UTF
679
0
utf = md->utf;       /* Local copy of the flag */
680
#else
681
utf = FALSE;
682
#endif
683
684
/* First check that we haven't called match() too many times, or that we
685
haven't exceeded the recursive call limit. */
686
687
0
if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
688
0
if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
689
690
/* At the start of a group with an unlimited repeat that may match an empty
691
string, the variable md->match_function_type is set to MATCH_CBEGROUP. It is
692
done this way to save having to use another function argument, which would take
693
up space on the stack. See also MATCH_CONDASSERT below.
694
695
When MATCH_CBEGROUP is set, add the current subject pointer to the chain of
696
such remembered pointers, to be checked when we hit the closing ket, in order
697
to break infinite loops that match no characters. When match() is called in
698
other circumstances, don't add to the chain. The MATCH_CBEGROUP feature must
699
NOT be used with tail recursion, because the memory block that is used is on
700
the stack, so a new one may be required for each match(). */
701
702
0
if (md->match_function_type == MATCH_CBEGROUP)
703
0
  {
704
0
  newptrb.epb_saved_eptr = eptr;
705
0
  newptrb.epb_prev = eptrb;
706
0
  eptrb = &newptrb;
707
0
  md->match_function_type = 0;
708
0
  }
709
710
/* Now start processing the opcodes. */
711
712
0
for (;;)
713
0
  {
714
0
  minimize = possessive = FALSE;
715
0
  op = *ecode;
716
717
0
  switch(op)
718
0
    {
719
0
    case OP_MARK:
720
0
    md->nomatch_mark = ecode + 2;
721
0
    md->mark = NULL;    /* In case previously set by assertion */
722
0
    RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
723
0
      eptrb, RM55);
724
0
    if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
725
0
         md->mark == NULL) md->mark = ecode + 2;
726
727
    /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
728
    argument, and we must check whether that argument matches this MARK's
729
    argument. It is passed back in md->start_match_ptr (an overloading of that
730
    variable). If it does match, we reset that variable to the current subject
731
    position and return MATCH_SKIP. Otherwise, pass back the return code
732
    unaltered. */
733
734
0
    else if (rrc == MATCH_SKIP_ARG &&
735
0
        STRCMP_UC_UC(ecode + 2, md->start_match_ptr) == 0)
736
0
      {
737
0
      md->start_match_ptr = eptr;
738
0
      RRETURN(MATCH_SKIP);
739
0
      }
740
0
    RRETURN(rrc);
741
742
0
    case OP_FAIL:
743
0
    RRETURN(MATCH_NOMATCH);
744
745
    /* COMMIT overrides PRUNE, SKIP, and THEN */
746
747
0
    case OP_COMMIT:
748
0
    RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
749
0
      eptrb, RM52);
750
0
    if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
751
0
        rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
752
0
        rrc != MATCH_THEN)
753
0
      RRETURN(rrc);
754
0
    RRETURN(MATCH_COMMIT);
755
756
    /* PRUNE overrides THEN */
757
758
0
    case OP_PRUNE:
759
0
    RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
760
0
      eptrb, RM51);
761
0
    if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
762
0
    RRETURN(MATCH_PRUNE);
763
764
0
    case OP_PRUNE_ARG:
765
0
    md->nomatch_mark = ecode + 2;
766
0
    md->mark = NULL;    /* In case previously set by assertion */
767
0
    RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
768
0
      eptrb, RM56);
769
0
    if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
770
0
         md->mark == NULL) md->mark = ecode + 2;
771
0
    if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
772
0
    RRETURN(MATCH_PRUNE);
773
774
    /* SKIP overrides PRUNE and THEN */
775
776
0
    case OP_SKIP:
777
0
    RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
778
0
      eptrb, RM53);
779
0
    if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
780
0
      RRETURN(rrc);
781
0
    md->start_match_ptr = eptr;   /* Pass back current position */
782
0
    RRETURN(MATCH_SKIP);
783
784
    /* Note that, for Perl compatibility, SKIP with an argument does NOT set
785
    nomatch_mark. There is a flag that disables this opcode when re-matching a
786
    pattern that ended with a SKIP for which there was not a matching MARK. */
787
788
0
    case OP_SKIP_ARG:
789
0
    if (md->ignore_skip_arg)
790
0
      {
791
0
      ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
792
0
      break;
793
0
      }
794
0
    RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
795
0
      eptrb, RM57);
796
0
    if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
797
0
      RRETURN(rrc);
798
799
    /* Pass back the current skip name by overloading md->start_match_ptr and
800
    returning the special MATCH_SKIP_ARG return code. This will either be
801
    caught by a matching MARK, or get to the top, where it causes a rematch
802
    with the md->ignore_skip_arg flag set. */
803
804
0
    md->start_match_ptr = ecode + 2;
805
0
    RRETURN(MATCH_SKIP_ARG);
806
807
    /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
808
    the branch in which it occurs can be determined. Overload the start of
809
    match pointer to do this. */
810
811
0
    case OP_THEN:
812
0
    RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
813
0
      eptrb, RM54);
814
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
815
0
    md->start_match_ptr = ecode;
816
0
    RRETURN(MATCH_THEN);
817
818
0
    case OP_THEN_ARG:
819
0
    md->nomatch_mark = ecode + 2;
820
0
    md->mark = NULL;    /* In case previously set by assertion */
821
0
    RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top,
822
0
      md, eptrb, RM58);
823
0
    if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
824
0
         md->mark == NULL) md->mark = ecode + 2;
825
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
826
0
    md->start_match_ptr = ecode;
827
0
    RRETURN(MATCH_THEN);
828
829
    /* Handle an atomic group that does not contain any capturing parentheses.
830
    This can be handled like an assertion. Prior to 8.13, all atomic groups
831
    were handled this way. In 8.13, the code was changed as below for ONCE, so
832
    that backups pass through the group and thereby reset captured values.
833
    However, this uses a lot more stack, so in 8.20, atomic groups that do not
834
    contain any captures generate OP_ONCE_NC, which can be handled in the old,
835
    less stack intensive way.
836
837
    Check the alternative branches in turn - the matching won't pass the KET
838
    for this kind of subpattern. If any one branch matches, we carry on as at
839
    the end of a normal bracket, leaving the subject pointer, but resetting
840
    the start-of-match value in case it was changed by \K. */
841
842
0
    case OP_ONCE_NC:
843
0
    prev = ecode;
844
0
    saved_eptr = eptr;
845
0
    save_mark = md->mark;
846
0
    do
847
0
      {
848
0
      RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
849
0
      if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
850
0
        {
851
0
        mstart = md->start_match_ptr;
852
0
        break;
853
0
        }
854
0
      if (rrc == MATCH_THEN)
855
0
        {
856
0
        next = ecode + GET(ecode,1);
857
0
        if (md->start_match_ptr < next &&
858
0
            (*ecode == OP_ALT || *next == OP_ALT))
859
0
          rrc = MATCH_NOMATCH;
860
0
        }
861
862
0
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
863
0
      ecode += GET(ecode,1);
864
0
      md->mark = save_mark;
865
0
      }
866
0
    while (*ecode == OP_ALT);
867
868
    /* If we hit the end of the group (which could be repeated), fail */
869
870
0
    if (*ecode != OP_ONCE_NC && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
871
872
    /* Continue as from after the group, updating the offsets high water
873
    mark, since extracts may have been taken. */
874
875
0
    do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
876
877
0
    offset_top = md->end_offset_top;
878
0
    eptr = md->end_match_ptr;
879
880
    /* For a non-repeating ket, just continue at this level. This also
881
    happens for a repeating ket if no characters were matched in the group.
882
    This is the forcible breaking of infinite loops as implemented in Perl
883
    5.005. */
884
885
0
    if (*ecode == OP_KET || eptr == saved_eptr)
886
0
      {
887
0
      ecode += 1+LINK_SIZE;
888
0
      break;
889
0
      }
890
891
    /* The repeating kets try the rest of the pattern or restart from the
892
    preceding bracket, in the appropriate order. The second "call" of match()
893
    uses tail recursion, to avoid using another stack frame. */
894
895
0
    if (*ecode == OP_KETRMIN)
896
0
      {
897
0
      RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM65);
898
0
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
899
0
      ecode = prev;
900
0
      goto TAIL_RECURSE;
901
0
      }
902
0
    else  /* OP_KETRMAX */
903
0
      {
904
0
      RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
905
0
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
906
0
      ecode += 1 + LINK_SIZE;
907
0
      goto TAIL_RECURSE;
908
0
      }
909
    /* Control never gets here */
910
911
    /* Handle a capturing bracket, other than those that are possessive with an
912
    unlimited repeat. If there is space in the offset vector, save the current
913
    subject position in the working slot at the top of the vector. We mustn't
914
    change the current values of the data slot, because they may be set from a
915
    previous iteration of this group, and be referred to by a reference inside
916
    the group. A failure to match might occur after the group has succeeded,
917
    if something later on doesn't match. For this reason, we need to restore
918
    the working value and also the values of the final offsets, in case they
919
    were set by a previous iteration of the same bracket.
920
921
    If there isn't enough space in the offset vector, treat this as if it were
922
    a non-capturing bracket. Don't worry about setting the flag for the error
923
    case here; that is handled in the code for KET. */
924
925
0
    case OP_CBRA:
926
0
    case OP_SCBRA:
927
0
    number = GET2(ecode, 1+LINK_SIZE);
928
0
    offset = number << 1;
929
930
#ifdef PCRE_DEBUG
931
    printf("start bracket %d\n", number);
932
    printf("subject=");
933
    pchars(eptr, 16, TRUE, md);
934
    printf("\n");
935
#endif
936
937
0
    if (offset < md->offset_max)
938
0
      {
939
0
      save_offset1 = md->offset_vector[offset];
940
0
      save_offset2 = md->offset_vector[offset+1];
941
0
      save_offset3 = md->offset_vector[md->offset_end - number];
942
0
      save_capture_last = md->capture_last;
943
0
      save_mark = md->mark;
944
945
0
      DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
946
0
      md->offset_vector[md->offset_end - number] =
947
0
        (int)(eptr - md->start_subject);
948
949
0
      for (;;)
950
0
        {
951
0
        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
952
0
        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
953
0
          eptrb, RM1);
954
0
        if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */
955
956
        /* If we backed up to a THEN, check whether it is within the current
957
        branch by comparing the address of the THEN that is passed back with
958
        the end of the branch. If it is within the current branch, and the
959
        branch is one of two or more alternatives (it either starts or ends
960
        with OP_ALT), we have reached the limit of THEN's action, so convert
961
        the return code to NOMATCH, which will cause normal backtracking to
962
        happen from now on. Otherwise, THEN is passed back to an outer
963
        alternative. This implements Perl's treatment of parenthesized groups,
964
        where a group not containing | does not affect the current alternative,
965
        that is, (X) is NOT the same as (X|(*F)). */
966
967
0
        if (rrc == MATCH_THEN)
968
0
          {
969
0
          next = ecode + GET(ecode,1);
970
0
          if (md->start_match_ptr < next &&
971
0
              (*ecode == OP_ALT || *next == OP_ALT))
972
0
            rrc = MATCH_NOMATCH;
973
0
          }
974
975
        /* Anything other than NOMATCH is passed back. */
976
977
0
        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
978
0
        md->capture_last = save_capture_last;
979
0
        ecode += GET(ecode, 1);
980
0
        md->mark = save_mark;
981
0
        if (*ecode != OP_ALT) break;
982
0
        }
983
984
0
      DPRINTF(("bracket %d failed\n", number));
985
0
      md->offset_vector[offset] = save_offset1;
986
0
      md->offset_vector[offset+1] = save_offset2;
987
0
      md->offset_vector[md->offset_end - number] = save_offset3;
988
989
      /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */
990
991
0
      RRETURN(rrc);
992
0
      }
993
994
    /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
995
    as a non-capturing bracket. */
996
997
    /* VVVVVVVVVVVVVVVVVVVVVVVVV */
998
    /* VVVVVVVVVVVVVVVVVVVVVVVVV */
999
1000
0
    DPRINTF(("insufficient capture room: treat as non-capturing\n"));
1001
1002
    /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1003
    /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1004
1005
    /* Non-capturing or atomic group, except for possessive with unlimited
1006
    repeat and ONCE group with no captures. Loop for all the alternatives.
1007
1008
    When we get to the final alternative within the brackets, we used to return
1009
    the result of a recursive call to match() whatever happened so it was
1010
    possible to reduce stack usage by turning this into a tail recursion,
1011
    except in the case of a possibly empty group. However, now that there is
1012
    the possibility of (*THEN) occurring in the final alternative, this
1013
    optimization is no longer always possible.
1014
1015
    We can optimize if we know there are no (*THEN)s in the pattern; at present
1016
    this is the best that can be done.
1017
1018
    MATCH_ONCE is returned when the end of an atomic group is successfully
1019
    reached, but subsequent matching fails. It passes back up the tree (causing
1020
    captured values to be reset) until the original atomic group level is
1021
    reached. This is tested by comparing md->once_target with the start of the
1022
    group. At this point, the return is converted into MATCH_NOMATCH so that
1023
    previous backup points can be taken. */
1024
1025
0
    case OP_ONCE:
1026
0
    case OP_BRA:
1027
0
    case OP_SBRA:
1028
0
    DPRINTF(("start non-capturing bracket\n"));
1029
1030
0
    for (;;)
1031
0
      {
1032
0
      if (op >= OP_SBRA || op == OP_ONCE)
1033
0
        md->match_function_type = MATCH_CBEGROUP;
1034
1035
      /* If this is not a possibly empty group, and there are no (*THEN)s in
1036
      the pattern, and this is the final alternative, optimize as described
1037
      above. */
1038
1039
0
      else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
1040
0
        {
1041
0
        ecode += PRIV(OP_lengths)[*ecode];
1042
0
        goto TAIL_RECURSE;
1043
0
        }
1044
1045
      /* In all other cases, we have to make another call to match(). */
1046
1047
0
      save_mark = md->mark;
1048
0
      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
1049
0
        RM2);
1050
1051
      /* See comment in the code for capturing groups above about handling
1052
      THEN. */
1053
1054
0
      if (rrc == MATCH_THEN)
1055
0
        {
1056
0
        next = ecode + GET(ecode,1);
1057
0
        if (md->start_match_ptr < next &&
1058
0
            (*ecode == OP_ALT || *next == OP_ALT))
1059
0
          rrc = MATCH_NOMATCH;
1060
0
        }
1061
1062
0
      if (rrc != MATCH_NOMATCH)
1063
0
        {
1064
0
        if (rrc == MATCH_ONCE)
1065
0
          {
1066
0
          const pcre_uchar *scode = ecode;
1067
0
          if (*scode != OP_ONCE)           /* If not at start, find it */
1068
0
            {
1069
0
            while (*scode == OP_ALT) scode += GET(scode, 1);
1070
0
            scode -= GET(scode, 1);
1071
0
            }
1072
0
          if (md->once_target == scode) rrc = MATCH_NOMATCH;
1073
0
          }
1074
0
        RRETURN(rrc);
1075
0
        }
1076
0
      ecode += GET(ecode, 1);
1077
0
      md->mark = save_mark;
1078
0
      if (*ecode != OP_ALT) break;
1079
0
      }
1080
1081
0
    RRETURN(MATCH_NOMATCH);
1082
1083
    /* Handle possessive capturing brackets with an unlimited repeat. We come
1084
    here from BRAZERO with allow_zero set TRUE. The offset_vector values are
1085
    handled similarly to the normal case above. However, the matching is
1086
    different. The end of these brackets will always be OP_KETRPOS, which
1087
    returns MATCH_KETRPOS without going further in the pattern. By this means
1088
    we can handle the group by iteration rather than recursion, thereby
1089
    reducing the amount of stack needed. */
1090
1091
0
    case OP_CBRAPOS:
1092
0
    case OP_SCBRAPOS:
1093
0
    allow_zero = FALSE;
1094
1095
0
    POSSESSIVE_CAPTURE:
1096
0
    number = GET2(ecode, 1+LINK_SIZE);
1097
0
    offset = number << 1;
1098
1099
#ifdef PCRE_DEBUG
1100
    printf("start possessive bracket %d\n", number);
1101
    printf("subject=");
1102
    pchars(eptr, 16, TRUE, md);
1103
    printf("\n");
1104
#endif
1105
1106
0
    if (offset < md->offset_max)
1107
0
      {
1108
0
      matched_once = FALSE;
1109
0
      code_offset = (int)(ecode - md->start_code);
1110
1111
0
      save_offset1 = md->offset_vector[offset];
1112
0
      save_offset2 = md->offset_vector[offset+1];
1113
0
      save_offset3 = md->offset_vector[md->offset_end - number];
1114
0
      save_capture_last = md->capture_last;
1115
1116
0
      DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
1117
1118
      /* Each time round the loop, save the current subject position for use
1119
      when the group matches. For MATCH_MATCH, the group has matched, so we
1120
      restart it with a new subject starting position, remembering that we had
1121
      at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
1122
      usual. If we haven't matched any alternatives in any iteration, check to
1123
      see if a previous iteration matched. If so, the group has matched;
1124
      continue from afterwards. Otherwise it has failed; restore the previous
1125
      capture values before returning NOMATCH. */
1126
1127
0
      for (;;)
1128
0
        {
1129
0
        md->offset_vector[md->offset_end - number] =
1130
0
          (int)(eptr - md->start_subject);
1131
0
        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1132
0
        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1133
0
          eptrb, RM63);
1134
0
        if (rrc == MATCH_KETRPOS)
1135
0
          {
1136
0
          offset_top = md->end_offset_top;
1137
0
          eptr = md->end_match_ptr;
1138
0
          ecode = md->start_code + code_offset;
1139
0
          save_capture_last = md->capture_last;
1140
0
          matched_once = TRUE;
1141
0
          continue;
1142
0
          }
1143
1144
        /* See comment in the code for capturing groups above about handling
1145
        THEN. */
1146
1147
0
        if (rrc == MATCH_THEN)
1148
0
          {
1149
0
          next = ecode + GET(ecode,1);
1150
0
          if (md->start_match_ptr < next &&
1151
0
              (*ecode == OP_ALT || *next == OP_ALT))
1152
0
            rrc = MATCH_NOMATCH;
1153
0
          }
1154
1155
0
        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1156
0
        md->capture_last = save_capture_last;
1157
0
        ecode += GET(ecode, 1);
1158
0
        if (*ecode != OP_ALT) break;
1159
0
        }
1160
1161
0
      if (!matched_once)
1162
0
        {
1163
0
        md->offset_vector[offset] = save_offset1;
1164
0
        md->offset_vector[offset+1] = save_offset2;
1165
0
        md->offset_vector[md->offset_end - number] = save_offset3;
1166
0
        }
1167
1168
0
      if (allow_zero || matched_once)
1169
0
        {
1170
0
        ecode += 1 + LINK_SIZE;
1171
0
        break;
1172
0
        }
1173
1174
0
      RRETURN(MATCH_NOMATCH);
1175
0
      }
1176
1177
    /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
1178
    as a non-capturing bracket. */
1179
1180
    /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1181
    /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1182
1183
0
    DPRINTF(("insufficient capture room: treat as non-capturing\n"));
1184
1185
    /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1186
    /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1187
1188
    /* Non-capturing possessive bracket with unlimited repeat. We come here
1189
    from BRAZERO with allow_zero = TRUE. The code is similar to the above,
1190
    without the capturing complication. It is written out separately for speed
1191
    and cleanliness. */
1192
1193
0
    case OP_BRAPOS:
1194
0
    case OP_SBRAPOS:
1195
0
    allow_zero = FALSE;
1196
1197
0
    POSSESSIVE_NON_CAPTURE:
1198
0
    matched_once = FALSE;
1199
0
    code_offset = (int)(ecode - md->start_code);
1200
1201
0
    for (;;)
1202
0
      {
1203
0
      if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1204
0
      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1205
0
        eptrb, RM48);
1206
0
      if (rrc == MATCH_KETRPOS)
1207
0
        {
1208
0
        offset_top = md->end_offset_top;
1209
0
        eptr = md->end_match_ptr;
1210
0
        ecode = md->start_code + code_offset;
1211
0
        matched_once = TRUE;
1212
0
        continue;
1213
0
        }
1214
1215
      /* See comment in the code for capturing groups above about handling
1216
      THEN. */
1217
1218
0
      if (rrc == MATCH_THEN)
1219
0
        {
1220
0
        next = ecode + GET(ecode,1);
1221
0
        if (md->start_match_ptr < next &&
1222
0
            (*ecode == OP_ALT || *next == OP_ALT))
1223
0
          rrc = MATCH_NOMATCH;
1224
0
        }
1225
1226
0
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1227
0
      ecode += GET(ecode, 1);
1228
0
      if (*ecode != OP_ALT) break;
1229
0
      }
1230
1231
0
    if (matched_once || allow_zero)
1232
0
      {
1233
0
      ecode += 1 + LINK_SIZE;
1234
0
      break;
1235
0
      }
1236
0
    RRETURN(MATCH_NOMATCH);
1237
1238
    /* Control never reaches here. */
1239
1240
    /* Conditional group: compilation checked that there are no more than
1241
    two branches. If the condition is false, skipping the first branch takes us
1242
    past the end if there is only one branch, but that's OK because that is
1243
    exactly what going to the ket would do. */
1244
1245
0
    case OP_COND:
1246
0
    case OP_SCOND:
1247
0
    codelink = GET(ecode, 1);
1248
1249
    /* Because of the way auto-callout works during compile, a callout item is
1250
    inserted between OP_COND and an assertion condition. */
1251
1252
0
    if (ecode[LINK_SIZE+1] == OP_CALLOUT)
1253
0
      {
1254
0
      if (PUBL(callout) != NULL)
1255
0
        {
1256
0
        PUBL(callout_block) cb;
1257
0
        cb.version          = 2;   /* Version 1 of the callout block */
1258
0
        cb.callout_number   = ecode[LINK_SIZE+2];
1259
0
        cb.offset_vector    = md->offset_vector;
1260
0
#ifdef COMPILE_PCRE8
1261
0
        cb.subject          = (PCRE_SPTR)md->start_subject;
1262
#else
1263
        cb.subject          = (PCRE_SPTR16)md->start_subject;
1264
#endif
1265
0
        cb.subject_length   = (int)(md->end_subject - md->start_subject);
1266
0
        cb.start_match      = (int)(mstart - md->start_subject);
1267
0
        cb.current_position = (int)(eptr - md->start_subject);
1268
0
        cb.pattern_position = GET(ecode, LINK_SIZE + 3);
1269
0
        cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
1270
0
        cb.capture_top      = offset_top/2;
1271
0
        cb.capture_last     = md->capture_last;
1272
0
        cb.callout_data     = md->callout_data;
1273
0
        cb.mark             = md->nomatch_mark;
1274
0
        if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1275
0
        if (rrc < 0) RRETURN(rrc);
1276
0
        }
1277
0
      ecode += PRIV(OP_lengths)[OP_CALLOUT];
1278
0
      }
1279
1280
0
    condcode = ecode[LINK_SIZE+1];
1281
1282
    /* Now see what the actual condition is */
1283
1284
0
    if (condcode == OP_RREF || condcode == OP_NRREF)    /* Recursion test */
1285
0
      {
1286
0
      if (md->recursive == NULL)                /* Not recursing => FALSE */
1287
0
        {
1288
0
        condition = FALSE;
1289
0
        ecode += GET(ecode, 1);
1290
0
        }
1291
0
      else
1292
0
        {
1293
0
        int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
1294
0
        condition = (recno == RREF_ANY || recno == md->recursive->group_num);
1295
1296
        /* If the test is for recursion into a specific subpattern, and it is
1297
        false, but the test was set up by name, scan the table to see if the
1298
        name refers to any other numbers, and test them. The condition is true
1299
        if any one is set. */
1300
1301
0
        if (!condition && condcode == OP_NRREF)
1302
0
          {
1303
0
          pcre_uchar *slotA = md->name_table;
1304
0
          for (i = 0; i < md->name_count; i++)
1305
0
            {
1306
0
            if (GET2(slotA, 0) == recno) break;
1307
0
            slotA += md->name_entry_size;
1308
0
            }
1309
1310
          /* Found a name for the number - there can be only one; duplicate
1311
          names for different numbers are allowed, but not vice versa. First
1312
          scan down for duplicates. */
1313
1314
0
          if (i < md->name_count)
1315
0
            {
1316
0
            pcre_uchar *slotB = slotA;
1317
0
            while (slotB > md->name_table)
1318
0
              {
1319
0
              slotB -= md->name_entry_size;
1320
0
              if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1321
0
                {
1322
0
                condition = GET2(slotB, 0) == md->recursive->group_num;
1323
0
                if (condition) break;
1324
0
                }
1325
0
              else break;
1326
0
              }
1327
1328
            /* Scan up for duplicates */
1329
1330
0
            if (!condition)
1331
0
              {
1332
0
              slotB = slotA;
1333
0
              for (i++; i < md->name_count; i++)
1334
0
                {
1335
0
                slotB += md->name_entry_size;
1336
0
                if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1337
0
                  {
1338
0
                  condition = GET2(slotB, 0) == md->recursive->group_num;
1339
0
                  if (condition) break;
1340
0
                  }
1341
0
                else break;
1342
0
                }
1343
0
              }
1344
0
            }
1345
0
          }
1346
1347
        /* Choose branch according to the condition */
1348
1349
0
        ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1350
0
        }
1351
0
      }
1352
1353
0
    else if (condcode == OP_CREF || condcode == OP_NCREF)  /* Group used test */
1354
0
      {
1355
0
      offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
1356
0
      condition = offset < offset_top && md->offset_vector[offset] >= 0;
1357
1358
      /* If the numbered capture is unset, but the reference was by name,
1359
      scan the table to see if the name refers to any other numbers, and test
1360
      them. The condition is true if any one is set. This is tediously similar
1361
      to the code above, but not close enough to try to amalgamate. */
1362
1363
0
      if (!condition && condcode == OP_NCREF)
1364
0
        {
1365
0
        int refno = offset >> 1;
1366
0
        pcre_uchar *slotA = md->name_table;
1367
1368
0
        for (i = 0; i < md->name_count; i++)
1369
0
          {
1370
0
          if (GET2(slotA, 0) == refno) break;
1371
0
          slotA += md->name_entry_size;
1372
0
          }
1373
1374
        /* Found a name for the number - there can be only one; duplicate names
1375
        for different numbers are allowed, but not vice versa. First scan down
1376
        for duplicates. */
1377
1378
0
        if (i < md->name_count)
1379
0
          {
1380
0
          pcre_uchar *slotB = slotA;
1381
0
          while (slotB > md->name_table)
1382
0
            {
1383
0
            slotB -= md->name_entry_size;
1384
0
            if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1385
0
              {
1386
0
              offset = GET2(slotB, 0) << 1;
1387
0
              condition = offset < offset_top &&
1388
0
                md->offset_vector[offset] >= 0;
1389
0
              if (condition) break;
1390
0
              }
1391
0
            else break;
1392
0
            }
1393
1394
          /* Scan up for duplicates */
1395
1396
0
          if (!condition)
1397
0
            {
1398
0
            slotB = slotA;
1399
0
            for (i++; i < md->name_count; i++)
1400
0
              {
1401
0
              slotB += md->name_entry_size;
1402
0
              if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
1403
0
                {
1404
0
                offset = GET2(slotB, 0) << 1;
1405
0
                condition = offset < offset_top &&
1406
0
                  md->offset_vector[offset] >= 0;
1407
0
                if (condition) break;
1408
0
                }
1409
0
              else break;
1410
0
              }
1411
0
            }
1412
0
          }
1413
0
        }
1414
1415
      /* Choose branch according to the condition */
1416
1417
0
      ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
1418
0
      }
1419
1420
0
    else if (condcode == OP_DEF)     /* DEFINE - always false */
1421
0
      {
1422
0
      condition = FALSE;
1423
0
      ecode += GET(ecode, 1);
1424
0
      }
1425
1426
    /* The condition is an assertion. Call match() to evaluate it - setting
1427
    md->match_function_type to MATCH_CONDASSERT causes it to stop at the end of
1428
    an assertion. */
1429
1430
0
    else
1431
0
      {
1432
0
      md->match_function_type = MATCH_CONDASSERT;
1433
0
      RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM3);
1434
0
      if (rrc == MATCH_MATCH)
1435
0
        {
1436
0
        if (md->end_offset_top > offset_top)
1437
0
          offset_top = md->end_offset_top;  /* Captures may have happened */
1438
0
        condition = TRUE;
1439
0
        ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1440
0
        while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1441
0
        }
1442
1443
      /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
1444
      assertion; it is therefore treated as NOMATCH. */
1445
1446
0
      else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1447
0
        {
1448
0
        RRETURN(rrc);         /* Need braces because of following else */
1449
0
        }
1450
0
      else
1451
0
        {
1452
0
        condition = FALSE;
1453
0
        ecode += codelink;
1454
0
        }
1455
0
      }
1456
1457
    /* We are now at the branch that is to be obeyed. As there is only one, can
1458
    use tail recursion to avoid using another stack frame, except when there is
1459
    unlimited repeat of a possibly empty group. In the latter case, a recursive
1460
    call to match() is always required, unless the second alternative doesn't
1461
    exist, in which case we can just plough on. Note that, for compatibility
1462
    with Perl, the | in a conditional group is NOT treated as creating two
1463
    alternatives. If a THEN is encountered in the branch, it propagates out to
1464
    the enclosing alternative (unless nested in a deeper set of alternatives,
1465
    of course). */
1466
1467
0
    if (condition || *ecode == OP_ALT)
1468
0
      {
1469
0
      if (op != OP_SCOND)
1470
0
        {
1471
0
        ecode += 1 + LINK_SIZE;
1472
0
        goto TAIL_RECURSE;
1473
0
        }
1474
1475
0
      md->match_function_type = MATCH_CBEGROUP;
1476
0
      RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49);
1477
0
      RRETURN(rrc);
1478
0
      }
1479
1480
     /* Condition false & no alternative; continue after the group. */
1481
1482
0
    else
1483
0
      {
1484
0
      ecode += 1 + LINK_SIZE;
1485
0
      }
1486
0
    break;
1487
1488
1489
    /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1490
    to close any currently open capturing brackets. */
1491
1492
0
    case OP_CLOSE:
1493
0
    number = GET2(ecode, 1);
1494
0
    offset = number << 1;
1495
1496
#ifdef PCRE_DEBUG
1497
      printf("end bracket %d at *ACCEPT", number);
1498
      printf("\n");
1499
#endif
1500
1501
0
    md->capture_last = number;
1502
0
    if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1503
0
      {
1504
0
      md->offset_vector[offset] =
1505
0
        md->offset_vector[md->offset_end - number];
1506
0
      md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1507
0
      if (offset_top <= offset) offset_top = offset + 2;
1508
0
      }
1509
0
    ecode += 1 + IMM2_SIZE;
1510
0
    break;
1511
1512
1513
    /* End of the pattern, either real or forced. */
1514
1515
0
    case OP_END:
1516
0
    case OP_ACCEPT:
1517
0
    case OP_ASSERT_ACCEPT:
1518
1519
    /* If we have matched an empty string, fail if not in an assertion and not
1520
    in a recursion if either PCRE_NOTEMPTY is set, or if PCRE_NOTEMPTY_ATSTART
1521
    is set and we have matched at the start of the subject. In both cases,
1522
    backtracking will then try other alternatives, if any. */
1523
1524
0
    if (eptr == mstart && op != OP_ASSERT_ACCEPT &&
1525
0
         md->recursive == NULL &&
1526
0
         (md->notempty ||
1527
0
           (md->notempty_atstart &&
1528
0
             mstart == md->start_subject + md->start_offset)))
1529
0
      RRETURN(MATCH_NOMATCH);
1530
1531
    /* Otherwise, we have a match. */
1532
1533
0
    md->end_match_ptr = eptr;           /* Record where we ended */
1534
0
    md->end_offset_top = offset_top;    /* and how many extracts were taken */
1535
0
    md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1536
1537
    /* For some reason, the macros don't work properly if an expression is
1538
    given as the argument to RRETURN when the heap is in use. */
1539
1540
0
    rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1541
0
    RRETURN(rrc);
1542
1543
    /* Assertion brackets. Check the alternative branches in turn - the
1544
    matching won't pass the KET for an assertion. If any one branch matches,
1545
    the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1546
    start of each branch to move the current point backwards, so the code at
1547
    this level is identical to the lookahead case. When the assertion is part
1548
    of a condition, we want to return immediately afterwards. The caller of
1549
    this incarnation of the match() function will have set MATCH_CONDASSERT in
1550
    md->match_function_type, and one of these opcodes will be the first opcode
1551
    that is processed. We use a local variable that is preserved over calls to
1552
    match() to remember this case. */
1553
1554
0
    case OP_ASSERT:
1555
0
    case OP_ASSERTBACK:
1556
0
    save_mark = md->mark;
1557
0
    if (md->match_function_type == MATCH_CONDASSERT)
1558
0
      {
1559
0
      condassert = TRUE;
1560
0
      md->match_function_type = 0;
1561
0
      }
1562
0
    else condassert = FALSE;
1563
1564
0
    do
1565
0
      {
1566
0
      RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);
1567
0
      if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1568
0
        {
1569
0
        mstart = md->start_match_ptr;   /* In case \K reset it */
1570
0
        break;
1571
0
        }
1572
0
      md->mark = save_mark;
1573
1574
      /* A COMMIT failure must fail the entire assertion, without trying any
1575
      subsequent branches. */
1576
1577
0
      if (rrc == MATCH_COMMIT) RRETURN(MATCH_NOMATCH);
1578
1579
      /* PCRE does not allow THEN to escape beyond an assertion; it
1580
      is treated as NOMATCH. */
1581
1582
0
      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1583
0
      ecode += GET(ecode, 1);
1584
0
      }
1585
0
    while (*ecode == OP_ALT);
1586
1587
0
    if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
1588
1589
    /* If checking an assertion for a condition, return MATCH_MATCH. */
1590
1591
0
    if (condassert) RRETURN(MATCH_MATCH);
1592
1593
    /* Continue from after the assertion, updating the offsets high water
1594
    mark, since extracts may have been taken during the assertion. */
1595
1596
0
    do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1597
0
    ecode += 1 + LINK_SIZE;
1598
0
    offset_top = md->end_offset_top;
1599
0
    continue;
1600
1601
    /* Negative assertion: all branches must fail to match. Encountering SKIP,
1602
    PRUNE, or COMMIT means we must assume failure without checking subsequent
1603
    branches. */
1604
1605
0
    case OP_ASSERT_NOT:
1606
0
    case OP_ASSERTBACK_NOT:
1607
0
    save_mark = md->mark;
1608
0
    if (md->match_function_type == MATCH_CONDASSERT)
1609
0
      {
1610
0
      condassert = TRUE;
1611
0
      md->match_function_type = 0;
1612
0
      }
1613
0
    else condassert = FALSE;
1614
1615
0
    do
1616
0
      {
1617
0
      RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
1618
0
      md->mark = save_mark;
1619
0
      if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);
1620
0
      if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1621
0
        {
1622
0
        do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1623
0
        break;
1624
0
        }
1625
1626
      /* PCRE does not allow THEN to escape beyond an assertion; it is treated
1627
      as NOMATCH. */
1628
1629
0
      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1630
0
      ecode += GET(ecode,1);
1631
0
      }
1632
0
    while (*ecode == OP_ALT);
1633
1634
0
    if (condassert) RRETURN(MATCH_MATCH);  /* Condition assertion */
1635
1636
0
    ecode += 1 + LINK_SIZE;
1637
0
    continue;
1638
1639
    /* Move the subject pointer back. This occurs only at the start of
1640
    each branch of a lookbehind assertion. If we are too close to the start to
1641
    move back, this match function fails. When working with UTF-8 we move
1642
    back a number of characters, not bytes. */
1643
1644
0
    case OP_REVERSE:
1645
0
#ifdef SUPPORT_UTF
1646
0
    if (utf)
1647
0
      {
1648
0
      i = GET(ecode, 1);
1649
0
      while (i-- > 0)
1650
0
        {
1651
0
        eptr--;
1652
0
        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1653
0
        BACKCHAR(eptr);
1654
0
        }
1655
0
      }
1656
0
    else
1657
0
#endif
1658
1659
    /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1660
1661
0
      {
1662
0
      eptr -= GET(ecode, 1);
1663
0
      if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1664
0
      }
1665
1666
    /* Save the earliest consulted character, then skip to next op code */
1667
1668
0
    if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1669
0
    ecode += 1 + LINK_SIZE;
1670
0
    break;
1671
1672
    /* The callout item calls an external function, if one is provided, passing
1673
    details of the match so far. This is mainly for debugging, though the
1674
    function is able to force a failure. */
1675
1676
0
    case OP_CALLOUT:
1677
0
    if (PUBL(callout) != NULL)
1678
0
      {
1679
0
      PUBL(callout_block) cb;
1680
0
      cb.version          = 2;   /* Version 2 of the callout block */
1681
0
      cb.callout_number   = ecode[1];
1682
0
      cb.offset_vector    = md->offset_vector;
1683
0
#ifdef COMPILE_PCRE8
1684
0
      cb.subject          = (PCRE_SPTR)md->start_subject;
1685
#else
1686
      cb.subject          = (PCRE_SPTR16)md->start_subject;
1687
#endif
1688
0
      cb.subject_length   = (int)(md->end_subject - md->start_subject);
1689
0
      cb.start_match      = (int)(mstart - md->start_subject);
1690
0
      cb.current_position = (int)(eptr - md->start_subject);
1691
0
      cb.pattern_position = GET(ecode, 2);
1692
0
      cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1693
0
      cb.capture_top      = offset_top/2;
1694
0
      cb.capture_last     = md->capture_last;
1695
0
      cb.callout_data     = md->callout_data;
1696
0
      cb.mark             = md->nomatch_mark;
1697
0
      if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1698
0
      if (rrc < 0) RRETURN(rrc);
1699
0
      }
1700
0
    ecode += 2 + 2*LINK_SIZE;
1701
0
    break;
1702
1703
    /* Recursion either matches the current regex, or some subexpression. The
1704
    offset data is the offset to the starting bracket from the start of the
1705
    whole pattern. (This is so that it works from duplicated subpatterns.)
1706
1707
    The state of the capturing groups is preserved over recursion, and
1708
    re-instated afterwards. We don't know how many are started and not yet
1709
    finished (offset_top records the completed total) so we just have to save
1710
    all the potential data. There may be up to 65535 such values, which is too
1711
    large to put on the stack, but using malloc for small numbers seems
1712
    expensive. As a compromise, the stack is used when there are no more than
1713
    REC_STACK_SAVE_MAX values to store; otherwise malloc is used.
1714
1715
    There are also other values that have to be saved. We use a chained
1716
    sequence of blocks that actually live on the stack. Thanks to Robin Houston
1717
    for the original version of this logic. It has, however, been hacked around
1718
    a lot, so he is not to blame for the current way it works. */
1719
1720
0
    case OP_RECURSE:
1721
0
      {
1722
0
      recursion_info *ri;
1723
0
      int recno;
1724
1725
0
      callpat = md->start_code + GET(ecode, 1);
1726
0
      recno = (callpat == md->start_code)? 0 :
1727
0
        GET2(callpat, 1 + LINK_SIZE);
1728
1729
      /* Check for repeating a recursion without advancing the subject pointer.
1730
      This should catch convoluted mutual recursions. (Some simple cases are
1731
      caught at compile time.) */
1732
1733
0
      for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
1734
0
        if (recno == ri->group_num && eptr == ri->subject_position)
1735
0
          RRETURN(PCRE_ERROR_RECURSELOOP);
1736
1737
      /* Add to "recursing stack" */
1738
1739
0
      new_recursive.group_num = recno;
1740
0
      new_recursive.subject_position = eptr;
1741
0
      new_recursive.prevrec = md->recursive;
1742
0
      md->recursive = &new_recursive;
1743
1744
      /* Where to continue from afterwards */
1745
1746
0
      ecode += 1 + LINK_SIZE;
1747
1748
      /* Now save the offset data */
1749
1750
0
      new_recursive.saved_max = md->offset_end;
1751
0
      if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1752
0
        new_recursive.offset_save = stacksave;
1753
0
      else
1754
0
        {
1755
0
        new_recursive.offset_save =
1756
0
          (int *)(PUBL(malloc))(new_recursive.saved_max * sizeof(int));
1757
0
        if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1758
0
        }
1759
0
      memcpy(new_recursive.offset_save, md->offset_vector,
1760
0
            new_recursive.saved_max * sizeof(int));
1761
1762
      /* OK, now we can do the recursion. After processing each alternative,
1763
      restore the offset data. If there were nested recursions, md->recursive
1764
      might be changed, so reset it before looping. */
1765
1766
0
      DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1767
0
      cbegroup = (*callpat >= OP_SBRA);
1768
0
      do
1769
0
        {
1770
0
        if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
1771
0
        RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
1772
0
          md, eptrb, RM6);
1773
0
        memcpy(md->offset_vector, new_recursive.offset_save,
1774
0
            new_recursive.saved_max * sizeof(int));
1775
0
        md->recursive = new_recursive.prevrec;
1776
0
        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1777
0
          {
1778
0
          DPRINTF(("Recursion matched\n"));
1779
0
          if (new_recursive.offset_save != stacksave)
1780
0
            (PUBL(free))(new_recursive.offset_save);
1781
1782
          /* Set where we got to in the subject, and reset the start in case
1783
          it was changed by \K. This *is* propagated back out of a recursion,
1784
          for Perl compatibility. */
1785
1786
0
          eptr = md->end_match_ptr;
1787
0
          mstart = md->start_match_ptr;
1788
0
          goto RECURSION_MATCHED;        /* Exit loop; end processing */
1789
0
          }
1790
1791
        /* PCRE does not allow THEN or COMMIT to escape beyond a recursion; it
1792
        is treated as NOMATCH. */
1793
1794
0
        else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN &&
1795
0
                 rrc != MATCH_COMMIT)
1796
0
          {
1797
0
          DPRINTF(("Recursion gave error %d\n", rrc));
1798
0
          if (new_recursive.offset_save != stacksave)
1799
0
            (PUBL(free))(new_recursive.offset_save);
1800
0
          RRETURN(rrc);
1801
0
          }
1802
1803
0
        md->recursive = &new_recursive;
1804
0
        callpat += GET(callpat, 1);
1805
0
        }
1806
0
      while (*callpat == OP_ALT);
1807
1808
0
      DPRINTF(("Recursion didn't match\n"));
1809
0
      md->recursive = new_recursive.prevrec;
1810
0
      if (new_recursive.offset_save != stacksave)
1811
0
        (PUBL(free))(new_recursive.offset_save);
1812
0
      RRETURN(MATCH_NOMATCH);
1813
0
      }
1814
1815
0
    RECURSION_MATCHED:
1816
0
    break;
1817
1818
    /* An alternation is the end of a branch; scan along to find the end of the
1819
    bracketed group and go to there. */
1820
1821
0
    case OP_ALT:
1822
0
    do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1823
0
    break;
1824
1825
    /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1826
    indicating that it may occur zero times. It may repeat infinitely, or not
1827
    at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1828
    with fixed upper repeat limits are compiled as a number of copies, with the
1829
    optional ones preceded by BRAZERO or BRAMINZERO. */
1830
1831
0
    case OP_BRAZERO:
1832
0
    next = ecode + 1;
1833
0
    RMATCH(eptr, next, offset_top, md, eptrb, RM10);
1834
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1835
0
    do next += GET(next, 1); while (*next == OP_ALT);
1836
0
    ecode = next + 1 + LINK_SIZE;
1837
0
    break;
1838
1839
0
    case OP_BRAMINZERO:
1840
0
    next = ecode + 1;
1841
0
    do next += GET(next, 1); while (*next == OP_ALT);
1842
0
    RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, eptrb, RM11);
1843
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1844
0
    ecode++;
1845
0
    break;
1846
1847
0
    case OP_SKIPZERO:
1848
0
    next = ecode+1;
1849
0
    do next += GET(next,1); while (*next == OP_ALT);
1850
0
    ecode = next + 1 + LINK_SIZE;
1851
0
    break;
1852
1853
    /* BRAPOSZERO occurs before a possessive bracket group. Don't do anything
1854
    here; just jump to the group, with allow_zero set TRUE. */
1855
1856
0
    case OP_BRAPOSZERO:
1857
0
    op = *(++ecode);
1858
0
    allow_zero = TRUE;
1859
0
    if (op == OP_CBRAPOS || op == OP_SCBRAPOS) goto POSSESSIVE_CAPTURE;
1860
0
      goto POSSESSIVE_NON_CAPTURE;
1861
1862
    /* End of a group, repeated or non-repeating. */
1863
1864
0
    case OP_KET:
1865
0
    case OP_KETRMIN:
1866
0
    case OP_KETRMAX:
1867
0
    case OP_KETRPOS:
1868
0
    prev = ecode - GET(ecode, 1);
1869
1870
    /* If this was a group that remembered the subject start, in order to break
1871
    infinite repeats of empty string matches, retrieve the subject start from
1872
    the chain. Otherwise, set it NULL. */
1873
1874
0
    if (*prev >= OP_SBRA || *prev == OP_ONCE)
1875
0
      {
1876
0
      saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1877
0
      eptrb = eptrb->epb_prev;              /* Backup to previous group */
1878
0
      }
1879
0
    else saved_eptr = NULL;
1880
1881
    /* If we are at the end of an assertion group or a non-capturing atomic
1882
    group, stop matching and return MATCH_MATCH, but record the current high
1883
    water mark for use by positive assertions. We also need to record the match
1884
    start in case it was changed by \K. */
1885
1886
0
    if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) ||
1887
0
         *prev == OP_ONCE_NC)
1888
0
      {
1889
0
      md->end_match_ptr = eptr;      /* For ONCE_NC */
1890
0
      md->end_offset_top = offset_top;
1891
0
      md->start_match_ptr = mstart;
1892
0
      RRETURN(MATCH_MATCH);         /* Sets md->mark */
1893
0
      }
1894
1895
    /* For capturing groups we have to check the group number back at the start
1896
    and if necessary complete handling an extraction by setting the offsets and
1897
    bumping the high water mark. Whole-pattern recursion is coded as a recurse
1898
    into group 0, so it won't be picked up here. Instead, we catch it when the
1899
    OP_END is reached. Other recursion is handled here. We just have to record
1900
    the current subject position and start match pointer and give a MATCH
1901
    return. */
1902
1903
0
    if (*prev == OP_CBRA || *prev == OP_SCBRA ||
1904
0
        *prev == OP_CBRAPOS || *prev == OP_SCBRAPOS)
1905
0
      {
1906
0
      number = GET2(prev, 1+LINK_SIZE);
1907
0
      offset = number << 1;
1908
1909
#ifdef PCRE_DEBUG
1910
      printf("end bracket %d", number);
1911
      printf("\n");
1912
#endif
1913
1914
      /* Handle a recursively called group. */
1915
1916
0
      if (md->recursive != NULL && md->recursive->group_num == number)
1917
0
        {
1918
0
        md->end_match_ptr = eptr;
1919
0
        md->start_match_ptr = mstart;
1920
0
        RRETURN(MATCH_MATCH);
1921
0
        }
1922
1923
      /* Deal with capturing */
1924
1925
0
      md->capture_last = number;
1926
0
      if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1927
0
        {
1928
        /* If offset is greater than offset_top, it means that we are
1929
        "skipping" a capturing group, and that group's offsets must be marked
1930
        unset. In earlier versions of PCRE, all the offsets were unset at the
1931
        start of matching, but this doesn't work because atomic groups and
1932
        assertions can cause a value to be set that should later be unset.
1933
        Example: matching /(?>(a))b|(a)c/ against "ac". This sets group 1 as
1934
        part of the atomic group, but this is not on the final matching path,
1935
        so must be unset when 2 is set. (If there is no group 2, there is no
1936
        problem, because offset_top will then be 2, indicating no capture.) */
1937
1938
0
        if (offset > offset_top)
1939
0
          {
1940
0
          int *iptr = md->offset_vector + offset_top;
1941
0
          int *iend = md->offset_vector + offset;
1942
0
          while (iptr < iend) *iptr++ = -1;
1943
0
          }
1944
1945
        /* Now make the extraction */
1946
1947
0
        md->offset_vector[offset] =
1948
0
          md->offset_vector[md->offset_end - number];
1949
0
        md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1950
0
        if (offset_top <= offset) offset_top = offset + 2;
1951
0
        }
1952
0
      }
1953
1954
    /* For an ordinary non-repeating ket, just continue at this level. This
1955
    also happens for a repeating ket if no characters were matched in the
1956
    group. This is the forcible breaking of infinite loops as implemented in
1957
    Perl 5.005. For a non-repeating atomic group that includes captures,
1958
    establish a backup point by processing the rest of the pattern at a lower
1959
    level. If this results in a NOMATCH return, pass MATCH_ONCE back to the
1960
    original OP_ONCE level, thereby bypassing intermediate backup points, but
1961
    resetting any captures that happened along the way. */
1962
1963
0
    if (*ecode == OP_KET || eptr == saved_eptr)
1964
0
      {
1965
0
      if (*prev == OP_ONCE)
1966
0
        {
1967
0
        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM12);
1968
0
        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1969
0
        md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
1970
0
        RRETURN(MATCH_ONCE);
1971
0
        }
1972
0
      ecode += 1 + LINK_SIZE;    /* Carry on at this level */
1973
0
      break;
1974
0
      }
1975
1976
    /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
1977
    and return the MATCH_KETRPOS. This makes it possible to do the repeats one
1978
    at a time from the outer level, thus saving stack. */
1979
1980
0
    if (*ecode == OP_KETRPOS)
1981
0
      {
1982
0
      md->end_match_ptr = eptr;
1983
0
      md->end_offset_top = offset_top;
1984
0
      RRETURN(MATCH_KETRPOS);
1985
0
      }
1986
1987
    /* The normal repeating kets try the rest of the pattern or restart from
1988
    the preceding bracket, in the appropriate order. In the second case, we can
1989
    use tail recursion to avoid using another stack frame, unless we have an
1990
    atomic group or an unlimited repeat of a group that can match an empty
1991
    string. */
1992
1993
0
    if (*ecode == OP_KETRMIN)
1994
0
      {
1995
0
      RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM7);
1996
0
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1997
0
      if (*prev == OP_ONCE)
1998
0
        {
1999
0
        RMATCH(eptr, prev, offset_top, md, eptrb, RM8);
2000
0
        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2001
0
        md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
2002
0
        RRETURN(MATCH_ONCE);
2003
0
        }
2004
0
      if (*prev >= OP_SBRA)    /* Could match an empty string */
2005
0
        {
2006
0
        RMATCH(eptr, prev, offset_top, md, eptrb, RM50);
2007
0
        RRETURN(rrc);
2008
0
        }
2009
0
      ecode = prev;
2010
0
      goto TAIL_RECURSE;
2011
0
      }
2012
0
    else  /* OP_KETRMAX */
2013
0
      {
2014
0
      RMATCH(eptr, prev, offset_top, md, eptrb, RM13);
2015
0
      if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH;
2016
0
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2017
0
      if (*prev == OP_ONCE)
2018
0
        {
2019
0
        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM9);
2020
0
        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2021
0
        md->once_target = prev;
2022
0
        RRETURN(MATCH_ONCE);
2023
0
        }
2024
0
      ecode += 1 + LINK_SIZE;
2025
0
      goto TAIL_RECURSE;
2026
0
      }
2027
    /* Control never gets here */
2028
2029
    /* Not multiline mode: start of subject assertion, unless notbol. */
2030
2031
0
    case OP_CIRC:
2032
0
    if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
2033
2034
    /* Start of subject assertion */
2035
2036
0
    case OP_SOD:
2037
0
    if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
2038
0
    ecode++;
2039
0
    break;
2040
2041
    /* Multiline mode: start of subject unless notbol, or after any newline. */
2042
2043
0
    case OP_CIRCM:
2044
0
    if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
2045
0
    if (eptr != md->start_subject &&
2046
0
        (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
2047
0
      RRETURN(MATCH_NOMATCH);
2048
0
    ecode++;
2049
0
    break;
2050
2051
    /* Start of match assertion */
2052
2053
0
    case OP_SOM:
2054
0
    if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
2055
0
    ecode++;
2056
0
    break;
2057
2058
    /* Reset the start of match point */
2059
2060
0
    case OP_SET_SOM:
2061
0
    mstart = eptr;
2062
0
    ecode++;
2063
0
    break;
2064
2065
    /* Multiline mode: assert before any newline, or before end of subject
2066
    unless noteol is set. */
2067
2068
0
    case OP_DOLLM:
2069
0
    if (eptr < md->end_subject)
2070
0
      {
2071
0
      if (!IS_NEWLINE(eptr))
2072
0
        {
2073
0
        if (md->partial != 0 &&
2074
0
            eptr + 1 >= md->end_subject &&
2075
0
            NLBLOCK->nltype == NLTYPE_FIXED &&
2076
0
            NLBLOCK->nllen == 2 &&
2077
0
            *eptr == NLBLOCK->nl[0])
2078
0
          {
2079
0
          md->hitend = TRUE;
2080
0
          if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2081
0
          }
2082
0
        RRETURN(MATCH_NOMATCH);
2083
0
        }
2084
0
      }
2085
0
    else
2086
0
      {
2087
0
      if (md->noteol) RRETURN(MATCH_NOMATCH);
2088
0
      SCHECK_PARTIAL();
2089
0
      }
2090
0
    ecode++;
2091
0
    break;
2092
2093
    /* Not multiline mode: assert before a terminating newline or before end of
2094
    subject unless noteol is set. */
2095
2096
0
    case OP_DOLL:
2097
0
    if (md->noteol) RRETURN(MATCH_NOMATCH);
2098
0
    if (!md->endonly) goto ASSERT_NL_OR_EOS;
2099
2100
    /* ... else fall through for endonly */
2101
2102
    /* End of subject assertion (\z) */
2103
2104
0
    case OP_EOD:
2105
0
    if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
2106
0
    SCHECK_PARTIAL();
2107
0
    ecode++;
2108
0
    break;
2109
2110
    /* End of subject or ending \n assertion (\Z) */
2111
2112
0
    case OP_EODN:
2113
0
    ASSERT_NL_OR_EOS:
2114
0
    if (eptr < md->end_subject &&
2115
0
        (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
2116
0
      {
2117
0
      if (md->partial != 0 &&
2118
0
          eptr + 1 >= md->end_subject &&
2119
0
          NLBLOCK->nltype == NLTYPE_FIXED &&
2120
0
          NLBLOCK->nllen == 2 &&
2121
0
          *eptr == NLBLOCK->nl[0])
2122
0
        {
2123
0
        md->hitend = TRUE;
2124
0
        if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2125
0
        }
2126
0
      RRETURN(MATCH_NOMATCH);
2127
0
      }
2128
2129
    /* Either at end of string or \n before end. */
2130
2131
0
    SCHECK_PARTIAL();
2132
0
    ecode++;
2133
0
    break;
2134
2135
    /* Word boundary assertions */
2136
2137
0
    case OP_NOT_WORD_BOUNDARY:
2138
0
    case OP_WORD_BOUNDARY:
2139
0
      {
2140
2141
      /* Find out if the previous and current characters are "word" characters.
2142
      It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
2143
      be "non-word" characters. Remember the earliest consulted character for
2144
      partial matching. */
2145
2146
0
#ifdef SUPPORT_UTF
2147
0
      if (utf)
2148
0
        {
2149
        /* Get status of previous character */
2150
2151
0
        if (eptr == md->start_subject) prev_is_word = FALSE; else
2152
0
          {
2153
0
          PCRE_PUCHAR lastptr = eptr - 1;
2154
0
          BACKCHAR(lastptr);
2155
0
          if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
2156
0
          GETCHAR(c, lastptr);
2157
0
#ifdef SUPPORT_UCP
2158
0
          if (md->use_ucp)
2159
0
            {
2160
0
            if (c == '_') prev_is_word = TRUE; else
2161
0
              {
2162
0
              int cat = UCD_CATEGORY(c);
2163
0
              prev_is_word = (cat == ucp_L || cat == ucp_N);
2164
0
              }
2165
0
            }
2166
0
          else
2167
0
#endif
2168
0
          prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
2169
0
          }
2170
2171
        /* Get status of next character */
2172
2173
0
        if (eptr >= md->end_subject)
2174
0
          {
2175
0
          SCHECK_PARTIAL();
2176
0
          cur_is_word = FALSE;
2177
0
          }
2178
0
        else
2179
0
          {
2180
0
          GETCHAR(c, eptr);
2181
0
#ifdef SUPPORT_UCP
2182
0
          if (md->use_ucp)
2183
0
            {
2184
0
            if (c == '_') cur_is_word = TRUE; else
2185
0
              {
2186
0
              int cat = UCD_CATEGORY(c);
2187
0
              cur_is_word = (cat == ucp_L || cat == ucp_N);
2188
0
              }
2189
0
            }
2190
0
          else
2191
0
#endif
2192
0
          cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
2193
0
          }
2194
0
        }
2195
0
      else
2196
0
#endif
2197
2198
      /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
2199
      consistency with the behaviour of \w we do use it in this case. */
2200
2201
0
        {
2202
        /* Get status of previous character */
2203
2204
0
        if (eptr == md->start_subject) prev_is_word = FALSE; else
2205
0
          {
2206
0
          if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
2207
0
#ifdef SUPPORT_UCP
2208
0
          if (md->use_ucp)
2209
0
            {
2210
0
            c = eptr[-1];
2211
0
            if (c == '_') prev_is_word = TRUE; else
2212
0
              {
2213
0
              int cat = UCD_CATEGORY(c);
2214
0
              prev_is_word = (cat == ucp_L || cat == ucp_N);
2215
0
              }
2216
0
            }
2217
0
          else
2218
0
#endif
2219
0
          prev_is_word = MAX_255(eptr[-1])
2220
0
            && ((md->ctypes[eptr[-1]] & ctype_word) != 0);
2221
0
          }
2222
2223
        /* Get status of next character */
2224
2225
0
        if (eptr >= md->end_subject)
2226
0
          {
2227
0
          SCHECK_PARTIAL();
2228
0
          cur_is_word = FALSE;
2229
0
          }
2230
0
        else
2231
0
#ifdef SUPPORT_UCP
2232
0
        if (md->use_ucp)
2233
0
          {
2234
0
          c = *eptr;
2235
0
          if (c == '_') cur_is_word = TRUE; else
2236
0
            {
2237
0
            int cat = UCD_CATEGORY(c);
2238
0
            cur_is_word = (cat == ucp_L || cat == ucp_N);
2239
0
            }
2240
0
          }
2241
0
        else
2242
0
#endif
2243
0
        cur_is_word = MAX_255(*eptr)
2244
0
          && ((md->ctypes[*eptr] & ctype_word) != 0);
2245
0
        }
2246
2247
      /* Now see if the situation is what we want */
2248
2249
0
      if ((*ecode++ == OP_WORD_BOUNDARY)?
2250
0
           cur_is_word == prev_is_word : cur_is_word != prev_is_word)
2251
0
        RRETURN(MATCH_NOMATCH);
2252
0
      }
2253
0
    break;
2254
2255
    /* Match any single character type except newline; have to take care with
2256
    CRLF newlines and partial matching. */
2257
2258
0
    case OP_ANY:
2259
0
    if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
2260
0
    if (md->partial != 0 &&
2261
0
        eptr + 1 >= md->end_subject &&
2262
0
        NLBLOCK->nltype == NLTYPE_FIXED &&
2263
0
        NLBLOCK->nllen == 2 &&
2264
0
        *eptr == NLBLOCK->nl[0])
2265
0
      {
2266
0
      md->hitend = TRUE;
2267
0
      if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2268
0
      }
2269
2270
    /* Fall through */
2271
2272
    /* Match any single character whatsoever. */
2273
2274
0
    case OP_ALLANY:
2275
0
    if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2276
0
      {                            /* not be updated before SCHECK_PARTIAL. */
2277
0
      SCHECK_PARTIAL();
2278
0
      RRETURN(MATCH_NOMATCH);
2279
0
      }
2280
0
    eptr++;
2281
0
#ifdef SUPPORT_UTF
2282
0
    if (utf) ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
2283
0
#endif
2284
0
    ecode++;
2285
0
    break;
2286
2287
    /* Match a single byte, even in UTF-8 mode. This opcode really does match
2288
    any byte, even newline, independent of the setting of PCRE_DOTALL. */
2289
2290
0
    case OP_ANYBYTE:
2291
0
    if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2292
0
      {                            /* not be updated before SCHECK_PARTIAL. */
2293
0
      SCHECK_PARTIAL();
2294
0
      RRETURN(MATCH_NOMATCH);
2295
0
      }
2296
0
    eptr++;
2297
0
    ecode++;
2298
0
    break;
2299
2300
0
    case OP_NOT_DIGIT:
2301
0
    if (eptr >= md->end_subject)
2302
0
      {
2303
0
      SCHECK_PARTIAL();
2304
0
      RRETURN(MATCH_NOMATCH);
2305
0
      }
2306
0
    GETCHARINCTEST(c, eptr);
2307
0
    if (
2308
0
#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2309
0
       c < 256 &&
2310
0
#endif
2311
0
       (md->ctypes[c] & ctype_digit) != 0
2312
0
       )
2313
0
      RRETURN(MATCH_NOMATCH);
2314
0
    ecode++;
2315
0
    break;
2316
2317
0
    case OP_DIGIT:
2318
0
    if (eptr >= md->end_subject)
2319
0
      {
2320
0
      SCHECK_PARTIAL();
2321
0
      RRETURN(MATCH_NOMATCH);
2322
0
      }
2323
0
    GETCHARINCTEST(c, eptr);
2324
0
    if (
2325
0
#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2326
0
       c > 255 ||
2327
0
#endif
2328
0
       (md->ctypes[c] & ctype_digit) == 0
2329
0
       )
2330
0
      RRETURN(MATCH_NOMATCH);
2331
0
    ecode++;
2332
0
    break;
2333
2334
0
    case OP_NOT_WHITESPACE:
2335
0
    if (eptr >= md->end_subject)
2336
0
      {
2337
0
      SCHECK_PARTIAL();
2338
0
      RRETURN(MATCH_NOMATCH);
2339
0
      }
2340
0
    GETCHARINCTEST(c, eptr);
2341
0
    if (
2342
0
#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2343
0
       c < 256 &&
2344
0
#endif
2345
0
       (md->ctypes[c] & ctype_space) != 0
2346
0
       )
2347
0
      RRETURN(MATCH_NOMATCH);
2348
0
    ecode++;
2349
0
    break;
2350
2351
0
    case OP_WHITESPACE:
2352
0
    if (eptr >= md->end_subject)
2353
0
      {
2354
0
      SCHECK_PARTIAL();
2355
0
      RRETURN(MATCH_NOMATCH);
2356
0
      }
2357
0
    GETCHARINCTEST(c, eptr);
2358
0
    if (
2359
0
#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2360
0
       c > 255 ||
2361
0
#endif
2362
0
       (md->ctypes[c] & ctype_space) == 0
2363
0
       )
2364
0
      RRETURN(MATCH_NOMATCH);
2365
0
    ecode++;
2366
0
    break;
2367
2368
0
    case OP_NOT_WORDCHAR:
2369
0
    if (eptr >= md->end_subject)
2370
0
      {
2371
0
      SCHECK_PARTIAL();
2372
0
      RRETURN(MATCH_NOMATCH);
2373
0
      }
2374
0
    GETCHARINCTEST(c, eptr);
2375
0
    if (
2376
0
#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2377
0
       c < 256 &&
2378
0
#endif
2379
0
       (md->ctypes[c] & ctype_word) != 0
2380
0
       )
2381
0
      RRETURN(MATCH_NOMATCH);
2382
0
    ecode++;
2383
0
    break;
2384
2385
0
    case OP_WORDCHAR:
2386
0
    if (eptr >= md->end_subject)
2387
0
      {
2388
0
      SCHECK_PARTIAL();
2389
0
      RRETURN(MATCH_NOMATCH);
2390
0
      }
2391
0
    GETCHARINCTEST(c, eptr);
2392
0
    if (
2393
0
#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2394
0
       c > 255 ||
2395
0
#endif
2396
0
       (md->ctypes[c] & ctype_word) == 0
2397
0
       )
2398
0
      RRETURN(MATCH_NOMATCH);
2399
0
    ecode++;
2400
0
    break;
2401
2402
0
    case OP_ANYNL:
2403
0
    if (eptr >= md->end_subject)
2404
0
      {
2405
0
      SCHECK_PARTIAL();
2406
0
      RRETURN(MATCH_NOMATCH);
2407
0
      }
2408
0
    GETCHARINCTEST(c, eptr);
2409
0
    switch(c)
2410
0
      {
2411
0
      default: RRETURN(MATCH_NOMATCH);
2412
2413
0
      case 0x000d:
2414
0
      if (eptr >= md->end_subject)
2415
0
        {
2416
0
        SCHECK_PARTIAL();
2417
0
        }
2418
0
      else if (*eptr == 0x0a) eptr++;
2419
0
      break;
2420
2421
0
      case 0x000a:
2422
0
      break;
2423
2424
0
      case 0x000b:
2425
0
      case 0x000c:
2426
0
      case 0x0085:
2427
0
      case 0x2028:
2428
0
      case 0x2029:
2429
0
      if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
2430
0
      break;
2431
0
      }
2432
0
    ecode++;
2433
0
    break;
2434
2435
0
    case OP_NOT_HSPACE:
2436
0
    if (eptr >= md->end_subject)
2437
0
      {
2438
0
      SCHECK_PARTIAL();
2439
0
      RRETURN(MATCH_NOMATCH);
2440
0
      }
2441
0
    GETCHARINCTEST(c, eptr);
2442
0
    switch(c)
2443
0
      {
2444
0
      default: break;
2445
0
      case 0x09:      /* HT */
2446
0
      case 0x20:      /* SPACE */
2447
0
      case 0xa0:      /* NBSP */
2448
0
      case 0x1680:    /* OGHAM SPACE MARK */
2449
0
      case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2450
0
      case 0x2000:    /* EN QUAD */
2451
0
      case 0x2001:    /* EM QUAD */
2452
0
      case 0x2002:    /* EN SPACE */
2453
0
      case 0x2003:    /* EM SPACE */
2454
0
      case 0x2004:    /* THREE-PER-EM SPACE */
2455
0
      case 0x2005:    /* FOUR-PER-EM SPACE */
2456
0
      case 0x2006:    /* SIX-PER-EM SPACE */
2457
0
      case 0x2007:    /* FIGURE SPACE */
2458
0
      case 0x2008:    /* PUNCTUATION SPACE */
2459
0
      case 0x2009:    /* THIN SPACE */
2460
0
      case 0x200A:    /* HAIR SPACE */
2461
0
      case 0x202f:    /* NARROW NO-BREAK SPACE */
2462
0
      case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2463
0
      case 0x3000:    /* IDEOGRAPHIC SPACE */
2464
0
      RRETURN(MATCH_NOMATCH);
2465
0
      }
2466
0
    ecode++;
2467
0
    break;
2468
2469
0
    case OP_HSPACE:
2470
0
    if (eptr >= md->end_subject)
2471
0
      {
2472
0
      SCHECK_PARTIAL();
2473
0
      RRETURN(MATCH_NOMATCH);
2474
0
      }
2475
0
    GETCHARINCTEST(c, eptr);
2476
0
    switch(c)
2477
0
      {
2478
0
      default: RRETURN(MATCH_NOMATCH);
2479
0
      case 0x09:      /* HT */
2480
0
      case 0x20:      /* SPACE */
2481
0
      case 0xa0:      /* NBSP */
2482
0
      case 0x1680:    /* OGHAM SPACE MARK */
2483
0
      case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2484
0
      case 0x2000:    /* EN QUAD */
2485
0
      case 0x2001:    /* EM QUAD */
2486
0
      case 0x2002:    /* EN SPACE */
2487
0
      case 0x2003:    /* EM SPACE */
2488
0
      case 0x2004:    /* THREE-PER-EM SPACE */
2489
0
      case 0x2005:    /* FOUR-PER-EM SPACE */
2490
0
      case 0x2006:    /* SIX-PER-EM SPACE */
2491
0
      case 0x2007:    /* FIGURE SPACE */
2492
0
      case 0x2008:    /* PUNCTUATION SPACE */
2493
0
      case 0x2009:    /* THIN SPACE */
2494
0
      case 0x200A:    /* HAIR SPACE */
2495
0
      case 0x202f:    /* NARROW NO-BREAK SPACE */
2496
0
      case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2497
0
      case 0x3000:    /* IDEOGRAPHIC SPACE */
2498
0
      break;
2499
0
      }
2500
0
    ecode++;
2501
0
    break;
2502
2503
0
    case OP_NOT_VSPACE:
2504
0
    if (eptr >= md->end_subject)
2505
0
      {
2506
0
      SCHECK_PARTIAL();
2507
0
      RRETURN(MATCH_NOMATCH);
2508
0
      }
2509
0
    GETCHARINCTEST(c, eptr);
2510
0
    switch(c)
2511
0
      {
2512
0
      default: break;
2513
0
      case 0x0a:      /* LF */
2514
0
      case 0x0b:      /* VT */
2515
0
      case 0x0c:      /* FF */
2516
0
      case 0x0d:      /* CR */
2517
0
      case 0x85:      /* NEL */
2518
0
      case 0x2028:    /* LINE SEPARATOR */
2519
0
      case 0x2029:    /* PARAGRAPH SEPARATOR */
2520
0
      RRETURN(MATCH_NOMATCH);
2521
0
      }
2522
0
    ecode++;
2523
0
    break;
2524
2525
0
    case OP_VSPACE:
2526
0
    if (eptr >= md->end_subject)
2527
0
      {
2528
0
      SCHECK_PARTIAL();
2529
0
      RRETURN(MATCH_NOMATCH);
2530
0
      }
2531
0
    GETCHARINCTEST(c, eptr);
2532
0
    switch(c)
2533
0
      {
2534
0
      default: RRETURN(MATCH_NOMATCH);
2535
0
      case 0x0a:      /* LF */
2536
0
      case 0x0b:      /* VT */
2537
0
      case 0x0c:      /* FF */
2538
0
      case 0x0d:      /* CR */
2539
0
      case 0x85:      /* NEL */
2540
0
      case 0x2028:    /* LINE SEPARATOR */
2541
0
      case 0x2029:    /* PARAGRAPH SEPARATOR */
2542
0
      break;
2543
0
      }
2544
0
    ecode++;
2545
0
    break;
2546
2547
0
#ifdef SUPPORT_UCP
2548
    /* Check the next character by Unicode property. We will get here only
2549
    if the support is in the binary; otherwise a compile-time error occurs. */
2550
2551
0
    case OP_PROP:
2552
0
    case OP_NOTPROP:
2553
0
    if (eptr >= md->end_subject)
2554
0
      {
2555
0
      SCHECK_PARTIAL();
2556
0
      RRETURN(MATCH_NOMATCH);
2557
0
      }
2558
0
    GETCHARINCTEST(c, eptr);
2559
0
      {
2560
0
      const pcre_uint8 chartype = UCD_CHARTYPE(c);
2561
2562
0
      switch(ecode[1])
2563
0
        {
2564
0
        case PT_ANY:
2565
0
        if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2566
0
        break;
2567
2568
0
        case PT_LAMP:
2569
0
        if ((chartype == ucp_Lu ||
2570
0
             chartype == ucp_Ll ||
2571
0
             chartype == ucp_Lt) == (op == OP_NOTPROP))
2572
0
          RRETURN(MATCH_NOMATCH);
2573
0
        break;
2574
2575
0
        case PT_GC:
2576
0
        if ((ecode[2] != PRIV(ucp_gentype)[chartype]) == (op == OP_PROP))
2577
0
          RRETURN(MATCH_NOMATCH);
2578
0
        break;
2579
2580
0
        case PT_PC:
2581
0
        if ((ecode[2] != chartype) == (op == OP_PROP))
2582
0
          RRETURN(MATCH_NOMATCH);
2583
0
        break;
2584
2585
0
        case PT_SC:
2586
0
        if ((ecode[2] != UCD_SCRIPT(c)) == (op == OP_PROP))
2587
0
          RRETURN(MATCH_NOMATCH);
2588
0
        break;
2589
2590
        /* These are specials */
2591
2592
0
        case PT_ALNUM:
2593
0
        if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
2594
0
             PRIV(ucp_gentype)[chartype] == ucp_N) == (op == OP_NOTPROP))
2595
0
          RRETURN(MATCH_NOMATCH);
2596
0
        break;
2597
2598
0
        case PT_SPACE:    /* Perl space */
2599
0
        if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
2600
0
             c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2601
0
               == (op == OP_NOTPROP))
2602
0
          RRETURN(MATCH_NOMATCH);
2603
0
        break;
2604
2605
0
        case PT_PXSPACE:  /* POSIX space */
2606
0
        if ((PRIV(ucp_gentype)[chartype] == ucp_Z ||
2607
0
             c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2608
0
             c == CHAR_FF || c == CHAR_CR)
2609
0
               == (op == OP_NOTPROP))
2610
0
          RRETURN(MATCH_NOMATCH);
2611
0
        break;
2612
2613
0
        case PT_WORD:
2614
0
        if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
2615
0
             PRIV(ucp_gentype)[chartype] == ucp_N ||
2616
0
             c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2617
0
          RRETURN(MATCH_NOMATCH);
2618
0
        break;
2619
2620
        /* This should never occur */
2621
2622
0
        default:
2623
0
        RRETURN(PCRE_ERROR_INTERNAL);
2624
0
        }
2625
2626
0
      ecode += 3;
2627
0
      }
2628
0
    break;
2629
2630
    /* Match an extended Unicode sequence. We will get here only if the support
2631
    is in the binary; otherwise a compile-time error occurs. */
2632
2633
0
    case OP_EXTUNI:
2634
0
    if (eptr >= md->end_subject)
2635
0
      {
2636
0
      SCHECK_PARTIAL();
2637
0
      RRETURN(MATCH_NOMATCH);
2638
0
      }
2639
0
    GETCHARINCTEST(c, eptr);
2640
0
    if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);
2641
0
    while (eptr < md->end_subject)
2642
0
      {
2643
0
      int len = 1;
2644
0
      if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
2645
0
      if (UCD_CATEGORY(c) != ucp_M) break;
2646
0
      eptr += len;
2647
0
      }
2648
0
    CHECK_PARTIAL();
2649
0
    ecode++;
2650
0
    break;
2651
0
#endif
2652
2653
2654
    /* Match a back reference, possibly repeatedly. Look past the end of the
2655
    item to see if there is repeat information following. The code is similar
2656
    to that for character classes, but repeated for efficiency. Then obey
2657
    similar code to character type repeats - written out again for speed.
2658
    However, if the referenced string is the empty string, always treat
2659
    it as matched, any number of times (otherwise there could be infinite
2660
    loops). */
2661
2662
0
    case OP_REF:
2663
0
    case OP_REFI:
2664
0
    caseless = op == OP_REFI;
2665
0
    offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2666
0
    ecode += 1 + IMM2_SIZE;
2667
2668
    /* If the reference is unset, there are two possibilities:
2669
2670
    (a) In the default, Perl-compatible state, set the length negative;
2671
    this ensures that every attempt at a match fails. We can't just fail
2672
    here, because of the possibility of quantifiers with zero minima.
2673
2674
    (b) If the JavaScript compatibility flag is set, set the length to zero
2675
    so that the back reference matches an empty string.
2676
2677
    Otherwise, set the length to the length of what was matched by the
2678
    referenced subpattern. */
2679
2680
0
    if (offset >= offset_top || md->offset_vector[offset] < 0)
2681
0
      length = (md->jscript_compat)? 0 : -1;
2682
0
    else
2683
0
      length = md->offset_vector[offset+1] - md->offset_vector[offset];
2684
2685
    /* Set up for repetition, or handle the non-repeated case */
2686
2687
0
    switch (*ecode)
2688
0
      {
2689
0
      case OP_CRSTAR:
2690
0
      case OP_CRMINSTAR:
2691
0
      case OP_CRPLUS:
2692
0
      case OP_CRMINPLUS:
2693
0
      case OP_CRQUERY:
2694
0
      case OP_CRMINQUERY:
2695
0
      c = *ecode++ - OP_CRSTAR;
2696
0
      minimize = (c & 1) != 0;
2697
0
      min = rep_min[c];                 /* Pick up values from tables; */
2698
0
      max = rep_max[c];                 /* zero for max => infinity */
2699
0
      if (max == 0) max = INT_MAX;
2700
0
      break;
2701
2702
0
      case OP_CRRANGE:
2703
0
      case OP_CRMINRANGE:
2704
0
      minimize = (*ecode == OP_CRMINRANGE);
2705
0
      min = GET2(ecode, 1);
2706
0
      max = GET2(ecode, 1 + IMM2_SIZE);
2707
0
      if (max == 0) max = INT_MAX;
2708
0
      ecode += 1 + 2 * IMM2_SIZE;
2709
0
      break;
2710
2711
0
      default:               /* No repeat follows */
2712
0
      if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
2713
0
        {
2714
0
        if (length == -2) eptr = md->end_subject;   /* Partial match */
2715
0
        CHECK_PARTIAL();
2716
0
        RRETURN(MATCH_NOMATCH);
2717
0
        }
2718
0
      eptr += length;
2719
0
      continue;              /* With the main loop */
2720
0
      }
2721
2722
    /* Handle repeated back references. If the length of the reference is
2723
    zero, just continue with the main loop. If the length is negative, it
2724
    means the reference is unset in non-JavaScript-compatible mode. If the minimum is
2725
    zero, we can continue at the same level without recursion. For any other
2726
    minimum, carrying on will result in NOMATCH. */
2727
2728
0
    if (length == 0) continue;
2729
0
    if (length < 0 && min == 0) continue;
2730
2731
    /* First, ensure the minimum number of matches are present. We get back
2732
    the length of the reference string explicitly rather than passing the
2733
    address of eptr, so that eptr can be a register variable. */
2734
2735
0
    for (i = 1; i <= min; i++)
2736
0
      {
2737
0
      int slength;
2738
0
      if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2739
0
        {
2740
0
        if (slength == -2) eptr = md->end_subject;   /* Partial match */
2741
0
        CHECK_PARTIAL();
2742
0
        RRETURN(MATCH_NOMATCH);
2743
0
        }
2744
0
      eptr += slength;
2745
0
      }
2746
2747
    /* If min = max, continue at the same level without recursion.
2748
    They are not both allowed to be zero. */
2749
2750
0
    if (min == max) continue;
2751
2752
    /* If minimizing, keep trying and advancing the pointer */
2753
2754
0
    if (minimize)
2755
0
      {
2756
0
      for (fi = min;; fi++)
2757
0
        {
2758
0
        int slength;
2759
0
        RMATCH(eptr, ecode, offset_top, md, eptrb, RM14);
2760
0
        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2761
0
        if (fi >= max) RRETURN(MATCH_NOMATCH);
2762
0
        if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2763
0
          {
2764
0
          if (slength == -2) eptr = md->end_subject;   /* Partial match */
2765
0
          CHECK_PARTIAL();
2766
0
          RRETURN(MATCH_NOMATCH);
2767
0
          }
2768
0
        eptr += slength;
2769
0
        }
2770
      /* Control never gets here */
2771
0
      }
2772
2773
    /* If maximizing, find the longest string and work backwards */
2774
2775
0
    else
2776
0
      {
2777
0
      pp = eptr;
2778
0
      for (i = min; i < max; i++)
2779
0
        {
2780
0
        int slength;
2781
0
        if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2782
0
          {
2783
          /* Can't use CHECK_PARTIAL because we don't want to update eptr in
2784
          the soft partial matching case. */
2785
2786
0
          if (slength == -2 && md->partial != 0 &&
2787
0
              md->end_subject > md->start_used_ptr)
2788
0
            {
2789
0
            md->hitend = TRUE;
2790
0
            if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2791
0
            }
2792
0
          break;
2793
0
          }
2794
0
        eptr += slength;
2795
0
        }
2796
2797
0
      while (eptr >= pp)
2798
0
        {
2799
0
        RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);
2800
0
        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2801
0
        eptr -= length;
2802
0
        }
2803
0
      RRETURN(MATCH_NOMATCH);
2804
0
      }
2805
    /* Control never gets here */
2806
2807
    /* Match a bit-mapped character class, possibly repeatedly. This op code is
2808
    used when all the characters in the class have values in the range 0-255,
2809
    and either the matching is caseful, or the characters are in the range
2810
    0-127 when UTF-8 processing is enabled. The only difference between
2811
    OP_CLASS and OP_NCLASS occurs when a data character outside the range is
2812
    encountered.
2813
2814
    First, look past the end of the item to see if there is repeat information
2815
    following. Then obey similar code to character type repeats - written out
2816
    again for speed. */
2817
2818
0
    case OP_NCLASS:
2819
0
    case OP_CLASS:
2820
0
      {
2821
      /* The data variable is saved across frames, so the byte map needs to
2822
      be stored there. */
2823
0
#define BYTE_MAP ((pcre_uint8 *)data)
2824
0
      data = ecode + 1;                /* Save for matching */
2825
0
      ecode += 1 + (32 / sizeof(pcre_uchar)); /* Advance past the item */
2826
2827
0
      switch (*ecode)
2828
0
        {
2829
0
        case OP_CRSTAR:
2830
0
        case OP_CRMINSTAR:
2831
0
        case OP_CRPLUS:
2832
0
        case OP_CRMINPLUS:
2833
0
        case OP_CRQUERY:
2834
0
        case OP_CRMINQUERY:
2835
0
        c = *ecode++ - OP_CRSTAR;
2836
0
        minimize = (c & 1) != 0;
2837
0
        min = rep_min[c];                 /* Pick up values from tables; */
2838
0
        max = rep_max[c];                 /* zero for max => infinity */
2839
0
        if (max == 0) max = INT_MAX;
2840
0
        break;
2841
2842
0
        case OP_CRRANGE:
2843
0
        case OP_CRMINRANGE:
2844
0
        minimize = (*ecode == OP_CRMINRANGE);
2845
0
        min = GET2(ecode, 1);
2846
0
        max = GET2(ecode, 1 + IMM2_SIZE);
2847
0
        if (max == 0) max = INT_MAX;
2848
0
        ecode += 1 + 2 * IMM2_SIZE;
2849
0
        break;
2850
2851
0
        default:               /* No repeat follows */
2852
0
        min = max = 1;
2853
0
        break;
2854
0
        }
2855
2856
      /* First, ensure the minimum number of matches are present. */
2857
2858
0
#ifdef SUPPORT_UTF
2859
0
      if (utf)
2860
0
        {
2861
0
        for (i = 1; i <= min; i++)
2862
0
          {
2863
0
          if (eptr >= md->end_subject)
2864
0
            {
2865
0
            SCHECK_PARTIAL();
2866
0
            RRETURN(MATCH_NOMATCH);
2867
0
            }
2868
0
          GETCHARINC(c, eptr);
2869
0
          if (c > 255)
2870
0
            {
2871
0
            if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2872
0
            }
2873
0
          else
2874
0
            if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2875
0
          }
2876
0
        }
2877
0
      else
2878
0
#endif
2879
      /* Not UTF mode */
2880
0
        {
2881
0
        for (i = 1; i <= min; i++)
2882
0
          {
2883
0
          if (eptr >= md->end_subject)
2884
0
            {
2885
0
            SCHECK_PARTIAL();
2886
0
            RRETURN(MATCH_NOMATCH);
2887
0
            }
2888
0
          c = *eptr++;
2889
#ifndef COMPILE_PCRE8
2890
          if (c > 255)
2891
            {
2892
            if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2893
            }
2894
          else
2895
#endif
2896
0
            if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2897
0
          }
2898
0
        }
2899
2900
      /* If max == min we can continue with the main loop without the
2901
      need to recurse. */
2902
2903
0
      if (min == max) continue;
2904
2905
      /* If minimizing, keep testing the rest of the expression and advancing
2906
      the pointer while it matches the class. */
2907
2908
0
      if (minimize)
2909
0
        {
2910
0
#ifdef SUPPORT_UTF
2911
0
        if (utf)
2912
0
          {
2913
0
          for (fi = min;; fi++)
2914
0
            {
2915
0
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM16);
2916
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2917
0
            if (fi >= max) RRETURN(MATCH_NOMATCH);
2918
0
            if (eptr >= md->end_subject)
2919
0
              {
2920
0
              SCHECK_PARTIAL();
2921
0
              RRETURN(MATCH_NOMATCH);
2922
0
              }
2923
0
            GETCHARINC(c, eptr);
2924
0
            if (c > 255)
2925
0
              {
2926
0
              if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2927
0
              }
2928
0
            else
2929
0
              if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2930
0
            }
2931
0
          }
2932
0
        else
2933
0
#endif
2934
        /* Not UTF mode */
2935
0
          {
2936
0
          for (fi = min;; fi++)
2937
0
            {
2938
0
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM17);
2939
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2940
0
            if (fi >= max) RRETURN(MATCH_NOMATCH);
2941
0
            if (eptr >= md->end_subject)
2942
0
              {
2943
0
              SCHECK_PARTIAL();
2944
0
              RRETURN(MATCH_NOMATCH);
2945
0
              }
2946
0
            c = *eptr++;
2947
#ifndef COMPILE_PCRE8
2948
            if (c > 255)
2949
              {
2950
              if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2951
              }
2952
            else
2953
#endif
2954
0
              if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2955
0
            }
2956
0
          }
2957
        /* Control never gets here */
2958
0
        }
2959
2960
      /* If maximizing, find the longest possible run, then work backwards. */
2961
2962
0
      else
2963
0
        {
2964
0
        pp = eptr;
2965
2966
0
#ifdef SUPPORT_UTF
2967
0
        if (utf)
2968
0
          {
2969
0
          for (i = min; i < max; i++)
2970
0
            {
2971
0
            int len = 1;
2972
0
            if (eptr >= md->end_subject)
2973
0
              {
2974
0
              SCHECK_PARTIAL();
2975
0
              break;
2976
0
              }
2977
0
            GETCHARLEN(c, eptr, len);
2978
0
            if (c > 255)
2979
0
              {
2980
0
              if (op == OP_CLASS) break;
2981
0
              }
2982
0
            else
2983
0
              if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
2984
0
            eptr += len;
2985
0
            }
2986
0
          for (;;)
2987
0
            {
2988
0
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM18);
2989
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2990
0
            if (eptr-- == pp) break;        /* Stop if tried at original pos */
2991
0
            BACKCHAR(eptr);
2992
0
            }
2993
0
          }
2994
0
        else
2995
0
#endif
2996
          /* Not UTF mode */
2997
0
          {
2998
0
          for (i = min; i < max; i++)
2999
0
            {
3000
0
            if (eptr >= md->end_subject)
3001
0
              {
3002
0
              SCHECK_PARTIAL();
3003
0
              break;
3004
0
              }
3005
0
            c = *eptr;
3006
#ifndef COMPILE_PCRE8
3007
            if (c > 255)
3008
              {
3009
              if (op == OP_CLASS) break;
3010
              }
3011
            else
3012
#endif
3013
0
              if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
3014
0
            eptr++;
3015
0
            }
3016
0
          while (eptr >= pp)
3017
0
            {
3018
0
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM19);
3019
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3020
0
            eptr--;
3021
0
            }
3022
0
          }
3023
3024
0
        RRETURN(MATCH_NOMATCH);
3025
0
        }
3026
0
#undef BYTE_MAP
3027
0
      }
3028
    /* Control never gets here */
3029
3030
3031
    /* Match an extended character class. This opcode is encountered only
3032
    when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
3033
    mode, because Unicode properties are supported in non-UTF-8 mode. */
3034
3035
0
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3036
0
    case OP_XCLASS:
3037
0
      {
3038
0
      data = ecode + 1 + LINK_SIZE;                /* Save for matching */
3039
0
      ecode += GET(ecode, 1);                      /* Advance past the item */
3040
3041
0
      switch (*ecode)
3042
0
        {
3043
0
        case OP_CRSTAR:
3044
0
        case OP_CRMINSTAR:
3045
0
        case OP_CRPLUS:
3046
0
        case OP_CRMINPLUS:
3047
0
        case OP_CRQUERY:
3048
0
        case OP_CRMINQUERY:
3049
0
        c = *ecode++ - OP_CRSTAR;
3050
0
        minimize = (c & 1) != 0;
3051
0
        min = rep_min[c];                 /* Pick up values from tables; */
3052
0
        max = rep_max[c];                 /* zero for max => infinity */
3053
0
        if (max == 0) max = INT_MAX;
3054
0
        break;
3055
3056
0
        case OP_CRRANGE:
3057
0
        case OP_CRMINRANGE:
3058
0
        minimize = (*ecode == OP_CRMINRANGE);
3059
0
        min = GET2(ecode, 1);
3060
0
        max = GET2(ecode, 1 + IMM2_SIZE);
3061
0
        if (max == 0) max = INT_MAX;
3062
0
        ecode += 1 + 2 * IMM2_SIZE;
3063
0
        break;
3064
3065
0
        default:               /* No repeat follows */
3066
0
        min = max = 1;
3067
0
        break;
3068
0
        }
3069
3070
      /* First, ensure the minimum number of matches are present. */
3071
3072
0
      for (i = 1; i <= min; i++)
3073
0
        {
3074
0
        if (eptr >= md->end_subject)
3075
0
          {
3076
0
          SCHECK_PARTIAL();
3077
0
          RRETURN(MATCH_NOMATCH);
3078
0
          }
3079
0
        GETCHARINCTEST(c, eptr);
3080
0
        if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3081
0
        }
3082
3083
      /* If max == min we can continue with the main loop without the
3084
      need to recurse. */
3085
3086
0
      if (min == max) continue;
3087
3088
      /* If minimizing, keep testing the rest of the expression and advancing
3089
      the pointer while it matches the class. */
3090
3091
0
      if (minimize)
3092
0
        {
3093
0
        for (fi = min;; fi++)
3094
0
          {
3095
0
          RMATCH(eptr, ecode, offset_top, md, eptrb, RM20);
3096
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3097
0
          if (fi >= max) RRETURN(MATCH_NOMATCH);
3098
0
          if (eptr >= md->end_subject)
3099
0
            {
3100
0
            SCHECK_PARTIAL();
3101
0
            RRETURN(MATCH_NOMATCH);
3102
0
            }
3103
0
          GETCHARINCTEST(c, eptr);
3104
0
          if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3105
0
          }
3106
        /* Control never gets here */
3107
0
        }
3108
3109
      /* If maximizing, find the longest possible run, then work backwards. */
3110
3111
0
      else
3112
0
        {
3113
0
        pp = eptr;
3114
0
        for (i = min; i < max; i++)
3115
0
          {
3116
0
          int len = 1;
3117
0
          if (eptr >= md->end_subject)
3118
0
            {
3119
0
            SCHECK_PARTIAL();
3120
0
            break;
3121
0
            }
3122
0
#ifdef SUPPORT_UTF
3123
0
          GETCHARLENTEST(c, eptr, len);
3124
#else
3125
          c = *eptr;
3126
#endif
3127
0
          if (!PRIV(xclass)(c, data, utf)) break;
3128
0
          eptr += len;
3129
0
          }
3130
0
        for(;;)
3131
0
          {
3132
0
          RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
3133
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3134
0
          if (eptr-- == pp) break;        /* Stop if tried at original pos */
3135
0
#ifdef SUPPORT_UTF
3136
0
          if (utf) BACKCHAR(eptr);
3137
0
#endif
3138
0
          }
3139
0
        RRETURN(MATCH_NOMATCH);
3140
0
        }
3141
3142
      /* Control never gets here */
3143
0
      }
3144
0
#endif    /* End of XCLASS */
3145
3146
    /* Match a single character, casefully */
3147
3148
0
    case OP_CHAR:
3149
0
#ifdef SUPPORT_UTF
3150
0
    if (utf)
3151
0
      {
3152
0
      length = 1;
3153
0
      ecode++;
3154
0
      GETCHARLEN(fc, ecode, length);
3155
0
      if (length > md->end_subject - eptr)
3156
0
        {
3157
0
        CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
3158
0
        RRETURN(MATCH_NOMATCH);
3159
0
        }
3160
0
      while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
3161
0
      }
3162
0
    else
3163
0
#endif
3164
    /* Not UTF mode */
3165
0
      {
3166
0
      if (md->end_subject - eptr < 1)
3167
0
        {
3168
0
        SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
3169
0
        RRETURN(MATCH_NOMATCH);
3170
0
        }
3171
0
      if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
3172
0
      ecode += 2;
3173
0
      }
3174
0
    break;
3175
3176
    /* Match a single character, caselessly. If we are at the end of the
3177
    subject, give up immediately. */
3178
3179
0
    case OP_CHARI:
3180
0
    if (eptr >= md->end_subject)
3181
0
      {
3182
0
      SCHECK_PARTIAL();
3183
0
      RRETURN(MATCH_NOMATCH);
3184
0
      }
3185
3186
0
#ifdef SUPPORT_UTF
3187
0
    if (utf)
3188
0
      {
3189
0
      length = 1;
3190
0
      ecode++;
3191
0
      GETCHARLEN(fc, ecode, length);
3192
3193
      /* If the pattern character's value is < 128, we have only one byte, and
3194
      we know that its other case must also be one byte long, so we can use the
3195
      fast lookup table. We know that there is at least one byte left in the
3196
      subject. */
3197
3198
0
      if (fc < 128)
3199
0
        {
3200
0
        if (md->lcc[fc]
3201
0
            != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
3202
0
        ecode++;
3203
0
        eptr++;
3204
0
        }
3205
3206
      /* Otherwise we must pick up the subject character. Note that we cannot
3207
      use the value of "length" to check for sufficient bytes left, because the
3208
      other case of the character may have more or fewer bytes.  */
3209
3210
0
      else
3211
0
        {
3212
0
        unsigned int dc;
3213
0
        GETCHARINC(dc, eptr);
3214
0
        ecode += length;
3215
3216
        /* If we have Unicode property support, we can use it to test the other
3217
        case of the character, if there is one. */
3218
3219
0
        if (fc != dc)
3220
0
          {
3221
0
#ifdef SUPPORT_UCP
3222
0
          if (dc != UCD_OTHERCASE(fc))
3223
0
#endif
3224
0
            RRETURN(MATCH_NOMATCH);
3225
0
          }
3226
0
        }
3227
0
      }
3228
0
    else
3229
0
#endif   /* SUPPORT_UTF */
3230
3231
    /* Not UTF mode */
3232
0
      {
3233
0
      if (TABLE_GET(ecode[1], md->lcc, ecode[1])
3234
0
          != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
3235
0
      eptr++;
3236
0
      ecode += 2;
3237
0
      }
3238
0
    break;
3239
3240
    /* Match a single character repeatedly. */
3241
3242
0
    case OP_EXACT:
3243
0
    case OP_EXACTI:
3244
0
    min = max = GET2(ecode, 1);
3245
0
    ecode += 1 + IMM2_SIZE;
3246
0
    goto REPEATCHAR;
3247
3248
0
    case OP_POSUPTO:
3249
0
    case OP_POSUPTOI:
3250
0
    possessive = TRUE;
3251
    /* Fall through */
3252
3253
0
    case OP_UPTO:
3254
0
    case OP_UPTOI:
3255
0
    case OP_MINUPTO:
3256
0
    case OP_MINUPTOI:
3257
0
    min = 0;
3258
0
    max = GET2(ecode, 1);
3259
0
    minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
3260
0
    ecode += 1 + IMM2_SIZE;
3261
0
    goto REPEATCHAR;
3262
3263
0
    case OP_POSSTAR:
3264
0
    case OP_POSSTARI:
3265
0
    possessive = TRUE;
3266
0
    min = 0;
3267
0
    max = INT_MAX;
3268
0
    ecode++;
3269
0
    goto REPEATCHAR;
3270
3271
0
    case OP_POSPLUS:
3272
0
    case OP_POSPLUSI:
3273
0
    possessive = TRUE;
3274
0
    min = 1;
3275
0
    max = INT_MAX;
3276
0
    ecode++;
3277
0
    goto REPEATCHAR;
3278
3279
0
    case OP_POSQUERY:
3280
0
    case OP_POSQUERYI:
3281
0
    possessive = TRUE;
3282
0
    min = 0;
3283
0
    max = 1;
3284
0
    ecode++;
3285
0
    goto REPEATCHAR;
3286
3287
0
    case OP_STAR:
3288
0
    case OP_STARI:
3289
0
    case OP_MINSTAR:
3290
0
    case OP_MINSTARI:
3291
0
    case OP_PLUS:
3292
0
    case OP_PLUSI:
3293
0
    case OP_MINPLUS:
3294
0
    case OP_MINPLUSI:
3295
0
    case OP_QUERY:
3296
0
    case OP_QUERYI:
3297
0
    case OP_MINQUERY:
3298
0
    case OP_MINQUERYI:
3299
0
    c = *ecode++ - ((op < OP_STARI)? OP_STAR : OP_STARI);
3300
0
    minimize = (c & 1) != 0;
3301
0
    min = rep_min[c];                 /* Pick up values from tables; */
3302
0
    max = rep_max[c];                 /* zero for max => infinity */
3303
0
    if (max == 0) max = INT_MAX;
3304
3305
    /* Common code for all repeated single-character matches. */
3306
3307
0
    REPEATCHAR:
3308
0
#ifdef SUPPORT_UTF
3309
0
    if (utf)
3310
0
      {
3311
0
      length = 1;
3312
0
      charptr = ecode;
3313
0
      GETCHARLEN(fc, ecode, length);
3314
0
      ecode += length;
3315
3316
      /* Handle multibyte character matching specially here. There is
3317
      support for caseless matching if UCP support is present. */
3318
3319
0
      if (length > 1)
3320
0
        {
3321
0
#ifdef SUPPORT_UCP
3322
0
        unsigned int othercase;
3323
0
        if (op >= OP_STARI &&     /* Caseless */
3324
0
            (othercase = UCD_OTHERCASE(fc)) != fc)
3325
0
          oclength = PRIV(ord2utf)(othercase, occhars);
3326
0
        else oclength = 0;
3327
0
#endif  /* SUPPORT_UCP */
3328
3329
0
        for (i = 1; i <= min; i++)
3330
0
          {
3331
0
          if (eptr <= md->end_subject - length &&
3332
0
            memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3333
0
#ifdef SUPPORT_UCP
3334
0
          else if (oclength > 0 &&
3335
0
                   eptr <= md->end_subject - oclength &&
3336
0
                   memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3337
0
#endif  /* SUPPORT_UCP */
3338
0
          else
3339
0
            {
3340
0
            CHECK_PARTIAL();
3341
0
            RRETURN(MATCH_NOMATCH);
3342
0
            }
3343
0
          }
3344
3345
0
        if (min == max) continue;
3346
3347
0
        if (minimize)
3348
0
          {
3349
0
          for (fi = min;; fi++)
3350
0
            {
3351
0
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM22);
3352
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3353
0
            if (fi >= max) RRETURN(MATCH_NOMATCH);
3354
0
            if (eptr <= md->end_subject - length &&
3355
0
              memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3356
0
#ifdef SUPPORT_UCP
3357
0
            else if (oclength > 0 &&
3358
0
                     eptr <= md->end_subject - oclength &&
3359
0
                     memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3360
0
#endif  /* SUPPORT_UCP */
3361
0
            else
3362
0
              {
3363
0
              CHECK_PARTIAL();
3364
0
              RRETURN(MATCH_NOMATCH);
3365
0
              }
3366
0
            }
3367
          /* Control never gets here */
3368
0
          }
3369
3370
0
        else  /* Maximize */
3371
0
          {
3372
0
          pp = eptr;
3373
0
          for (i = min; i < max; i++)
3374
0
            {
3375
0
            if (eptr <= md->end_subject - length &&
3376
0
                memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3377
0
#ifdef SUPPORT_UCP
3378
0
            else if (oclength > 0 &&
3379
0
                     eptr <= md->end_subject - oclength &&
3380
0
                     memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3381
0
#endif  /* SUPPORT_UCP */
3382
0
            else
3383
0
              {
3384
0
              CHECK_PARTIAL();
3385
0
              break;
3386
0
              }
3387
0
            }
3388
3389
0
          if (possessive) continue;
3390
3391
0
          for(;;)
3392
0
            {
3393
0
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
3394
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3395
0
            if (eptr == pp) { RRETURN(MATCH_NOMATCH); }
3396
0
#ifdef SUPPORT_UCP
3397
0
            eptr--;
3398
0
            BACKCHAR(eptr);
3399
#else   /* without SUPPORT_UCP */
3400
            eptr -= length;
3401
#endif  /* SUPPORT_UCP */
3402
0
            }
3403
0
          }
3404
        /* Control never gets here */
3405
0
        }
3406
3407
      /* If the length of a UTF-8 character is 1, we fall through here, and
3408
      obey the code as for non-UTF-8 characters below, though in this case the
3409
      value of fc will always be < 128. */
3410
0
      }
3411
0
    else
3412
0
#endif  /* SUPPORT_UTF */
3413
      /* When not in UTF-8 mode, load a single-byte character. */
3414
0
      fc = *ecode++;
3415
3416
    /* The value of fc at this point is always one character, though we may
3417
    or may not be in UTF mode. The code is duplicated for the caseless and
3418
    caseful cases, for speed, since matching characters is likely to be quite
3419
    common. First, ensure the minimum number of matches are present. If min =
3420
    max, continue at the same level without recursing. Otherwise, if
3421
    minimizing, keep trying the rest of the expression and advancing one
3422
    matching character if failing, up to the maximum. Alternatively, if
3423
    maximizing, find the maximum number of characters and work backwards. */
3424
3425
0
    DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3426
0
      max, (char *)eptr));
3427
3428
0
    if (op >= OP_STARI)  /* Caseless */
3429
0
      {
3430
0
#ifdef COMPILE_PCRE8
3431
      /* fc must be < 128 if UTF is enabled. */
3432
0
      foc = md->fcc[fc];
3433
#else
3434
#ifdef SUPPORT_UTF
3435
#ifdef SUPPORT_UCP
3436
      if (utf && fc > 127)
3437
        foc = UCD_OTHERCASE(fc);
3438
#else
3439
      if (utf && fc > 127)
3440
        foc = fc;
3441
#endif /* SUPPORT_UCP */
3442
      else
3443
#endif /* SUPPORT_UTF */
3444
        foc = TABLE_GET(fc, md->fcc, fc);
3445
#endif /* COMPILE_PCRE8 */
3446
3447
0
      for (i = 1; i <= min; i++)
3448
0
        {
3449
0
        if (eptr >= md->end_subject)
3450
0
          {
3451
0
          SCHECK_PARTIAL();
3452
0
          RRETURN(MATCH_NOMATCH);
3453
0
          }
3454
0
        if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH);
3455
0
        eptr++;
3456
0
        }
3457
0
      if (min == max) continue;
3458
0
      if (minimize)
3459
0
        {
3460
0
        for (fi = min;; fi++)
3461
0
          {
3462
0
          RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
3463
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3464
0
          if (fi >= max) RRETURN(MATCH_NOMATCH);
3465
0
          if (eptr >= md->end_subject)
3466
0
            {
3467
0
            SCHECK_PARTIAL();
3468
0
            RRETURN(MATCH_NOMATCH);
3469
0
            }
3470
0
          if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH);
3471
0
          eptr++;
3472
0
          }
3473
        /* Control never gets here */
3474
0
        }
3475
0
      else  /* Maximize */
3476
0
        {
3477
0
        pp = eptr;
3478
0
        for (i = min; i < max; i++)
3479
0
          {
3480
0
          if (eptr >= md->end_subject)
3481
0
            {
3482
0
            SCHECK_PARTIAL();
3483
0
            break;
3484
0
            }
3485
0
          if (fc != *eptr && foc != *eptr) break;
3486
0
          eptr++;
3487
0
          }
3488
3489
0
        if (possessive) continue;
3490
3491
0
        while (eptr >= pp)
3492
0
          {
3493
0
          RMATCH(eptr, ecode, offset_top, md, eptrb, RM25);
3494
0
          eptr--;
3495
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3496
0
          }
3497
0
        RRETURN(MATCH_NOMATCH);
3498
0
        }
3499
      /* Control never gets here */
3500
0
      }
3501
3502
    /* Caseful comparisons (includes all multi-byte characters) */
3503
3504
0
    else
3505
0
      {
3506
0
      for (i = 1; i <= min; i++)
3507
0
        {
3508
0
        if (eptr >= md->end_subject)
3509
0
          {
3510
0
          SCHECK_PARTIAL();
3511
0
          RRETURN(MATCH_NOMATCH);
3512
0
          }
3513
0
        if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
3514
0
        }
3515
3516
0
      if (min == max) continue;
3517
3518
0
      if (minimize)
3519
0
        {
3520
0
        for (fi = min;; fi++)
3521
0
          {
3522
0
          RMATCH(eptr, ecode, offset_top, md, eptrb, RM26);
3523
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3524
0
          if (fi >= max) RRETURN(MATCH_NOMATCH);
3525
0
          if (eptr >= md->end_subject)
3526
0
            {
3527
0
            SCHECK_PARTIAL();
3528
0
            RRETURN(MATCH_NOMATCH);
3529
0
            }
3530
0
          if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
3531
0
          }
3532
        /* Control never gets here */
3533
0
        }
3534
0
      else  /* Maximize */
3535
0
        {
3536
0
        pp = eptr;
3537
0
        for (i = min; i < max; i++)
3538
0
          {
3539
0
          if (eptr >= md->end_subject)
3540
0
            {
3541
0
            SCHECK_PARTIAL();
3542
0
            break;
3543
0
            }
3544
0
          if (fc != *eptr) break;
3545
0
          eptr++;
3546
0
          }
3547
0
        if (possessive) continue;
3548
3549
0
        while (eptr >= pp)
3550
0
          {
3551
0
          RMATCH(eptr, ecode, offset_top, md, eptrb, RM27);
3552
0
          eptr--;
3553
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3554
0
          }
3555
0
        RRETURN(MATCH_NOMATCH);
3556
0
        }
3557
0
      }
3558
    /* Control never gets here */
3559
3560
    /* Match a negated single one-byte character. The character we are
3561
    checking can be multibyte. */
3562
3563
0
    case OP_NOT:
3564
0
    case OP_NOTI:
3565
0
    if (eptr >= md->end_subject)
3566
0
      {
3567
0
      SCHECK_PARTIAL();
3568
0
      RRETURN(MATCH_NOMATCH);
3569
0
      }
3570
0
#ifdef SUPPORT_UTF
3571
0
    if (utf)
3572
0
      {
3573
0
      unsigned int ch, och;
3574
3575
0
      ecode++;
3576
0
      GETCHARINC(ch, ecode);
3577
0
      GETCHARINC(c, eptr);
3578
3579
0
      if (op == OP_NOT)
3580
0
        {
3581
0
        if (ch == c) RRETURN(MATCH_NOMATCH);
3582
0
        }
3583
0
      else
3584
0
        {
3585
0
#ifdef SUPPORT_UCP
3586
0
        if (ch > 127)
3587
0
          och = UCD_OTHERCASE(ch);
3588
#else
3589
        if (ch > 127)
3590
          och = ch;
3591
#endif /* SUPPORT_UCP */
3592
0
        else
3593
0
          och = TABLE_GET(ch, md->fcc, ch);
3594
0
        if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
3595
0
        }
3596
0
      }
3597
0
    else
3598
0
#endif
3599
0
      {
3600
0
      unsigned int ch = ecode[1];
3601
0
      c = *eptr++;
3602
0
      if (ch == c || (op == OP_NOTI && TABLE_GET(ch, md->fcc, ch) == c))
3603
0
        RRETURN(MATCH_NOMATCH);
3604
0
      ecode += 2;
3605
0
      }
3606
0
    break;
3607
3608
    /* Match a negated single one-byte character repeatedly. This is almost a
3609
    repeat of the code for a repeated single character, but I haven't found a
3610
    nice way of commoning these up that doesn't require a test of the
3611
    positive/negative option for each character match. Maybe that wouldn't add
3612
    very much to the time taken, but character matching *is* what this is all
3613
    about... */
3614
3615
0
    case OP_NOTEXACT:
3616
0
    case OP_NOTEXACTI:
3617
0
    min = max = GET2(ecode, 1);
3618
0
    ecode += 1 + IMM2_SIZE;
3619
0
    goto REPEATNOTCHAR;
3620
3621
0
    case OP_NOTUPTO:
3622
0
    case OP_NOTUPTOI:
3623
0
    case OP_NOTMINUPTO:
3624
0
    case OP_NOTMINUPTOI:
3625
0
    min = 0;
3626
0
    max = GET2(ecode, 1);
3627
0
    minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
3628
0
    ecode += 1 + IMM2_SIZE;
3629
0
    goto REPEATNOTCHAR;
3630
3631
0
    case OP_NOTPOSSTAR:
3632
0
    case OP_NOTPOSSTARI:
3633
0
    possessive = TRUE;
3634
0
    min = 0;
3635
0
    max = INT_MAX;
3636
0
    ecode++;
3637
0
    goto REPEATNOTCHAR;
3638
3639
0
    case OP_NOTPOSPLUS:
3640
0
    case OP_NOTPOSPLUSI:
3641
0
    possessive = TRUE;
3642
0
    min = 1;
3643
0
    max = INT_MAX;
3644
0
    ecode++;
3645
0
    goto REPEATNOTCHAR;
3646
3647
0
    case OP_NOTPOSQUERY:
3648
0
    case OP_NOTPOSQUERYI:
3649
0
    possessive = TRUE;
3650
0
    min = 0;
3651
0
    max = 1;
3652
0
    ecode++;
3653
0
    goto REPEATNOTCHAR;
3654
3655
0
    case OP_NOTPOSUPTO:
3656
0
    case OP_NOTPOSUPTOI:
3657
0
    possessive = TRUE;
3658
0
    min = 0;
3659
0
    max = GET2(ecode, 1);
3660
0
    ecode += 1 + IMM2_SIZE;
3661
0
    goto REPEATNOTCHAR;
3662
3663
0
    case OP_NOTSTAR:
3664
0
    case OP_NOTSTARI:
3665
0
    case OP_NOTMINSTAR:
3666
0
    case OP_NOTMINSTARI:
3667
0
    case OP_NOTPLUS:
3668
0
    case OP_NOTPLUSI:
3669
0
    case OP_NOTMINPLUS:
3670
0
    case OP_NOTMINPLUSI:
3671
0
    case OP_NOTQUERY:
3672
0
    case OP_NOTQUERYI:
3673
0
    case OP_NOTMINQUERY:
3674
0
    case OP_NOTMINQUERYI:
3675
0
    c = *ecode++ - ((op >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
3676
0
    minimize = (c & 1) != 0;
3677
0
    min = rep_min[c];                 /* Pick up values from tables; */
3678
0
    max = rep_max[c];                 /* zero for max => infinity */
3679
0
    if (max == 0) max = INT_MAX;
3680
3681
    /* Common code for all repeated single-byte matches. */
3682
3683
0
    REPEATNOTCHAR:
3684
0
    GETCHARINCTEST(fc, ecode);
3685
3686
    /* The code is duplicated for the caseless and caseful cases, for speed,
3687
    since matching characters is likely to be quite common. First, ensure the
3688
    minimum number of matches are present. If min = max, continue at the same
3689
    level without recursing. Otherwise, if minimizing, keep trying the rest of
3690
    the expression and advancing one matching character if failing, up to the
3691
    maximum. Alternatively, if maximizing, find the maximum number of
3692
    characters and work backwards. */
3693
3694
0
    DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3695
0
      max, (char *)eptr));
3696
3697
0
    if (op >= OP_NOTSTARI)     /* Caseless */
3698
0
      {
3699
0
#ifdef SUPPORT_UTF
3700
0
#ifdef SUPPORT_UCP
3701
0
      if (utf && fc > 127)
3702
0
        foc = UCD_OTHERCASE(fc);
3703
#else
3704
      if (utf && fc > 127)
3705
        foc = fc;
3706
#endif /* SUPPORT_UCP */
3707
0
      else
3708
0
#endif /* SUPPORT_UTF */
3709
0
        foc = TABLE_GET(fc, md->fcc, fc);
3710
3711
0
#ifdef SUPPORT_UTF
3712
0
      if (utf)
3713
0
        {
3714
0
        unsigned int d;
3715
0
        for (i = 1; i <= min; i++)
3716
0
          {
3717
0
          if (eptr >= md->end_subject)
3718
0
            {
3719
0
            SCHECK_PARTIAL();
3720
0
            RRETURN(MATCH_NOMATCH);
3721
0
            }
3722
0
          GETCHARINC(d, eptr);
3723
0
          if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
3724
0
          }
3725
0
        }
3726
0
      else
3727
0
#endif
3728
      /* Not UTF mode */
3729
0
        {
3730
0
        for (i = 1; i <= min; i++)
3731
0
          {
3732
0
          if (eptr >= md->end_subject)
3733
0
            {
3734
0
            SCHECK_PARTIAL();
3735
0
            RRETURN(MATCH_NOMATCH);
3736
0
            }
3737
0
          if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3738
0
          eptr++;
3739
0
          }
3740
0
        }
3741
3742
0
      if (min == max) continue;
3743
3744
0
      if (minimize)
3745
0
        {
3746
0
#ifdef SUPPORT_UTF
3747
0
        if (utf)
3748
0
          {
3749
0
          unsigned int d;
3750
0
          for (fi = min;; fi++)
3751
0
            {
3752
0
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM28);
3753
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3754
0
            if (fi >= max) RRETURN(MATCH_NOMATCH);
3755
0
            if (eptr >= md->end_subject)
3756
0
              {
3757
0
              SCHECK_PARTIAL();
3758
0
              RRETURN(MATCH_NOMATCH);
3759
0
              }
3760
0
            GETCHARINC(d, eptr);
3761
0
            if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
3762
0
            }
3763
0
          }
3764
0
        else
3765
0
#endif
3766
        /* Not UTF mode */
3767
0
          {
3768
0
          for (fi = min;; fi++)
3769
0
            {
3770
0
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM29);
3771
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3772
0
            if (fi >= max) RRETURN(MATCH_NOMATCH);
3773
0
            if (eptr >= md->end_subject)
3774
0
              {
3775
0
              SCHECK_PARTIAL();
3776
0
              RRETURN(MATCH_NOMATCH);
3777
0
              }
3778
0
            if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3779
0
            eptr++;
3780
0
            }
3781
0
          }
3782
        /* Control never gets here */
3783
0
        }
3784
3785
      /* Maximize case */
3786
3787
0
      else
3788
0
        {
3789
0
        pp = eptr;
3790
3791
0
#ifdef SUPPORT_UTF
3792
0
        if (utf)
3793
0
          {
3794
0
          unsigned int d;
3795
0
          for (i = min; i < max; i++)
3796
0
            {
3797
0
            int len = 1;
3798
0
            if (eptr >= md->end_subject)
3799
0
              {
3800
0
              SCHECK_PARTIAL();
3801
0
              break;
3802
0
              }
3803
0
            GETCHARLEN(d, eptr, len);
3804
0
            if (fc == d || (unsigned int)foc == d) break;
3805
0
            eptr += len;
3806
0
            }
3807
0
          if (possessive) continue;
3808
0
          for(;;)
3809
0
            {
3810
0
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
3811
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3812
0
            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3813
0
            BACKCHAR(eptr);
3814
0
            }
3815
0
          }
3816
0
        else
3817
0
#endif
3818
        /* Not UTF mode */
3819
0
          {
3820
0
          for (i = min; i < max; i++)
3821
0
            {
3822
0
            if (eptr >= md->end_subject)
3823
0
              {
3824
0
              SCHECK_PARTIAL();
3825
0
              break;
3826
0
              }
3827
0
            if (fc == *eptr || foc == *eptr) break;
3828
0
            eptr++;
3829
0
            }
3830
0
          if (possessive) continue;
3831
0
          while (eptr >= pp)
3832
0
            {
3833
0
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM31);
3834
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3835
0
            eptr--;
3836
0
            }
3837
0
          }
3838
3839
0
        RRETURN(MATCH_NOMATCH);
3840
0
        }
3841
      /* Control never gets here */
3842
0
      }
3843
3844
    /* Caseful comparisons */
3845
3846
0
    else
3847
0
      {
3848
0
#ifdef SUPPORT_UTF
3849
0
      if (utf)
3850
0
        {
3851
0
        unsigned int d;
3852
0
        for (i = 1; i <= min; i++)
3853
0
          {
3854
0
          if (eptr >= md->end_subject)
3855
0
            {
3856
0
            SCHECK_PARTIAL();
3857
0
            RRETURN(MATCH_NOMATCH);
3858
0
            }
3859
0
          GETCHARINC(d, eptr);
3860
0
          if (fc == d) RRETURN(MATCH_NOMATCH);
3861
0
          }
3862
0
        }
3863
0
      else
3864
0
#endif
3865
      /* Not UTF mode */
3866
0
        {
3867
0
        for (i = 1; i <= min; i++)
3868
0
          {
3869
0
          if (eptr >= md->end_subject)
3870
0
            {
3871
0
            SCHECK_PARTIAL();
3872
0
            RRETURN(MATCH_NOMATCH);
3873
0
            }
3874
0
          if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3875
0
          }
3876
0
        }
3877
3878
0
      if (min == max) continue;
3879
3880
0
      if (minimize)
3881
0
        {
3882
0
#ifdef SUPPORT_UTF
3883
0
        if (utf)
3884
0
          {
3885
0
          unsigned int d;
3886
0
          for (fi = min;; fi++)
3887
0
            {
3888
0
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM32);
3889
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3890
0
            if (fi >= max) RRETURN(MATCH_NOMATCH);
3891
0
            if (eptr >= md->end_subject)
3892
0
              {
3893
0
              SCHECK_PARTIAL();
3894
0
              RRETURN(MATCH_NOMATCH);
3895
0
              }
3896
0
            GETCHARINC(d, eptr);
3897
0
            if (fc == d) RRETURN(MATCH_NOMATCH);
3898
0
            }
3899
0
          }
3900
0
        else
3901
0
#endif
3902
        /* Not UTF mode */
3903
0
          {
3904
0
          for (fi = min;; fi++)
3905
0
            {
3906
0
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM33);
3907
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3908
0
            if (fi >= max) RRETURN(MATCH_NOMATCH);
3909
0
            if (eptr >= md->end_subject)
3910
0
              {
3911
0
              SCHECK_PARTIAL();
3912
0
              RRETURN(MATCH_NOMATCH);
3913
0
              }
3914
0
            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3915
0
            }
3916
0
          }
3917
        /* Control never gets here */
3918
0
        }
3919
3920
      /* Maximize case */
3921
3922
0
      else
3923
0
        {
3924
0
        pp = eptr;
3925
3926
0
#ifdef SUPPORT_UTF
3927
0
        if (utf)
3928
0
          {
3929
0
          unsigned int d;
3930
0
          for (i = min; i < max; i++)
3931
0
            {
3932
0
            int len = 1;
3933
0
            if (eptr >= md->end_subject)
3934
0
              {
3935
0
              SCHECK_PARTIAL();
3936
0
              break;
3937
0
              }
3938
0
            GETCHARLEN(d, eptr, len);
3939
0
            if (fc == d) break;
3940
0
            eptr += len;
3941
0
            }
3942
0
          if (possessive) continue;
3943
0
          for(;;)
3944
0
            {
3945
0
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);
3946
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3947
0
            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3948
0
            BACKCHAR(eptr);
3949
0
            }
3950
0
          }
3951
0
        else
3952
0
#endif
3953
        /* Not UTF mode */
3954
0
          {
3955
0
          for (i = min; i < max; i++)
3956
0
            {
3957
0
            if (eptr >= md->end_subject)
3958
0
              {
3959
0
              SCHECK_PARTIAL();
3960
0
              break;
3961
0
              }
3962
0
            if (fc == *eptr) break;
3963
0
            eptr++;
3964
0
            }
3965
0
          if (possessive) continue;
3966
0
          while (eptr >= pp)
3967
0
            {
3968
0
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM35);
3969
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3970
0
            eptr--;
3971
0
            }
3972
0
          }
3973
3974
0
        RRETURN(MATCH_NOMATCH);
3975
0
        }
3976
0
      }
3977
    /* Control never gets here */
3978
3979
    /* Match a single character type repeatedly; several different opcodes
3980
    share code. This is very similar to the code for single characters, but we
3981
    repeat it in the interests of efficiency. */
3982
3983
0
    case OP_TYPEEXACT:
3984
0
    min = max = GET2(ecode, 1);
3985
0
    minimize = TRUE;
3986
0
    ecode += 1 + IMM2_SIZE;
3987
0
    goto REPEATTYPE;
3988
3989
0
    case OP_TYPEUPTO:
3990
0
    case OP_TYPEMINUPTO:
3991
0
    min = 0;
3992
0
    max = GET2(ecode, 1);
3993
0
    minimize = *ecode == OP_TYPEMINUPTO;
3994
0
    ecode += 1 + IMM2_SIZE;
3995
0
    goto REPEATTYPE;
3996
3997
0
    case OP_TYPEPOSSTAR:
3998
0
    possessive = TRUE;
3999
0
    min = 0;
4000
0
    max = INT_MAX;
4001
0
    ecode++;
4002
0
    goto REPEATTYPE;
4003
4004
0
    case OP_TYPEPOSPLUS:
4005
0
    possessive = TRUE;
4006
0
    min = 1;
4007
0
    max = INT_MAX;
4008
0
    ecode++;
4009
0
    goto REPEATTYPE;
4010
4011
0
    case OP_TYPEPOSQUERY:
4012
0
    possessive = TRUE;
4013
0
    min = 0;
4014
0
    max = 1;
4015
0
    ecode++;
4016
0
    goto REPEATTYPE;
4017
4018
0
    case OP_TYPEPOSUPTO:
4019
0
    possessive = TRUE;
4020
0
    min = 0;
4021
0
    max = GET2(ecode, 1);
4022
0
    ecode += 1 + IMM2_SIZE;
4023
0
    goto REPEATTYPE;
4024
4025
0
    case OP_TYPESTAR:
4026
0
    case OP_TYPEMINSTAR:
4027
0
    case OP_TYPEPLUS:
4028
0
    case OP_TYPEMINPLUS:
4029
0
    case OP_TYPEQUERY:
4030
0
    case OP_TYPEMINQUERY:
4031
0
    c = *ecode++ - OP_TYPESTAR;
4032
0
    minimize = (c & 1) != 0;
4033
0
    min = rep_min[c];                 /* Pick up values from tables; */
4034
0
    max = rep_max[c];                 /* zero for max => infinity */
4035
0
    if (max == 0) max = INT_MAX;
4036
4037
    /* Common code for all repeated single character type matches. Note that
4038
    in UTF-8 mode, '.' matches a character of any length, but for the other
4039
    character types, the valid characters are all one-byte long. */
4040
4041
0
    REPEATTYPE:
4042
0
    ctype = *ecode++;      /* Code for the character type */
4043
4044
0
#ifdef SUPPORT_UCP
4045
0
    if (ctype == OP_PROP || ctype == OP_NOTPROP)
4046
0
      {
4047
0
      prop_fail_result = ctype == OP_NOTPROP;
4048
0
      prop_type = *ecode++;
4049
0
      prop_value = *ecode++;
4050
0
      }
4051
0
    else prop_type = -1;
4052
0
#endif
4053
4054
    /* First, ensure the minimum number of matches are present. Use inline
4055
    code for maximizing the speed, and do the type test once at the start
4056
    (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
4057
    is tidier. Also separate the UCP code, which can be the same for both UTF-8
4058
    and single-bytes. */
4059
4060
0
    if (min > 0)
4061
0
      {
4062
0
#ifdef SUPPORT_UCP
4063
0
      if (prop_type >= 0)
4064
0
        {
4065
0
        switch(prop_type)
4066
0
          {
4067
0
          case PT_ANY:
4068
0
          if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4069
0
          for (i = 1; i <= min; i++)
4070
0
            {
4071
0
            if (eptr >= md->end_subject)
4072
0
              {
4073
0
              SCHECK_PARTIAL();
4074
0
              RRETURN(MATCH_NOMATCH);
4075
0
              }
4076
0
            GETCHARINCTEST(c, eptr);
4077
0
            }
4078
0
          break;
4079
4080
0
          case PT_LAMP:
4081
0
          for (i = 1; i <= min; i++)
4082
0
            {
4083
0
            int chartype;
4084
0
            if (eptr >= md->end_subject)
4085
0
              {
4086
0
              SCHECK_PARTIAL();
4087
0
              RRETURN(MATCH_NOMATCH);
4088
0
              }
4089
0
            GETCHARINCTEST(c, eptr);
4090
0
            chartype = UCD_CHARTYPE(c);
4091
0
            if ((chartype == ucp_Lu ||
4092
0
                 chartype == ucp_Ll ||
4093
0
                 chartype == ucp_Lt) == prop_fail_result)
4094
0
              RRETURN(MATCH_NOMATCH);
4095
0
            }
4096
0
          break;
4097
4098
0
          case PT_GC:
4099
0
          for (i = 1; i <= min; i++)
4100
0
            {
4101
0
            if (eptr >= md->end_subject)
4102
0
              {
4103
0
              SCHECK_PARTIAL();
4104
0
              RRETURN(MATCH_NOMATCH);
4105
0
              }
4106
0
            GETCHARINCTEST(c, eptr);
4107
0
            if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
4108
0
              RRETURN(MATCH_NOMATCH);
4109
0
            }
4110
0
          break;
4111
4112
0
          case PT_PC:
4113
0
          for (i = 1; i <= min; i++)
4114
0
            {
4115
0
            if (eptr >= md->end_subject)
4116
0
              {
4117
0
              SCHECK_PARTIAL();
4118
0
              RRETURN(MATCH_NOMATCH);
4119
0
              }
4120
0
            GETCHARINCTEST(c, eptr);
4121
0
            if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
4122
0
              RRETURN(MATCH_NOMATCH);
4123
0
            }
4124
0
          break;
4125
4126
0
          case PT_SC:
4127
0
          for (i = 1; i <= min; i++)
4128
0
            {
4129
0
            if (eptr >= md->end_subject)
4130
0
              {
4131
0
              SCHECK_PARTIAL();
4132
0
              RRETURN(MATCH_NOMATCH);
4133
0
              }
4134
0
            GETCHARINCTEST(c, eptr);
4135
0
            if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
4136
0
              RRETURN(MATCH_NOMATCH);
4137
0
            }
4138
0
          break;
4139
4140
0
          case PT_ALNUM:
4141
0
          for (i = 1; i <= min; i++)
4142
0
            {
4143
0
            int category;
4144
0
            if (eptr >= md->end_subject)
4145
0
              {
4146
0
              SCHECK_PARTIAL();
4147
0
              RRETURN(MATCH_NOMATCH);
4148
0
              }
4149
0
            GETCHARINCTEST(c, eptr);
4150
0
            category = UCD_CATEGORY(c);
4151
0
            if ((category == ucp_L || category == ucp_N) == prop_fail_result)
4152
0
              RRETURN(MATCH_NOMATCH);
4153
0
            }
4154
0
          break;
4155
4156
0
          case PT_SPACE:    /* Perl space */
4157
0
          for (i = 1; i <= min; i++)
4158
0
            {
4159
0
            if (eptr >= md->end_subject)
4160
0
              {
4161
0
              SCHECK_PARTIAL();
4162
0
              RRETURN(MATCH_NOMATCH);
4163
0
              }
4164
0
            GETCHARINCTEST(c, eptr);
4165
0
            if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4166
0
                 c == CHAR_FF || c == CHAR_CR)
4167
0
                   == prop_fail_result)
4168
0
              RRETURN(MATCH_NOMATCH);
4169
0
            }
4170
0
          break;
4171
4172
0
          case PT_PXSPACE:  /* POSIX space */
4173
0
          for (i = 1; i <= min; i++)
4174
0
            {
4175
0
            if (eptr >= md->end_subject)
4176
0
              {
4177
0
              SCHECK_PARTIAL();
4178
0
              RRETURN(MATCH_NOMATCH);
4179
0
              }
4180
0
            GETCHARINCTEST(c, eptr);
4181
0
            if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4182
0
                 c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4183
0
                   == prop_fail_result)
4184
0
              RRETURN(MATCH_NOMATCH);
4185
0
            }
4186
0
          break;
4187
4188
0
          case PT_WORD:
4189
0
          for (i = 1; i <= min; i++)
4190
0
            {
4191
0
            int category;
4192
0
            if (eptr >= md->end_subject)
4193
0
              {
4194
0
              SCHECK_PARTIAL();
4195
0
              RRETURN(MATCH_NOMATCH);
4196
0
              }
4197
0
            GETCHARINCTEST(c, eptr);
4198
0
            category = UCD_CATEGORY(c);
4199
0
            if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE)
4200
0
                   == prop_fail_result)
4201
0
              RRETURN(MATCH_NOMATCH);
4202
0
            }
4203
0
          break;
4204
4205
          /* This should not occur */
4206
4207
0
          default:
4208
0
          RRETURN(PCRE_ERROR_INTERNAL);
4209
0
          }
4210
0
        }
4211
4212
      /* Match extended Unicode sequences. We will get here only if the
4213
      support is in the binary; otherwise a compile-time error occurs. */
4214
4215
0
      else if (ctype == OP_EXTUNI)
4216
0
        {
4217
0
        for (i = 1; i <= min; i++)
4218
0
          {
4219
0
          if (eptr >= md->end_subject)
4220
0
            {
4221
0
            SCHECK_PARTIAL();
4222
0
            RRETURN(MATCH_NOMATCH);
4223
0
            }
4224
0
          GETCHARINCTEST(c, eptr);
4225
0
          if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);
4226
0
          while (eptr < md->end_subject)
4227
0
            {
4228
0
            int len = 1;
4229
0
            if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
4230
0
            if (UCD_CATEGORY(c) != ucp_M) break;
4231
0
            eptr += len;
4232
0
            }
4233
0
          CHECK_PARTIAL();
4234
0
          }
4235
0
        }
4236
4237
0
      else
4238
0
#endif     /* SUPPORT_UCP */
4239
4240
/* Handle all other cases when the coding is UTF-8 */
4241
4242
0
#ifdef SUPPORT_UTF
4243
0
      if (utf) switch(ctype)
4244
0
        {
4245
0
        case OP_ANY:
4246
0
        for (i = 1; i <= min; i++)
4247
0
          {
4248
0
          if (eptr >= md->end_subject)
4249
0
            {
4250
0
            SCHECK_PARTIAL();
4251
0
            RRETURN(MATCH_NOMATCH);
4252
0
            }
4253
0
          if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4254
0
          if (md->partial != 0 &&
4255
0
              eptr + 1 >= md->end_subject &&
4256
0
              NLBLOCK->nltype == NLTYPE_FIXED &&
4257
0
              NLBLOCK->nllen == 2 &&
4258
0
              *eptr == NLBLOCK->nl[0])
4259
0
            {
4260
0
            md->hitend = TRUE;
4261
0
            if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
4262
0
            }
4263
0
          eptr++;
4264
0
          ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4265
0
          }
4266
0
        break;
4267
4268
0
        case OP_ALLANY:
4269
0
        for (i = 1; i <= min; i++)
4270
0
          {
4271
0
          if (eptr >= md->end_subject)
4272
0
            {
4273
0
            SCHECK_PARTIAL();
4274
0
            RRETURN(MATCH_NOMATCH);
4275
0
            }
4276
0
          eptr++;
4277
0
          ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4278
0
          }
4279
0
        break;
4280
4281
0
        case OP_ANYBYTE:
4282
0
        if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
4283
0
        eptr += min;
4284
0
        break;
4285
4286
0
        case OP_ANYNL:
4287
0
        for (i = 1; i <= min; i++)
4288
0
          {
4289
0
          if (eptr >= md->end_subject)
4290
0
            {
4291
0
            SCHECK_PARTIAL();
4292
0
            RRETURN(MATCH_NOMATCH);
4293
0
            }
4294
0
          GETCHARINC(c, eptr);
4295
0
          switch(c)
4296
0
            {
4297
0
            default: RRETURN(MATCH_NOMATCH);
4298
4299
0
            case 0x000d:
4300
0
            if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4301
0
            break;
4302
4303
0
            case 0x000a:
4304
0
            break;
4305
4306
0
            case 0x000b:
4307
0
            case 0x000c:
4308
0
            case 0x0085:
4309
0
            case 0x2028:
4310
0
            case 0x2029:
4311
0
            if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4312
0
            break;
4313
0
            }
4314
0
          }
4315
0
        break;
4316
4317
0
        case OP_NOT_HSPACE:
4318
0
        for (i = 1; i <= min; i++)
4319
0
          {
4320
0
          if (eptr >= md->end_subject)
4321
0
            {
4322
0
            SCHECK_PARTIAL();
4323
0
            RRETURN(MATCH_NOMATCH);
4324
0
            }
4325
0
          GETCHARINC(c, eptr);
4326
0
          switch(c)
4327
0
            {
4328
0
            default: break;
4329
0
            case 0x09:      /* HT */
4330
0
            case 0x20:      /* SPACE */
4331
0
            case 0xa0:      /* NBSP */
4332
0
            case 0x1680:    /* OGHAM SPACE MARK */
4333
0
            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
4334
0
            case 0x2000:    /* EN QUAD */
4335
0
            case 0x2001:    /* EM QUAD */
4336
0
            case 0x2002:    /* EN SPACE */
4337
0
            case 0x2003:    /* EM SPACE */
4338
0
            case 0x2004:    /* THREE-PER-EM SPACE */
4339
0
            case 0x2005:    /* FOUR-PER-EM SPACE */
4340
0
            case 0x2006:    /* SIX-PER-EM SPACE */
4341
0
            case 0x2007:    /* FIGURE SPACE */
4342
0
            case 0x2008:    /* PUNCTUATION SPACE */
4343
0
            case 0x2009:    /* THIN SPACE */
4344
0
            case 0x200A:    /* HAIR SPACE */
4345
0
            case 0x202f:    /* NARROW NO-BREAK SPACE */
4346
0
            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
4347
0
            case 0x3000:    /* IDEOGRAPHIC SPACE */
4348
0
            RRETURN(MATCH_NOMATCH);
4349
0
            }
4350
0
          }
4351
0
        break;
4352
4353
0
        case OP_HSPACE:
4354
0
        for (i = 1; i <= min; i++)
4355
0
          {
4356
0
          if (eptr >= md->end_subject)
4357
0
            {
4358
0
            SCHECK_PARTIAL();
4359
0
            RRETURN(MATCH_NOMATCH);
4360
0
            }
4361
0
          GETCHARINC(c, eptr);
4362
0
          switch(c)
4363
0
            {
4364
0
            default: RRETURN(MATCH_NOMATCH);
4365
0
            case 0x09:      /* HT */
4366
0
            case 0x20:      /* SPACE */
4367
0
            case 0xa0:      /* NBSP */
4368
0
            case 0x1680:    /* OGHAM SPACE MARK */
4369
0
            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
4370
0
            case 0x2000:    /* EN QUAD */
4371
0
            case 0x2001:    /* EM QUAD */
4372
0
            case 0x2002:    /* EN SPACE */
4373
0
            case 0x2003:    /* EM SPACE */
4374
0
            case 0x2004:    /* THREE-PER-EM SPACE */
4375
0
            case 0x2005:    /* FOUR-PER-EM SPACE */
4376
0
            case 0x2006:    /* SIX-PER-EM SPACE */
4377
0
            case 0x2007:    /* FIGURE SPACE */
4378
0
            case 0x2008:    /* PUNCTUATION SPACE */
4379
0
            case 0x2009:    /* THIN SPACE */
4380
0
            case 0x200A:    /* HAIR SPACE */
4381
0
            case 0x202f:    /* NARROW NO-BREAK SPACE */
4382
0
            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
4383
0
            case 0x3000:    /* IDEOGRAPHIC SPACE */
4384
0
            break;
4385
0
            }
4386
0
          }
4387
0
        break;
4388
4389
0
        case OP_NOT_VSPACE:
4390
0
        for (i = 1; i <= min; i++)
4391
0
          {
4392
0
          if (eptr >= md->end_subject)
4393
0
            {
4394
0
            SCHECK_PARTIAL();
4395
0
            RRETURN(MATCH_NOMATCH);
4396
0
            }
4397
0
          GETCHARINC(c, eptr);
4398
0
          switch(c)
4399
0
            {
4400
0
            default: break;
4401
0
            case 0x0a:      /* LF */
4402
0
            case 0x0b:      /* VT */
4403
0
            case 0x0c:      /* FF */
4404
0
            case 0x0d:      /* CR */
4405
0
            case 0x85:      /* NEL */
4406
0
            case 0x2028:    /* LINE SEPARATOR */
4407
0
            case 0x2029:    /* PARAGRAPH SEPARATOR */
4408
0
            RRETURN(MATCH_NOMATCH);
4409
0
            }
4410
0
          }
4411
0
        break;
4412
4413
0
        case OP_VSPACE:
4414
0
        for (i = 1; i <= min; i++)
4415
0
          {
4416
0
          if (eptr >= md->end_subject)
4417
0
            {
4418
0
            SCHECK_PARTIAL();
4419
0
            RRETURN(MATCH_NOMATCH);
4420
0
            }
4421
0
          GETCHARINC(c, eptr);
4422
0
          switch(c)
4423
0
            {
4424
0
            default: RRETURN(MATCH_NOMATCH);
4425
0
            case 0x0a:      /* LF */
4426
0
            case 0x0b:      /* VT */
4427
0
            case 0x0c:      /* FF */
4428
0
            case 0x0d:      /* CR */
4429
0
            case 0x85:      /* NEL */
4430
0
            case 0x2028:    /* LINE SEPARATOR */
4431
0
            case 0x2029:    /* PARAGRAPH SEPARATOR */
4432
0
            break;
4433
0
            }
4434
0
          }
4435
0
        break;
4436
4437
0
        case OP_NOT_DIGIT:
4438
0
        for (i = 1; i <= min; i++)
4439
0
          {
4440
0
          if (eptr >= md->end_subject)
4441
0
            {
4442
0
            SCHECK_PARTIAL();
4443
0
            RRETURN(MATCH_NOMATCH);
4444
0
            }
4445
0
          GETCHARINC(c, eptr);
4446
0
          if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
4447
0
            RRETURN(MATCH_NOMATCH);
4448
0
          }
4449
0
        break;
4450
4451
0
        case OP_DIGIT:
4452
0
        for (i = 1; i <= min; i++)
4453
0
          {
4454
0
          if (eptr >= md->end_subject)
4455
0
            {
4456
0
            SCHECK_PARTIAL();
4457
0
            RRETURN(MATCH_NOMATCH);
4458
0
            }
4459
0
          if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_digit) == 0)
4460
0
            RRETURN(MATCH_NOMATCH);
4461
0
          eptr++;
4462
          /* No need to skip more bytes - we know it's a 1-byte character */
4463
0
          }
4464
0
        break;
4465
4466
0
        case OP_NOT_WHITESPACE:
4467
0
        for (i = 1; i <= min; i++)
4468
0
          {
4469
0
          if (eptr >= md->end_subject)
4470
0
            {
4471
0
            SCHECK_PARTIAL();
4472
0
            RRETURN(MATCH_NOMATCH);
4473
0
            }
4474
0
          if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
4475
0
            RRETURN(MATCH_NOMATCH);
4476
0
          eptr++;
4477
0
          ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4478
0
          }
4479
0
        break;
4480
4481
0
        case OP_WHITESPACE:
4482
0
        for (i = 1; i <= min; i++)
4483
0
          {
4484
0
          if (eptr >= md->end_subject)
4485
0
            {
4486
0
            SCHECK_PARTIAL();
4487
0
            RRETURN(MATCH_NOMATCH);
4488
0
            }
4489
0
          if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_space) == 0)
4490
0
            RRETURN(MATCH_NOMATCH);
4491
0
          eptr++;
4492
          /* No need to skip more bytes - we know it's a 1-byte character */
4493
0
          }
4494
0
        break;
4495
4496
0
        case OP_NOT_WORDCHAR:
4497
0
        for (i = 1; i <= min; i++)
4498
0
          {
4499
0
          if (eptr >= md->end_subject)
4500
0
            {
4501
0
            SCHECK_PARTIAL();
4502
0
            RRETURN(MATCH_NOMATCH);
4503
0
            }
4504
0
          if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
4505
0
            RRETURN(MATCH_NOMATCH);
4506
0
          eptr++;
4507
0
          ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4508
0
          }
4509
0
        break;
4510
4511
0
        case OP_WORDCHAR:
4512
0
        for (i = 1; i <= min; i++)
4513
0
          {
4514
0
          if (eptr >= md->end_subject)
4515
0
            {
4516
0
            SCHECK_PARTIAL();
4517
0
            RRETURN(MATCH_NOMATCH);
4518
0
            }
4519
0
          if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_word) == 0)
4520
0
            RRETURN(MATCH_NOMATCH);
4521
0
          eptr++;
4522
          /* No need to skip more bytes - we know it's a 1-byte character */
4523
0
          }
4524
0
        break;
4525
4526
0
        default:
4527
0
        RRETURN(PCRE_ERROR_INTERNAL);
4528
0
        }  /* End switch(ctype) */
4529
4530
0
      else
4531
0
#endif     /* SUPPORT_UTF */
4532
4533
      /* Code for the non-UTF-8 case for minimum matching of operators other
4534
      than OP_PROP and OP_NOTPROP. */
4535
4536
0
      switch(ctype)
4537
0
        {
4538
0
        case OP_ANY:
4539
0
        for (i = 1; i <= min; i++)
4540
0
          {
4541
0
          if (eptr >= md->end_subject)
4542
0
            {
4543
0
            SCHECK_PARTIAL();
4544
0
            RRETURN(MATCH_NOMATCH);
4545
0
            }
4546
0
          if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4547
0
          if (md->partial != 0 &&
4548
0
              eptr + 1 >= md->end_subject &&
4549
0
              NLBLOCK->nltype == NLTYPE_FIXED &&
4550
0
              NLBLOCK->nllen == 2 &&
4551
0
              *eptr == NLBLOCK->nl[0])
4552
0
            {
4553
0
            md->hitend = TRUE;
4554
0
            if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
4555
0
            }
4556
0
          eptr++;
4557
0
          }
4558
0
        break;
4559
4560
0
        case OP_ALLANY:
4561
0
        if (eptr > md->end_subject - min)
4562
0
          {
4563
0
          SCHECK_PARTIAL();
4564
0
          RRETURN(MATCH_NOMATCH);
4565
0
          }
4566
0
        eptr += min;
4567
0
        break;
4568
4569
0
        case OP_ANYBYTE:
4570
0
        if (eptr > md->end_subject - min)
4571
0
          {
4572
0
          SCHECK_PARTIAL();
4573
0
          RRETURN(MATCH_NOMATCH);
4574
0
          }
4575
0
        eptr += min;
4576
0
        break;
4577
4578
0
        case OP_ANYNL:
4579
0
        for (i = 1; i <= min; i++)
4580
0
          {
4581
0
          if (eptr >= md->end_subject)
4582
0
            {
4583
0
            SCHECK_PARTIAL();
4584
0
            RRETURN(MATCH_NOMATCH);
4585
0
            }
4586
0
          switch(*eptr++)
4587
0
            {
4588
0
            default: RRETURN(MATCH_NOMATCH);
4589
4590
0
            case 0x000d:
4591
0
            if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4592
0
            break;
4593
4594
0
            case 0x000a:
4595
0
            break;
4596
4597
0
            case 0x000b:
4598
0
            case 0x000c:
4599
0
            case 0x0085:
4600
#ifdef COMPILE_PCRE16
4601
            case 0x2028:
4602
            case 0x2029:
4603
#endif
4604
0
            if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4605
0
            break;
4606
0
            }
4607
0
          }
4608
0
        break;
4609
4610
0
        case OP_NOT_HSPACE:
4611
0
        for (i = 1; i <= min; i++)
4612
0
          {
4613
0
          if (eptr >= md->end_subject)
4614
0
            {
4615
0
            SCHECK_PARTIAL();
4616
0
            RRETURN(MATCH_NOMATCH);
4617
0
            }
4618
0
          switch(*eptr++)
4619
0
            {
4620
0
            default: break;
4621
0
            case 0x09:      /* HT */
4622
0
            case 0x20:      /* SPACE */
4623
0
            case 0xa0:      /* NBSP */
4624
#ifdef COMPILE_PCRE16
4625
            case 0x1680:    /* OGHAM SPACE MARK */
4626
            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
4627
            case 0x2000:    /* EN QUAD */
4628
            case 0x2001:    /* EM QUAD */
4629
            case 0x2002:    /* EN SPACE */
4630
            case 0x2003:    /* EM SPACE */
4631
            case 0x2004:    /* THREE-PER-EM SPACE */
4632
            case 0x2005:    /* FOUR-PER-EM SPACE */
4633
            case 0x2006:    /* SIX-PER-EM SPACE */
4634
            case 0x2007:    /* FIGURE SPACE */
4635
            case 0x2008:    /* PUNCTUATION SPACE */
4636
            case 0x2009:    /* THIN SPACE */
4637
            case 0x200A:    /* HAIR SPACE */
4638
            case 0x202f:    /* NARROW NO-BREAK SPACE */
4639
            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
4640
            case 0x3000:    /* IDEOGRAPHIC SPACE */
4641
#endif
4642
0
            RRETURN(MATCH_NOMATCH);
4643
0
            }
4644
0
          }
4645
0
        break;
4646
4647
0
        case OP_HSPACE:
4648
0
        for (i = 1; i <= min; i++)
4649
0
          {
4650
0
          if (eptr >= md->end_subject)
4651
0
            {
4652
0
            SCHECK_PARTIAL();
4653
0
            RRETURN(MATCH_NOMATCH);
4654
0
            }
4655
0
          switch(*eptr++)
4656
0
            {
4657
0
            default: RRETURN(MATCH_NOMATCH);
4658
0
            case 0x09:      /* HT */
4659
0
            case 0x20:      /* SPACE */
4660
0
            case 0xa0:      /* NBSP */
4661
#ifdef COMPILE_PCRE16
4662
            case 0x1680:    /* OGHAM SPACE MARK */
4663
            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
4664
            case 0x2000:    /* EN QUAD */
4665
            case 0x2001:    /* EM QUAD */
4666
            case 0x2002:    /* EN SPACE */
4667
            case 0x2003:    /* EM SPACE */
4668
            case 0x2004:    /* THREE-PER-EM SPACE */
4669
            case 0x2005:    /* FOUR-PER-EM SPACE */
4670
            case 0x2006:    /* SIX-PER-EM SPACE */
4671
            case 0x2007:    /* FIGURE SPACE */
4672
            case 0x2008:    /* PUNCTUATION SPACE */
4673
            case 0x2009:    /* THIN SPACE */
4674
            case 0x200A:    /* HAIR SPACE */
4675
            case 0x202f:    /* NARROW NO-BREAK SPACE */
4676
            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
4677
            case 0x3000:    /* IDEOGRAPHIC SPACE */
4678
#endif
4679
0
            break;
4680
0
            }
4681
0
          }
4682
0
        break;
4683
4684
0
        case OP_NOT_VSPACE:
4685
0
        for (i = 1; i <= min; i++)
4686
0
          {
4687
0
          if (eptr >= md->end_subject)
4688
0
            {
4689
0
            SCHECK_PARTIAL();
4690
0
            RRETURN(MATCH_NOMATCH);
4691
0
            }
4692
0
          switch(*eptr++)
4693
0
            {
4694
0
            default: break;
4695
0
            case 0x0a:      /* LF */
4696
0
            case 0x0b:      /* VT */
4697
0
            case 0x0c:      /* FF */
4698
0
            case 0x0d:      /* CR */
4699
0
            case 0x85:      /* NEL */
4700
#ifdef COMPILE_PCRE16
4701
            case 0x2028:    /* LINE SEPARATOR */
4702
            case 0x2029:    /* PARAGRAPH SEPARATOR */
4703
#endif
4704
0
            RRETURN(MATCH_NOMATCH);
4705
0
            }
4706
0
          }
4707
0
        break;
4708
4709
0
        case OP_VSPACE:
4710
0
        for (i = 1; i <= min; i++)
4711
0
          {
4712
0
          if (eptr >= md->end_subject)
4713
0
            {
4714
0
            SCHECK_PARTIAL();
4715
0
            RRETURN(MATCH_NOMATCH);
4716
0
            }
4717
0
          switch(*eptr++)
4718
0
            {
4719
0
            default: RRETURN(MATCH_NOMATCH);
4720
0
            case 0x0a:      /* LF */
4721
0
            case 0x0b:      /* VT */
4722
0
            case 0x0c:      /* FF */
4723
0
            case 0x0d:      /* CR */
4724
0
            case 0x85:      /* NEL */
4725
#ifdef COMPILE_PCRE16
4726
            case 0x2028:    /* LINE SEPARATOR */
4727
            case 0x2029:    /* PARAGRAPH SEPARATOR */
4728
#endif
4729
0
            break;
4730
0
            }
4731
0
          }
4732
0
        break;
4733
4734
0
        case OP_NOT_DIGIT:
4735
0
        for (i = 1; i <= min; i++)
4736
0
          {
4737
0
          if (eptr >= md->end_subject)
4738
0
            {
4739
0
            SCHECK_PARTIAL();
4740
0
            RRETURN(MATCH_NOMATCH);
4741
0
            }
4742
0
          if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0)
4743
0
            RRETURN(MATCH_NOMATCH);
4744
0
          eptr++;
4745
0
          }
4746
0
        break;
4747
4748
0
        case OP_DIGIT:
4749
0
        for (i = 1; i <= min; i++)
4750
0
          {
4751
0
          if (eptr >= md->end_subject)
4752
0
            {
4753
0
            SCHECK_PARTIAL();
4754
0
            RRETURN(MATCH_NOMATCH);
4755
0
            }
4756
0
          if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0)
4757
0
            RRETURN(MATCH_NOMATCH);
4758
0
          eptr++;
4759
0
          }
4760
0
        break;
4761
4762
0
        case OP_NOT_WHITESPACE:
4763
0
        for (i = 1; i <= min; i++)
4764
0
          {
4765
0
          if (eptr >= md->end_subject)
4766
0
            {
4767
0
            SCHECK_PARTIAL();
4768
0
            RRETURN(MATCH_NOMATCH);
4769
0
            }
4770
0
          if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0)
4771
0
            RRETURN(MATCH_NOMATCH);
4772
0
          eptr++;
4773
0
          }
4774
0
        break;
4775
4776
0
        case OP_WHITESPACE:
4777
0
        for (i = 1; i <= min; i++)
4778
0
          {
4779
0
          if (eptr >= md->end_subject)
4780
0
            {
4781
0
            SCHECK_PARTIAL();
4782
0
            RRETURN(MATCH_NOMATCH);
4783
0
            }
4784
0
          if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0)
4785
0
            RRETURN(MATCH_NOMATCH);
4786
0
          eptr++;
4787
0
          }
4788
0
        break;
4789
4790
0
        case OP_NOT_WORDCHAR:
4791
0
        for (i = 1; i <= min; i++)
4792
0
          {
4793
0
          if (eptr >= md->end_subject)
4794
0
            {
4795
0
            SCHECK_PARTIAL();
4796
0
            RRETURN(MATCH_NOMATCH);
4797
0
            }
4798
0
          if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0)
4799
0
            RRETURN(MATCH_NOMATCH);
4800
0
          eptr++;
4801
0
          }
4802
0
        break;
4803
4804
0
        case OP_WORDCHAR:
4805
0
        for (i = 1; i <= min; i++)
4806
0
          {
4807
0
          if (eptr >= md->end_subject)
4808
0
            {
4809
0
            SCHECK_PARTIAL();
4810
0
            RRETURN(MATCH_NOMATCH);
4811
0
            }
4812
0
          if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0)
4813
0
            RRETURN(MATCH_NOMATCH);
4814
0
          eptr++;
4815
0
          }
4816
0
        break;
4817
4818
0
        default:
4819
0
        RRETURN(PCRE_ERROR_INTERNAL);
4820
0
        }
4821
0
      }
4822
4823
    /* If min = max, continue at the same level without recursing */
4824
4825
0
    if (min == max) continue;
4826
4827
    /* If minimizing, we have to test the rest of the pattern before each
4828
    subsequent match. Again, separate the UTF-8 case for speed, and also
4829
    separate the UCP cases. */
4830
4831
0
    if (minimize)
4832
0
      {
4833
0
#ifdef SUPPORT_UCP
4834
0
      if (prop_type >= 0)
4835
0
        {
4836
0
        switch(prop_type)
4837
0
          {
4838
0
          case PT_ANY:
4839
0
          for (fi = min;; fi++)
4840
0
            {
4841
0
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM36);
4842
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4843
0
            if (fi >= max) RRETURN(MATCH_NOMATCH);
4844
0
            if (eptr >= md->end_subject)
4845
0
              {
4846
0
              SCHECK_PARTIAL();
4847
0
              RRETURN(MATCH_NOMATCH);
4848
0
              }
4849
0
            GETCHARINCTEST(c, eptr);
4850
0
            if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4851
0
            }
4852
          /* Control never gets here */
4853
4854
0
          case PT_LAMP:
4855
0
          for (fi = min;; fi++)
4856
0
            {
4857
0
            int chartype;
4858
0
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM37);
4859
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4860
0
            if (fi >= max) RRETURN(MATCH_NOMATCH);
4861
0
            if (eptr >= md->end_subject)
4862
0
              {
4863
0
              SCHECK_PARTIAL();
4864
0
              RRETURN(MATCH_NOMATCH);
4865
0
              }
4866
0
            GETCHARINCTEST(c, eptr);
4867
0
            chartype = UCD_CHARTYPE(c);
4868
0
            if ((chartype == ucp_Lu ||
4869
0
                 chartype == ucp_Ll ||
4870
0
                 chartype == ucp_Lt) == prop_fail_result)
4871
0
              RRETURN(MATCH_NOMATCH);
4872
0
            }
4873
          /* Control never gets here */
4874
4875
0
          case PT_GC:
4876
0
          for (fi = min;; fi++)
4877
0
            {
4878
0
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM38);
4879
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4880
0
            if (fi >= max) RRETURN(MATCH_NOMATCH);
4881
0
            if (eptr >= md->end_subject)
4882
0
              {
4883
0
              SCHECK_PARTIAL();
4884
0
              RRETURN(MATCH_NOMATCH);
4885
0
              }
4886
0
            GETCHARINCTEST(c, eptr);
4887
0
            if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
4888
0
              RRETURN(MATCH_NOMATCH);
4889
0
            }
4890
          /* Control never gets here */
4891
4892
0
          case PT_PC:
4893
0
          for (fi = min;; fi++)
4894
0
            {
4895
0
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM39);
4896
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4897
0
            if (fi >= max) RRETURN(MATCH_NOMATCH);
4898
0
            if (eptr >= md->end_subject)
4899
0
              {
4900
0
              SCHECK_PARTIAL();
4901
0
              RRETURN(MATCH_NOMATCH);
4902
0
              }
4903
0
            GETCHARINCTEST(c, eptr);
4904
0
            if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
4905
0
              RRETURN(MATCH_NOMATCH);
4906
0
            }
4907
          /* Control never gets here */
4908
4909
0
          case PT_SC:
4910
0
          for (fi = min;; fi++)
4911
0
            {
4912
0
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM40);
4913
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4914
0
            if (fi >= max) RRETURN(MATCH_NOMATCH);
4915
0
            if (eptr >= md->end_subject)
4916
0
              {
4917
0
              SCHECK_PARTIAL();
4918
0
              RRETURN(MATCH_NOMATCH);
4919
0
              }
4920
0
            GETCHARINCTEST(c, eptr);
4921
0
            if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
4922
0
              RRETURN(MATCH_NOMATCH);
4923
0
            }
4924
          /* Control never gets here */
4925
4926
0
          case PT_ALNUM:
4927
0
          for (fi = min;; fi++)
4928
0
            {
4929
0
            int category;
4930
0
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM59);
4931
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4932
0
            if (fi >= max) RRETURN(MATCH_NOMATCH);
4933
0
            if (eptr >= md->end_subject)
4934
0
              {
4935
0
              SCHECK_PARTIAL();
4936
0
              RRETURN(MATCH_NOMATCH);
4937
0
              }
4938
0
            GETCHARINCTEST(c, eptr);
4939
0
            category = UCD_CATEGORY(c);
4940
0
            if ((category == ucp_L || category == ucp_N) == prop_fail_result)
4941
0
              RRETURN(MATCH_NOMATCH);
4942
0
            }
4943
          /* Control never gets here */
4944
4945
0
          case PT_SPACE:    /* Perl space */
4946
0
          for (fi = min;; fi++)
4947
0
            {
4948
0
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM60);
4949
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4950
0
            if (fi >= max) RRETURN(MATCH_NOMATCH);
4951
0
            if (eptr >= md->end_subject)
4952
0
              {
4953
0
              SCHECK_PARTIAL();
4954
0
              RRETURN(MATCH_NOMATCH);
4955
0
              }
4956
0
            GETCHARINCTEST(c, eptr);
4957
0
            if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4958
0
                 c == CHAR_FF || c == CHAR_CR)
4959
0
                   == prop_fail_result)
4960
0
              RRETURN(MATCH_NOMATCH);
4961
0
            }
4962
          /* Control never gets here */
4963
4964
0
          case PT_PXSPACE:  /* POSIX space */
4965
0
          for (fi = min;; fi++)
4966
0
            {
4967
0
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM61);
4968
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4969
0
            if (fi >= max) RRETURN(MATCH_NOMATCH);
4970
0
            if (eptr >= md->end_subject)
4971
0
              {
4972
0
              SCHECK_PARTIAL();
4973
0
              RRETURN(MATCH_NOMATCH);
4974
0
              }
4975
0
            GETCHARINCTEST(c, eptr);
4976
0
            if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4977
0
                 c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4978
0
                   == prop_fail_result)
4979
0
              RRETURN(MATCH_NOMATCH);
4980
0
            }
4981
          /* Control never gets here */
4982
4983
0
          case PT_WORD:
4984
0
          for (fi = min;; fi++)
4985
0
            {
4986
0
            int category;
4987
0
            RMATCH(eptr, ecode, offset_top, md, eptrb, RM62);
4988
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4989
0
            if (fi >= max) RRETURN(MATCH_NOMATCH);
4990
0
            if (eptr >= md->end_subject)
4991
0
              {
4992
0
              SCHECK_PARTIAL();
4993
0
              RRETURN(MATCH_NOMATCH);
4994
0
              }
4995
0
            GETCHARINCTEST(c, eptr);
4996
0
            category = UCD_CATEGORY(c);
4997
0
            if ((category == ucp_L ||
4998
0
                 category == ucp_N ||
4999
0
                 c == CHAR_UNDERSCORE)
5000
0
                   == prop_fail_result)
5001
0
              RRETURN(MATCH_NOMATCH);
5002
0
            }
5003
          /* Control never gets here */
5004
5005
          /* This should never occur */
5006
5007
0
          default:
5008
0
          RRETURN(PCRE_ERROR_INTERNAL);
5009
0
          }
5010
0
        }
5011
5012
      /* Match extended Unicode sequences. We will get here only if the
5013
      support is in the binary; otherwise a compile-time error occurs. */
5014
5015
0
      else if (ctype == OP_EXTUNI)
5016
0
        {
5017
0
        for (fi = min;; fi++)
5018
0
          {
5019
0
          RMATCH(eptr, ecode, offset_top, md, eptrb, RM41);
5020
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5021
0
          if (fi >= max) RRETURN(MATCH_NOMATCH);
5022
0
          if (eptr >= md->end_subject)
5023
0
            {
5024
0
            SCHECK_PARTIAL();
5025
0
            RRETURN(MATCH_NOMATCH);
5026
0
            }
5027
0
          GETCHARINCTEST(c, eptr);
5028
0
          if (UCD_CATEGORY(c) == ucp_M) RRETURN(MATCH_NOMATCH);
5029
0
          while (eptr < md->end_subject)
5030
0
            {
5031
0
            int len = 1;
5032
0
            if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5033
0
            if (UCD_CATEGORY(c) != ucp_M) break;
5034
0
            eptr += len;
5035
0
            }
5036
0
          CHECK_PARTIAL();
5037
0
          }
5038
0
        }
5039
0
      else
5040
0
#endif     /* SUPPORT_UCP */
5041
5042
0
#ifdef SUPPORT_UTF
5043
0
      if (utf)
5044
0
        {
5045
0
        for (fi = min;; fi++)
5046
0
          {
5047
0
          RMATCH(eptr, ecode, offset_top, md, eptrb, RM42);
5048
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5049
0
          if (fi >= max) RRETURN(MATCH_NOMATCH);
5050
0
          if (eptr >= md->end_subject)
5051
0
            {
5052
0
            SCHECK_PARTIAL();
5053
0
            RRETURN(MATCH_NOMATCH);
5054
0
            }
5055
0
          if (ctype == OP_ANY && IS_NEWLINE(eptr))
5056
0
            RRETURN(MATCH_NOMATCH);
5057
0
          GETCHARINC(c, eptr);
5058
0
          switch(ctype)
5059
0
            {
5060
0
            case OP_ANY:               /* This is the non-NL case */
5061
0
            if (md->partial != 0 &&    /* Take care with CRLF partial */
5062
0
                eptr >= md->end_subject &&
5063
0
                NLBLOCK->nltype == NLTYPE_FIXED &&
5064
0
                NLBLOCK->nllen == 2 &&
5065
0
                c == NLBLOCK->nl[0])
5066
0
              {
5067
0
              md->hitend = TRUE;
5068
0
              if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5069
0
              }
5070
0
            break;
5071
5072
0
            case OP_ALLANY:
5073
0
            case OP_ANYBYTE:
5074
0
            break;
5075
5076
0
            case OP_ANYNL:
5077
0
            switch(c)
5078
0
              {
5079
0
              default: RRETURN(MATCH_NOMATCH);
5080
0
              case 0x000d:
5081
0
              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
5082
0
              break;
5083
0
              case 0x000a:
5084
0
              break;
5085
5086
0
              case 0x000b:
5087
0
              case 0x000c:
5088
0
              case 0x0085:
5089
0
              case 0x2028:
5090
0
              case 0x2029:
5091
0
              if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
5092
0
              break;
5093
0
              }
5094
0
            break;
5095
5096
0
            case OP_NOT_HSPACE:
5097
0
            switch(c)
5098
0
              {
5099
0
              default: break;
5100
0
              case 0x09:      /* HT */
5101
0
              case 0x20:      /* SPACE */
5102
0
              case 0xa0:      /* NBSP */
5103
0
              case 0x1680:    /* OGHAM SPACE MARK */
5104
0
              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
5105
0
              case 0x2000:    /* EN QUAD */
5106
0
              case 0x2001:    /* EM QUAD */
5107
0
              case 0x2002:    /* EN SPACE */
5108
0
              case 0x2003:    /* EM SPACE */
5109
0
              case 0x2004:    /* THREE-PER-EM SPACE */
5110
0
              case 0x2005:    /* FOUR-PER-EM SPACE */
5111
0
              case 0x2006:    /* SIX-PER-EM SPACE */
5112
0
              case 0x2007:    /* FIGURE SPACE */
5113
0
              case 0x2008:    /* PUNCTUATION SPACE */
5114
0
              case 0x2009:    /* THIN SPACE */
5115
0
              case 0x200A:    /* HAIR SPACE */
5116
0
              case 0x202f:    /* NARROW NO-BREAK SPACE */
5117
0
              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
5118
0
              case 0x3000:    /* IDEOGRAPHIC SPACE */
5119
0
              RRETURN(MATCH_NOMATCH);
5120
0
              }
5121
0
            break;
5122
5123
0
            case OP_HSPACE:
5124
0
            switch(c)
5125
0
              {
5126
0
              default: RRETURN(MATCH_NOMATCH);
5127
0
              case 0x09:      /* HT */
5128
0
              case 0x20:      /* SPACE */
5129
0
              case 0xa0:      /* NBSP */
5130
0
              case 0x1680:    /* OGHAM SPACE MARK */
5131
0
              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
5132
0
              case 0x2000:    /* EN QUAD */
5133
0
              case 0x2001:    /* EM QUAD */
5134
0
              case 0x2002:    /* EN SPACE */
5135
0
              case 0x2003:    /* EM SPACE */
5136
0
              case 0x2004:    /* THREE-PER-EM SPACE */
5137
0
              case 0x2005:    /* FOUR-PER-EM SPACE */
5138
0
              case 0x2006:    /* SIX-PER-EM SPACE */
5139
0
              case 0x2007:    /* FIGURE SPACE */
5140
0
              case 0x2008:    /* PUNCTUATION SPACE */
5141
0
              case 0x2009:    /* THIN SPACE */
5142
0
              case 0x200A:    /* HAIR SPACE */
5143
0
              case 0x202f:    /* NARROW NO-BREAK SPACE */
5144
0
              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
5145
0
              case 0x3000:    /* IDEOGRAPHIC SPACE */
5146
0
              break;
5147
0
              }
5148
0
            break;
5149
5150
0
            case OP_NOT_VSPACE:
5151
0
            switch(c)
5152
0
              {
5153
0
              default: break;
5154
0
              case 0x0a:      /* LF */
5155
0
              case 0x0b:      /* VT */
5156
0
              case 0x0c:      /* FF */
5157
0
              case 0x0d:      /* CR */
5158
0
              case 0x85:      /* NEL */
5159
0
              case 0x2028:    /* LINE SEPARATOR */
5160
0
              case 0x2029:    /* PARAGRAPH SEPARATOR */
5161
0
              RRETURN(MATCH_NOMATCH);
5162
0
              }
5163
0
            break;
5164
5165
0
            case OP_VSPACE:
5166
0
            switch(c)
5167
0
              {
5168
0
              default: RRETURN(MATCH_NOMATCH);
5169
0
              case 0x0a:      /* LF */
5170
0
              case 0x0b:      /* VT */
5171
0
              case 0x0c:      /* FF */
5172
0
              case 0x0d:      /* CR */
5173
0
              case 0x85:      /* NEL */
5174
0
              case 0x2028:    /* LINE SEPARATOR */
5175
0
              case 0x2029:    /* PARAGRAPH SEPARATOR */
5176
0
              break;
5177
0
              }
5178
0
            break;
5179
5180
0
            case OP_NOT_DIGIT:
5181
0
            if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
5182
0
              RRETURN(MATCH_NOMATCH);
5183
0
            break;
5184
5185
0
            case OP_DIGIT:
5186
0
            if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
5187
0
              RRETURN(MATCH_NOMATCH);
5188
0
            break;
5189
5190
0
            case OP_NOT_WHITESPACE:
5191
0
            if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
5192
0
              RRETURN(MATCH_NOMATCH);
5193
0
            break;
5194
5195
0
            case OP_WHITESPACE:
5196
0
            if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
5197
0
              RRETURN(MATCH_NOMATCH);
5198
0
            break;
5199
5200
0
            case OP_NOT_WORDCHAR:
5201
0
            if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
5202
0
              RRETURN(MATCH_NOMATCH);
5203
0
            break;
5204
5205
0
            case OP_WORDCHAR:
5206
0
            if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
5207
0
              RRETURN(MATCH_NOMATCH);
5208
0
            break;
5209
5210
0
            default:
5211
0
            RRETURN(PCRE_ERROR_INTERNAL);
5212
0
            }
5213
0
          }
5214
0
        }
5215
0
      else
5216
0
#endif
5217
      /* Not UTF mode */
5218
0
        {
5219
0
        for (fi = min;; fi++)
5220
0
          {
5221
0
          RMATCH(eptr, ecode, offset_top, md, eptrb, RM43);
5222
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5223
0
          if (fi >= max) RRETURN(MATCH_NOMATCH);
5224
0
          if (eptr >= md->end_subject)
5225
0
            {
5226
0
            SCHECK_PARTIAL();
5227
0
            RRETURN(MATCH_NOMATCH);
5228
0
            }
5229
0
          if (ctype == OP_ANY && IS_NEWLINE(eptr))
5230
0
            RRETURN(MATCH_NOMATCH);
5231
0
          c = *eptr++;
5232
0
          switch(ctype)
5233
0
            {
5234
0
            case OP_ANY:               /* This is the non-NL case */
5235
0
            if (md->partial != 0 &&    /* Take care with CRLF partial */
5236
0
                eptr >= md->end_subject &&
5237
0
                NLBLOCK->nltype == NLTYPE_FIXED &&
5238
0
                NLBLOCK->nllen == 2 &&
5239
0
                c == NLBLOCK->nl[0])
5240
0
              {
5241
0
              md->hitend = TRUE;
5242
0
              if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5243
0
              }
5244
0
            break;
5245
5246
0
            case OP_ALLANY:
5247
0
            case OP_ANYBYTE:
5248
0
            break;
5249
5250
0
            case OP_ANYNL:
5251
0
            switch(c)
5252
0
              {
5253
0
              default: RRETURN(MATCH_NOMATCH);
5254
0
              case 0x000d:
5255
0
              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
5256
0
              break;
5257
5258
0
              case 0x000a:
5259
0
              break;
5260
5261
0
              case 0x000b:
5262
0
              case 0x000c:
5263
0
              case 0x0085:
5264
#ifdef COMPILE_PCRE16
5265
              case 0x2028:
5266
              case 0x2029:
5267
#endif
5268
0
              if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
5269
0
              break;
5270
0
              }
5271
0
            break;
5272
5273
0
            case OP_NOT_HSPACE:
5274
0
            switch(c)
5275
0
              {
5276
0
              default: break;
5277
0
              case 0x09:      /* HT */
5278
0
              case 0x20:      /* SPACE */
5279
0
              case 0xa0:      /* NBSP */
5280
#ifdef COMPILE_PCRE16
5281
              case 0x1680:    /* OGHAM SPACE MARK */
5282
              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
5283
              case 0x2000:    /* EN QUAD */
5284
              case 0x2001:    /* EM QUAD */
5285
              case 0x2002:    /* EN SPACE */
5286
              case 0x2003:    /* EM SPACE */
5287
              case 0x2004:    /* THREE-PER-EM SPACE */
5288
              case 0x2005:    /* FOUR-PER-EM SPACE */
5289
              case 0x2006:    /* SIX-PER-EM SPACE */
5290
              case 0x2007:    /* FIGURE SPACE */
5291
              case 0x2008:    /* PUNCTUATION SPACE */
5292
              case 0x2009:    /* THIN SPACE */
5293
              case 0x200A:    /* HAIR SPACE */
5294
              case 0x202f:    /* NARROW NO-BREAK SPACE */
5295
              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
5296
              case 0x3000:    /* IDEOGRAPHIC SPACE */
5297
#endif
5298
0
              RRETURN(MATCH_NOMATCH);
5299
0
              }
5300
0
            break;
5301
5302
0
            case OP_HSPACE:
5303
0
            switch(c)
5304
0
              {
5305
0
              default: RRETURN(MATCH_NOMATCH);
5306
0
              case 0x09:      /* HT */
5307
0
              case 0x20:      /* SPACE */
5308
0
              case 0xa0:      /* NBSP */
5309
#ifdef COMPILE_PCRE16
5310
              case 0x1680:    /* OGHAM SPACE MARK */
5311
              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
5312
              case 0x2000:    /* EN QUAD */
5313
              case 0x2001:    /* EM QUAD */
5314
              case 0x2002:    /* EN SPACE */
5315
              case 0x2003:    /* EM SPACE */
5316
              case 0x2004:    /* THREE-PER-EM SPACE */
5317
              case 0x2005:    /* FOUR-PER-EM SPACE */
5318
              case 0x2006:    /* SIX-PER-EM SPACE */
5319
              case 0x2007:    /* FIGURE SPACE */
5320
              case 0x2008:    /* PUNCTUATION SPACE */
5321
              case 0x2009:    /* THIN SPACE */
5322
              case 0x200A:    /* HAIR SPACE */
5323
              case 0x202f:    /* NARROW NO-BREAK SPACE */
5324
              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
5325
              case 0x3000:    /* IDEOGRAPHIC SPACE */
5326
#endif
5327
0
              break;
5328
0
              }
5329
0
            break;
5330
5331
0
            case OP_NOT_VSPACE:
5332
0
            switch(c)
5333
0
              {
5334
0
              default: break;
5335
0
              case 0x0a:      /* LF */
5336
0
              case 0x0b:      /* VT */
5337
0
              case 0x0c:      /* FF */
5338
0
              case 0x0d:      /* CR */
5339
0
              case 0x85:      /* NEL */
5340
#ifdef COMPILE_PCRE16
5341
              case 0x2028:    /* LINE SEPARATOR */
5342
              case 0x2029:    /* PARAGRAPH SEPARATOR */
5343
#endif
5344
0
              RRETURN(MATCH_NOMATCH);
5345
0
              }
5346
0
            break;
5347
5348
0
            case OP_VSPACE:
5349
0
            switch(c)
5350
0
              {
5351
0
              default: RRETURN(MATCH_NOMATCH);
5352
0
              case 0x0a:      /* LF */
5353
0
              case 0x0b:      /* VT */
5354
0
              case 0x0c:      /* FF */
5355
0
              case 0x0d:      /* CR */
5356
0
              case 0x85:      /* NEL */
5357
#ifdef COMPILE_PCRE16
5358
              case 0x2028:    /* LINE SEPARATOR */
5359
              case 0x2029:    /* PARAGRAPH SEPARATOR */
5360
#endif
5361
0
              break;
5362
0
              }
5363
0
            break;
5364
5365
0
            case OP_NOT_DIGIT:
5366
0
            if (MAX_255(c) && (md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
5367
0
            break;
5368
5369
0
            case OP_DIGIT:
5370
0
            if (!MAX_255(c) || (md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
5371
0
            break;
5372
5373
0
            case OP_NOT_WHITESPACE:
5374
0
            if (MAX_255(c) && (md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
5375
0
            break;
5376
5377
0
            case OP_WHITESPACE:
5378
0
            if (!MAX_255(c) || (md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
5379
0
            break;
5380
5381
0
            case OP_NOT_WORDCHAR:
5382
0
            if (MAX_255(c) && (md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
5383
0
            break;
5384
5385
0
            case OP_WORDCHAR:
5386
0
            if (!MAX_255(c) || (md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
5387
0
            break;
5388
5389
0
            default:
5390
0
            RRETURN(PCRE_ERROR_INTERNAL);
5391
0
            }
5392
0
          }
5393
0
        }
5394
      /* Control never gets here */
5395
0
      }
5396
5397
    /* If maximizing, it is worth using inline code for speed, doing the type
5398
    test once at the start (i.e. keep it out of the loop). Again, keep the
5399
    UTF-8 and UCP stuff separate. */
5400
5401
0
    else
5402
0
      {
5403
0
      pp = eptr;  /* Remember where we started */
5404
5405
0
#ifdef SUPPORT_UCP
5406
0
      if (prop_type >= 0)
5407
0
        {
5408
0
        switch(prop_type)
5409
0
          {
5410
0
          case PT_ANY:
5411
0
          for (i = min; i < max; i++)
5412
0
            {
5413
0
            int len = 1;
5414
0
            if (eptr >= md->end_subject)
5415
0
              {
5416
0
              SCHECK_PARTIAL();
5417
0
              break;
5418
0
              }
5419
0
            GETCHARLENTEST(c, eptr, len);
5420
0
            if (prop_fail_result) break;
5421
0
            eptr+= len;
5422
0
            }
5423
0
          break;
5424
5425
0
          case PT_LAMP:
5426
0
          for (i = min; i < max; i++)
5427
0
            {
5428
0
            int chartype;
5429
0
            int len = 1;
5430
0
            if (eptr >= md->end_subject)
5431
0
              {
5432
0
              SCHECK_PARTIAL();
5433
0
              break;
5434
0
              }
5435
0
            GETCHARLENTEST(c, eptr, len);
5436
0
            chartype = UCD_CHARTYPE(c);
5437
0
            if ((chartype == ucp_Lu ||
5438
0
                 chartype == ucp_Ll ||
5439
0
                 chartype == ucp_Lt) == prop_fail_result)
5440
0
              break;
5441
0
            eptr+= len;
5442
0
            }
5443
0
          break;
5444
5445
0
          case PT_GC:
5446
0
          for (i = min; i < max; i++)
5447
0
            {
5448
0
            int len = 1;
5449
0
            if (eptr >= md->end_subject)
5450
0
              {
5451
0
              SCHECK_PARTIAL();
5452
0
              break;
5453
0
              }
5454
0
            GETCHARLENTEST(c, eptr, len);
5455
0
            if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) break;
5456
0
            eptr+= len;
5457
0
            }
5458
0
          break;
5459
5460
0
          case PT_PC:
5461
0
          for (i = min; i < max; i++)
5462
0
            {
5463
0
            int len = 1;
5464
0
            if (eptr >= md->end_subject)
5465
0
              {
5466
0
              SCHECK_PARTIAL();
5467
0
              break;
5468
0
              }
5469
0
            GETCHARLENTEST(c, eptr, len);
5470
0
            if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) break;
5471
0
            eptr+= len;
5472
0
            }
5473
0
          break;
5474
5475
0
          case PT_SC:
5476
0
          for (i = min; i < max; i++)
5477
0
            {
5478
0
            int len = 1;
5479
0
            if (eptr >= md->end_subject)
5480
0
              {
5481
0
              SCHECK_PARTIAL();
5482
0
              break;
5483
0
              }
5484
0
            GETCHARLENTEST(c, eptr, len);
5485
0
            if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) break;
5486
0
            eptr+= len;
5487
0
            }
5488
0
          break;
5489
5490
0
          case PT_ALNUM:
5491
0
          for (i = min; i < max; i++)
5492
0
            {
5493
0
            int category;
5494
0
            int len = 1;
5495
0
            if (eptr >= md->end_subject)
5496
0
              {
5497
0
              SCHECK_PARTIAL();
5498
0
              break;
5499
0
              }
5500
0
            GETCHARLENTEST(c, eptr, len);
5501
0
            category = UCD_CATEGORY(c);
5502
0
            if ((category == ucp_L || category == ucp_N) == prop_fail_result)
5503
0
              break;
5504
0
            eptr+= len;
5505
0
            }
5506
0
          break;
5507
5508
0
          case PT_SPACE:    /* Perl space */
5509
0
          for (i = min; i < max; i++)
5510
0
            {
5511
0
            int len = 1;
5512
0
            if (eptr >= md->end_subject)
5513
0
              {
5514
0
              SCHECK_PARTIAL();
5515
0
              break;
5516
0
              }
5517
0
            GETCHARLENTEST(c, eptr, len);
5518
0
            if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
5519
0
                 c == CHAR_FF || c == CHAR_CR)
5520
0
                 == prop_fail_result)
5521
0
              break;
5522
0
            eptr+= len;
5523
0
            }
5524
0
          break;
5525
5526
0
          case PT_PXSPACE:  /* POSIX space */
5527
0
          for (i = min; i < max; i++)
5528
0
            {
5529
0
            int len = 1;
5530
0
            if (eptr >= md->end_subject)
5531
0
              {
5532
0
              SCHECK_PARTIAL();
5533
0
              break;
5534
0
              }
5535
0
            GETCHARLENTEST(c, eptr, len);
5536
0
            if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
5537
0
                 c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
5538
0
                 == prop_fail_result)
5539
0
              break;
5540
0
            eptr+= len;
5541
0
            }
5542
0
          break;
5543
5544
0
          case PT_WORD:
5545
0
          for (i = min; i < max; i++)
5546
0
            {
5547
0
            int category;
5548
0
            int len = 1;
5549
0
            if (eptr >= md->end_subject)
5550
0
              {
5551
0
              SCHECK_PARTIAL();
5552
0
              break;
5553
0
              }
5554
0
            GETCHARLENTEST(c, eptr, len);
5555
0
            category = UCD_CATEGORY(c);
5556
0
            if ((category == ucp_L || category == ucp_N ||
5557
0
                 c == CHAR_UNDERSCORE) == prop_fail_result)
5558
0
              break;
5559
0
            eptr+= len;
5560
0
            }
5561
0
          break;
5562
5563
0
          default:
5564
0
          RRETURN(PCRE_ERROR_INTERNAL);
5565
0
          }
5566
5567
        /* eptr is now past the end of the maximum run */
5568
5569
0
        if (possessive) continue;
5570
0
        for(;;)
5571
0
          {
5572
0
          RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
5573
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5574
0
          if (eptr-- == pp) break;        /* Stop if tried at original pos */
5575
0
          if (utf) BACKCHAR(eptr);
5576
0
          }
5577
0
        }
5578
5579
      /* Match extended Unicode sequences. We will get here only if the
5580
      support is in the binary; otherwise a compile-time error occurs. */
5581
5582
0
      else if (ctype == OP_EXTUNI)
5583
0
        {
5584
0
        for (i = min; i < max; i++)
5585
0
          {
5586
0
          int len = 1;
5587
0
          if (eptr >= md->end_subject)
5588
0
            {
5589
0
            SCHECK_PARTIAL();
5590
0
            break;
5591
0
            }
5592
0
          if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5593
0
          if (UCD_CATEGORY(c) == ucp_M) break;
5594
0
          eptr += len;
5595
0
          while (eptr < md->end_subject)
5596
0
            {
5597
0
            len = 1;
5598
0
            if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5599
0
            if (UCD_CATEGORY(c) != ucp_M) break;
5600
0
            eptr += len;
5601
0
            }
5602
0
          CHECK_PARTIAL();
5603
0
          }
5604
5605
        /* eptr is now past the end of the maximum run */
5606
5607
0
        if (possessive) continue;
5608
5609
0
        for(;;)
5610
0
          {
5611
0
          RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
5612
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5613
0
          if (eptr-- == pp) break;        /* Stop if tried at original pos */
5614
0
          for (;;)                        /* Move back over one extended */
5615
0
            {
5616
0
            if (!utf) c = *eptr; else
5617
0
              {
5618
0
              BACKCHAR(eptr);
5619
0
              GETCHAR(c, eptr);
5620
0
              }
5621
0
            if (UCD_CATEGORY(c) != ucp_M) break;
5622
0
            eptr--;
5623
0
            }
5624
0
          }
5625
0
        }
5626
5627
0
      else
5628
0
#endif   /* SUPPORT_UCP */
5629
5630
0
#ifdef SUPPORT_UTF
5631
0
      if (utf)
5632
0
        {
5633
0
        switch(ctype)
5634
0
          {
5635
0
          case OP_ANY:
5636
0
          if (max < INT_MAX)
5637
0
            {
5638
0
            for (i = min; i < max; i++)
5639
0
              {
5640
0
              if (eptr >= md->end_subject)
5641
0
                {
5642
0
                SCHECK_PARTIAL();
5643
0
                break;
5644
0
                }
5645
0
              if (IS_NEWLINE(eptr)) break;
5646
0
              if (md->partial != 0 &&    /* Take care with CRLF partial */
5647
0
                  eptr + 1 >= md->end_subject &&
5648
0
                  NLBLOCK->nltype == NLTYPE_FIXED &&
5649
0
                  NLBLOCK->nllen == 2 &&
5650
0
                  *eptr == NLBLOCK->nl[0])
5651
0
                {
5652
0
                md->hitend = TRUE;
5653
0
                if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5654
0
                }
5655
0
              eptr++;
5656
0
              ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5657
0
              }
5658
0
            }
5659
5660
          /* Handle unlimited UTF-8 repeat */
5661
5662
0
          else
5663
0
            {
5664
0
            for (i = min; i < max; i++)
5665
0
              {
5666
0
              if (eptr >= md->end_subject)
5667
0
                {
5668
0
                SCHECK_PARTIAL();
5669
0
                break;
5670
0
                }
5671
0
              if (IS_NEWLINE(eptr)) break;
5672
0
              if (md->partial != 0 &&    /* Take care with CRLF partial */
5673
0
                  eptr + 1 >= md->end_subject &&
5674
0
                  NLBLOCK->nltype == NLTYPE_FIXED &&
5675
0
                  NLBLOCK->nllen == 2 &&
5676
0
                  *eptr == NLBLOCK->nl[0])
5677
0
                {
5678
0
                md->hitend = TRUE;
5679
0
                if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5680
0
                }
5681
0
              eptr++;
5682
0
              ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5683
0
              }
5684
0
            }
5685
0
          break;
5686
5687
0
          case OP_ALLANY:
5688
0
          if (max < INT_MAX)
5689
0
            {
5690
0
            for (i = min; i < max; i++)
5691
0
              {
5692
0
              if (eptr >= md->end_subject)
5693
0
                {
5694
0
                SCHECK_PARTIAL();
5695
0
                break;
5696
0
                }
5697
0
              eptr++;
5698
0
              ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5699
0
              }
5700
0
            }
5701
0
          else
5702
0
            {
5703
0
            eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
5704
0
            SCHECK_PARTIAL();
5705
0
            }
5706
0
          break;
5707
5708
          /* The byte case is the same as non-UTF8 */
5709
5710
0
          case OP_ANYBYTE:
5711
0
          c = max - min;
5712
0
          if (c > (unsigned int)(md->end_subject - eptr))
5713
0
            {
5714
0
            eptr = md->end_subject;
5715
0
            SCHECK_PARTIAL();
5716
0
            }
5717
0
          else eptr += c;
5718
0
          break;
5719
5720
0
          case OP_ANYNL:
5721
0
          for (i = min; i < max; i++)
5722
0
            {
5723
0
            int len = 1;
5724
0
            if (eptr >= md->end_subject)
5725
0
              {
5726
0
              SCHECK_PARTIAL();
5727
0
              break;
5728
0
              }
5729
0
            GETCHARLEN(c, eptr, len);
5730
0
            if (c == 0x000d)
5731
0
              {
5732
0
              if (++eptr >= md->end_subject) break;
5733
0
              if (*eptr == 0x000a) eptr++;
5734
0
              }
5735
0
            else
5736
0
              {
5737
0
              if (c != 0x000a &&
5738
0
                  (md->bsr_anycrlf ||
5739
0
                   (c != 0x000b && c != 0x000c &&
5740
0
                    c != 0x0085 && c != 0x2028 && c != 0x2029)))
5741
0
                break;
5742
0
              eptr += len;
5743
0
              }
5744
0
            }
5745
0
          break;
5746
5747
0
          case OP_NOT_HSPACE:
5748
0
          case OP_HSPACE:
5749
0
          for (i = min; i < max; i++)
5750
0
            {
5751
0
            BOOL gotspace;
5752
0
            int len = 1;
5753
0
            if (eptr >= md->end_subject)
5754
0
              {
5755
0
              SCHECK_PARTIAL();
5756
0
              break;
5757
0
              }
5758
0
            GETCHARLEN(c, eptr, len);
5759
0
            switch(c)
5760
0
              {
5761
0
              default: gotspace = FALSE; break;
5762
0
              case 0x09:      /* HT */
5763
0
              case 0x20:      /* SPACE */
5764
0
              case 0xa0:      /* NBSP */
5765
0
              case 0x1680:    /* OGHAM SPACE MARK */
5766
0
              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
5767
0
              case 0x2000:    /* EN QUAD */
5768
0
              case 0x2001:    /* EM QUAD */
5769
0
              case 0x2002:    /* EN SPACE */
5770
0
              case 0x2003:    /* EM SPACE */
5771
0
              case 0x2004:    /* THREE-PER-EM SPACE */
5772
0
              case 0x2005:    /* FOUR-PER-EM SPACE */
5773
0
              case 0x2006:    /* SIX-PER-EM SPACE */
5774
0
              case 0x2007:    /* FIGURE SPACE */
5775
0
              case 0x2008:    /* PUNCTUATION SPACE */
5776
0
              case 0x2009:    /* THIN SPACE */
5777
0
              case 0x200A:    /* HAIR SPACE */
5778
0
              case 0x202f:    /* NARROW NO-BREAK SPACE */
5779
0
              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
5780
0
              case 0x3000:    /* IDEOGRAPHIC SPACE */
5781
0
              gotspace = TRUE;
5782
0
              break;
5783
0
              }
5784
0
            if (gotspace == (ctype == OP_NOT_HSPACE)) break;
5785
0
            eptr += len;
5786
0
            }
5787
0
          break;
5788
5789
0
          case OP_NOT_VSPACE:
5790
0
          case OP_VSPACE:
5791
0
          for (i = min; i < max; i++)
5792
0
            {
5793
0
            BOOL gotspace;
5794
0
            int len = 1;
5795
0
            if (eptr >= md->end_subject)
5796
0
              {
5797
0
              SCHECK_PARTIAL();
5798
0
              break;
5799
0
              }
5800
0
            GETCHARLEN(c, eptr, len);
5801
0
            switch(c)
5802
0
              {
5803
0
              default: gotspace = FALSE; break;
5804
0
              case 0x0a:      /* LF */
5805
0
              case 0x0b:      /* VT */
5806
0
              case 0x0c:      /* FF */
5807
0
              case 0x0d:      /* CR */
5808
0
              case 0x85:      /* NEL */
5809
0
              case 0x2028:    /* LINE SEPARATOR */
5810
0
              case 0x2029:    /* PARAGRAPH SEPARATOR */
5811
0
              gotspace = TRUE;
5812
0
              break;
5813
0
              }
5814
0
            if (gotspace == (ctype == OP_NOT_VSPACE)) break;
5815
0
            eptr += len;
5816
0
            }
5817
0
          break;
5818
5819
0
          case OP_NOT_DIGIT:
5820
0
          for (i = min; i < max; i++)
5821
0
            {
5822
0
            int len = 1;
5823
0
            if (eptr >= md->end_subject)
5824
0
              {
5825
0
              SCHECK_PARTIAL();
5826
0
              break;
5827
0
              }
5828
0
            GETCHARLEN(c, eptr, len);
5829
0
            if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
5830
0
            eptr+= len;
5831
0
            }
5832
0
          break;
5833
5834
0
          case OP_DIGIT:
5835
0
          for (i = min; i < max; i++)
5836
0
            {
5837
0
            int len = 1;
5838
0
            if (eptr >= md->end_subject)
5839
0
              {
5840
0
              SCHECK_PARTIAL();
5841
0
              break;
5842
0
              }
5843
0
            GETCHARLEN(c, eptr, len);
5844
0
            if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
5845
0
            eptr+= len;
5846
0
            }
5847
0
          break;
5848
5849
0
          case OP_NOT_WHITESPACE:
5850
0
          for (i = min; i < max; i++)
5851
0
            {
5852
0
            int len = 1;
5853
0
            if (eptr >= md->end_subject)
5854
0
              {
5855
0
              SCHECK_PARTIAL();
5856
0
              break;
5857
0
              }
5858
0
            GETCHARLEN(c, eptr, len);
5859
0
            if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
5860
0
            eptr+= len;
5861
0
            }
5862
0
          break;
5863
5864
0
          case OP_WHITESPACE:
5865
0
          for (i = min; i < max; i++)
5866
0
            {
5867
0
            int len = 1;
5868
0
            if (eptr >= md->end_subject)
5869
0
              {
5870
0
              SCHECK_PARTIAL();
5871
0
              break;
5872
0
              }
5873
0
            GETCHARLEN(c, eptr, len);
5874
0
            if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
5875
0
            eptr+= len;
5876
0
            }
5877
0
          break;
5878
5879
0
          case OP_NOT_WORDCHAR:
5880
0
          for (i = min; i < max; i++)
5881
0
            {
5882
0
            int len = 1;
5883
0
            if (eptr >= md->end_subject)
5884
0
              {
5885
0
              SCHECK_PARTIAL();
5886
0
              break;
5887
0
              }
5888
0
            GETCHARLEN(c, eptr, len);
5889
0
            if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
5890
0
            eptr+= len;
5891
0
            }
5892
0
          break;
5893
5894
0
          case OP_WORDCHAR:
5895
0
          for (i = min; i < max; i++)
5896
0
            {
5897
0
            int len = 1;
5898
0
            if (eptr >= md->end_subject)
5899
0
              {
5900
0
              SCHECK_PARTIAL();
5901
0
              break;
5902
0
              }
5903
0
            GETCHARLEN(c, eptr, len);
5904
0
            if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
5905
0
            eptr+= len;
5906
0
            }
5907
0
          break;
5908
5909
0
          default:
5910
0
          RRETURN(PCRE_ERROR_INTERNAL);
5911
0
          }
5912
5913
        /* eptr is now past the end of the maximum run. If possessive, we are
5914
        done (no backing up). Otherwise, match at this position; anything other
5915
        than no match is immediately returned. For nomatch, back up one
5916
        character, unless we are matching \R and the last thing matched was
5917
        \r\n, in which case, back up two bytes. */
5918
5919
0
        if (possessive) continue;
5920
0
        for(;;)
5921
0
          {
5922
0
          RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);
5923
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5924
0
          if (eptr-- == pp) break;        /* Stop if tried at original pos */
5925
0
          BACKCHAR(eptr);
5926
0
          if (ctype == OP_ANYNL && eptr > pp  && *eptr == '\n' &&
5927
0
              eptr[-1] == '\r') eptr--;
5928
0
          }
5929
0
        }
5930
0
      else
5931
0
#endif  /* SUPPORT_UTF */
5932
      /* Not UTF mode */
5933
0
        {
5934
0
        switch(ctype)
5935
0
          {
5936
0
          case OP_ANY:
5937
0
          for (i = min; i < max; i++)
5938
0
            {
5939
0
            if (eptr >= md->end_subject)
5940
0
              {
5941
0
              SCHECK_PARTIAL();
5942
0
              break;
5943
0
              }
5944
0
            if (IS_NEWLINE(eptr)) break;
5945
0
            if (md->partial != 0 &&    /* Take care with CRLF partial */
5946
0
                eptr + 1 >= md->end_subject &&
5947
0
                NLBLOCK->nltype == NLTYPE_FIXED &&
5948
0
                NLBLOCK->nllen == 2 &&
5949
0
                *eptr == NLBLOCK->nl[0])
5950
0
              {
5951
0
              md->hitend = TRUE;
5952
0
              if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5953
0
              }
5954
0
            eptr++;
5955
0
            }
5956
0
          break;
5957
5958
0
          case OP_ALLANY:
5959
0
          case OP_ANYBYTE:
5960
0
          c = max - min;
5961
0
          if (c > (unsigned int)(md->end_subject - eptr))
5962
0
            {
5963
0
            eptr = md->end_subject;
5964
0
            SCHECK_PARTIAL();
5965
0
            }
5966
0
          else eptr += c;
5967
0
          break;
5968
5969
0
          case OP_ANYNL:
5970
0
          for (i = min; i < max; i++)
5971
0
            {
5972
0
            if (eptr >= md->end_subject)
5973
0
              {
5974
0
              SCHECK_PARTIAL();
5975
0
              break;
5976
0
              }
5977
0
            c = *eptr;
5978
0
            if (c == 0x000d)
5979
0
              {
5980
0
              if (++eptr >= md->end_subject) break;
5981
0
              if (*eptr == 0x000a) eptr++;
5982
0
              }
5983
0
            else
5984
0
              {
5985
0
              if (c != 0x000a && (md->bsr_anycrlf ||
5986
0
                (c != 0x000b && c != 0x000c && c != 0x0085
5987
#ifdef COMPILE_PCRE16
5988
                && c != 0x2028 && c != 0x2029
5989
#endif
5990
0
                ))) break;
5991
0
              eptr++;
5992
0
              }
5993
0
            }
5994
0
          break;
5995
5996
0
          case OP_NOT_HSPACE:
5997
0
          for (i = min; i < max; i++)
5998
0
            {
5999
0
            if (eptr >= md->end_subject)
6000
0
              {
6001
0
              SCHECK_PARTIAL();
6002
0
              break;
6003
0
              }
6004
0
            c = *eptr;
6005
0
            if (c == 0x09 || c == 0x20 || c == 0xa0
6006
#ifdef COMPILE_PCRE16
6007
              || c == 0x1680 || c == 0x180e || (c >= 0x2000 && c <= 0x200A)
6008
              || c == 0x202f || c == 0x205f || c == 0x3000
6009
#endif
6010
0
              ) break;
6011
0
            eptr++;
6012
0
            }
6013
0
          break;
6014
6015
0
          case OP_HSPACE:
6016
0
          for (i = min; i < max; i++)
6017
0
            {
6018
0
            if (eptr >= md->end_subject)
6019
0
              {
6020
0
              SCHECK_PARTIAL();
6021
0
              break;
6022
0
              }
6023
0
            c = *eptr;
6024
0
            if (c != 0x09 && c != 0x20 && c != 0xa0
6025
#ifdef COMPILE_PCRE16
6026
              && c != 0x1680 && c != 0x180e && (c < 0x2000 || c > 0x200A)
6027
              && c != 0x202f && c != 0x205f && c != 0x3000
6028
#endif
6029
0
              ) break;
6030
0
            eptr++;
6031
0
            }
6032
0
          break;
6033
6034
0
          case OP_NOT_VSPACE:
6035
0
          for (i = min; i < max; i++)
6036
0
            {
6037
0
            if (eptr >= md->end_subject)
6038
0
              {
6039
0
              SCHECK_PARTIAL();
6040
0
              break;
6041
0
              }
6042
0
            c = *eptr;
6043
0
            if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85
6044
#ifdef COMPILE_PCRE16
6045
              || c == 0x2028 || c == 0x2029
6046
#endif
6047
0
              ) break;
6048
0
            eptr++;
6049
0
            }
6050
0
          break;
6051
6052
0
          case OP_VSPACE:
6053
0
          for (i = min; i < max; i++)
6054
0
            {
6055
0
            if (eptr >= md->end_subject)
6056
0
              {
6057
0
              SCHECK_PARTIAL();
6058
0
              break;
6059
0
              }
6060
0
            c = *eptr;
6061
0
            if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85
6062
#ifdef COMPILE_PCRE16
6063
              && c != 0x2028 && c != 0x2029
6064
#endif
6065
0
              ) break;
6066
0
            eptr++;
6067
0
            }
6068
0
          break;
6069
6070
0
          case OP_NOT_DIGIT:
6071
0
          for (i = min; i < max; i++)
6072
0
            {
6073
0
            if (eptr >= md->end_subject)
6074
0
              {
6075
0
              SCHECK_PARTIAL();
6076
0
              break;
6077
0
              }
6078
0
            if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0) break;
6079
0
            eptr++;
6080
0
            }
6081
0
          break;
6082
6083
0
          case OP_DIGIT:
6084
0
          for (i = min; i < max; i++)
6085
0
            {
6086
0
            if (eptr >= md->end_subject)
6087
0
              {
6088
0
              SCHECK_PARTIAL();
6089
0
              break;
6090
0
              }
6091
0
            if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0) break;
6092
0
            eptr++;
6093
0
            }
6094
0
          break;
6095
6096
0
          case OP_NOT_WHITESPACE:
6097
0
          for (i = min; i < max; i++)
6098
0
            {
6099
0
            if (eptr >= md->end_subject)
6100
0
              {
6101
0
              SCHECK_PARTIAL();
6102
0
              break;
6103
0
              }
6104
0
            if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0) break;
6105
0
            eptr++;
6106
0
            }
6107
0
          break;
6108
6109
0
          case OP_WHITESPACE:
6110
0
          for (i = min; i < max; i++)
6111
0
            {
6112
0
            if (eptr >= md->end_subject)
6113
0
              {
6114
0
              SCHECK_PARTIAL();
6115
0
              break;
6116
0
              }
6117
0
            if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0) break;
6118
0
            eptr++;
6119
0
            }
6120
0
          break;
6121
6122
0
          case OP_NOT_WORDCHAR:
6123
0
          for (i = min; i < max; i++)
6124
0
            {
6125
0
            if (eptr >= md->end_subject)
6126
0
              {
6127
0
              SCHECK_PARTIAL();
6128
0
              break;
6129
0
              }
6130
0
            if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0) break;
6131
0
            eptr++;
6132
0
            }
6133
0
          break;
6134
6135
0
          case OP_WORDCHAR:
6136
0
          for (i = min; i < max; i++)
6137
0
            {
6138
0
            if (eptr >= md->end_subject)
6139
0
              {
6140
0
              SCHECK_PARTIAL();
6141
0
              break;
6142
0
              }
6143
0
            if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0) break;
6144
0
            eptr++;
6145
0
            }
6146
0
          break;
6147
6148
0
          default:
6149
0
          RRETURN(PCRE_ERROR_INTERNAL);
6150
0
          }
6151
6152
        /* eptr is now past the end of the maximum run. If possessive, we are
6153
        done (no backing up). Otherwise, match at this position; anything other
6154
        than no match is immediately returned. For nomatch, back up one
6155
        character (byte), unless we are matching \R and the last thing matched
6156
        was \r\n, in which case, back up two bytes. */
6157
6158
0
        if (possessive) continue;
6159
0
        while (eptr >= pp)
6160
0
          {
6161
0
          RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);
6162
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6163
0
          eptr--;
6164
0
          if (ctype == OP_ANYNL && eptr > pp  && *eptr == '\n' &&
6165
0
              eptr[-1] == '\r') eptr--;
6166
0
          }
6167
0
        }
6168
6169
      /* Get here if we can't make it match with any permitted repetitions */
6170
6171
0
      RRETURN(MATCH_NOMATCH);
6172
0
      }
6173
    /* Control never gets here */
6174
6175
    /* There's been some horrible disaster. Arrival here can only mean there is
6176
    something seriously wrong in the code above or the OP_xxx definitions. */
6177
6178
0
    default:
6179
0
    DPRINTF(("Unknown opcode %d\n", *ecode));
6180
0
    RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
6181
0
    }
6182
6183
  /* Do not stick any code in here without much thought; it is assumed
6184
  that a "continue" in the code above arrives here to repeat the main
6185
  loop. */
6186
6187
0
  }             /* End of main loop */
6188
/* Control never reaches here */
6189
6190
6191
/* When compiling to use the heap rather than the stack for recursive calls to
6192
match(), the RRETURN() macro jumps here. The number that is saved in
6193
frame->Xwhere indicates which label we actually want to return to. */
6194
6195
#ifdef NO_RECURSE
6196
#define LBL(val) case val: goto L_RM##val;
6197
HEAP_RETURN:
6198
switch (frame->Xwhere)
6199
  {
6200
  LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
6201
  LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
6202
  LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
6203
  LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
6204
  LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
6205
  LBL(65) LBL(66)
6206
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
6207
  LBL(21)
6208
#endif
6209
#ifdef SUPPORT_UTF
6210
  LBL(16) LBL(18) LBL(20)
6211
  LBL(22) LBL(23) LBL(28) LBL(30)
6212
  LBL(32) LBL(34) LBL(42) LBL(46)
6213
#ifdef SUPPORT_UCP
6214
  LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
6215
  LBL(59) LBL(60) LBL(61) LBL(62)
6216
#endif  /* SUPPORT_UCP */
6217
#endif  /* SUPPORT_UTF */
6218
  default:
6219
  DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
6220
6221
printf("+++jump error in pcre match: label %d non-existent\n", frame->Xwhere);
6222
6223
  return PCRE_ERROR_INTERNAL;
6224
  }
6225
#undef LBL
6226
#endif  /* NO_RECURSE */
6227
0
}
6228
6229
6230
/***************************************************************************
6231
****************************************************************************
6232
                   RECURSION IN THE match() FUNCTION
6233
6234
Undefine all the macros that were defined above to handle this. */
6235
6236
#ifdef NO_RECURSE
6237
#undef eptr
6238
#undef ecode
6239
#undef mstart
6240
#undef offset_top
6241
#undef eptrb
6242
#undef flags
6243
6244
#undef callpat
6245
#undef charptr
6246
#undef data
6247
#undef next
6248
#undef pp
6249
#undef prev
6250
#undef saved_eptr
6251
6252
#undef new_recursive
6253
6254
#undef cur_is_word
6255
#undef condition
6256
#undef prev_is_word
6257
6258
#undef ctype
6259
#undef length
6260
#undef max
6261
#undef min
6262
#undef number
6263
#undef offset
6264
#undef op
6265
#undef save_capture_last
6266
#undef save_offset1
6267
#undef save_offset2
6268
#undef save_offset3
6269
#undef stacksave
6270
6271
#undef newptrb
6272
6273
#endif
6274
6275
/* These two are defined as macros in both cases */
6276
6277
#undef fc
6278
#undef fi
6279
6280
/***************************************************************************
6281
***************************************************************************/
6282
6283
6284
#ifdef NO_RECURSE
6285
/*************************************************
6286
*          Release allocated heap frames         *
6287
*************************************************/
6288
6289
/* This function releases all the allocated frames. The base frame is on the
6290
machine stack, and so must not be freed.
6291
6292
Argument: the address of the base frame
6293
Returns:  nothing
6294
*/
6295
6296
static void
6297
release_match_heapframes (heapframe *frame_base)
6298
{
6299
heapframe *nextframe = frame_base->Xnextframe;
6300
while (nextframe != NULL)
6301
  {
6302
  heapframe *oldframe = nextframe;
6303
  nextframe = nextframe->Xnextframe;
6304
  (PUBL(stack_free))(oldframe);
6305
  }
6306
}
6307
#endif
6308
6309
6310
/*************************************************
6311
*         Execute a Regular Expression           *
6312
*************************************************/
6313
6314
/* This function applies a compiled re to a subject string and picks out
6315
portions of the string if it matches. Two elements in the vector are set for
6316
each substring: the offsets to the start and end of the substring.
6317
6318
Arguments:
6319
  argument_re     points to the compiled expression
6320
  extra_data      points to extra data or is NULL
6321
  subject         points to the subject string
6322
  length          length of subject string (may contain binary zeros)
6323
  start_offset    where to start in the subject string
6324
  options         option bits
6325
  offsets         points to a vector of ints to be filled in with offsets
6326
  offsetcount     the number of elements in the vector
6327
6328
Returns:          > 0 => success; value is the number of elements filled in
6329
                  = 0 => success, but offsets is not big enough
6330
                   -1 => failed to match
6331
                 < -1 => some kind of unexpected problem
6332

Special call: when argument_re, extra_data and subject are all NULL and both
length and start_offset are -999, no matching is done. With NO_RECURSE the
negated size of a heapframe is returned; otherwise the result of calling
match() with null arguments is returned.

Early error returns made before any matching is attempted:
  PCRE_ERROR_BADOPTION      options contains bits outside PUBLIC_EXEC_OPTIONS
  PCRE_ERROR_NULL           argument_re or subject is NULL, or offsets is NULL
                            while offsetcount is greater than zero
  PCRE_ERROR_BADCOUNT       offsetcount is negative
  PCRE_ERROR_BADOFFSET      start_offset is negative or greater than length
  PCRE_ERROR_BADMAGIC       the magic number is wrong
  PCRE_ERROR_BADENDIANNESS  the magic number is byte-reversed
  PCRE_ERROR_BADMODE        the PCRE_MODE flag is not set in the pattern
  PCRE_ERROR_BADUTF8 etc.   the UTF validity check on the subject failed
  PCRE_ERROR_BADUTF8_OFFSET start_offset is not at the start of a character
  PCRE_ERROR_BADNEWLINE     unsupported combination of \R or newline bits
  PCRE_ERROR_BADPARTIAL     partial matching requested, PCRE_NOPARTIAL set
  PCRE_ERROR_NOMEMORY       the temporary offset vector could not be obtained

PCRE_ERROR_PARTIAL is returned when no complete match was found but a partial
match was recorded; offsets[0] and offsets[1] are then set if offsetcount is
greater than 1.
*/
6333
6334
#ifdef COMPILE_PCRE8
6335
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6336
pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
6337
  PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
6338
  int offsetcount)
6339
#else
6340
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6341
pcre16_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
6342
  PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
6343
  int offsetcount)
6344
#endif
6345
0
{
6346
0
int rc, ocount, arg_offset_max;
6347
0
int newline;
6348
0
BOOL using_temporary_offsets = FALSE;
6349
0
BOOL anchored;
6350
0
BOOL startline;
6351
0
BOOL firstline;
6352
0
BOOL utf;
6353
0
BOOL has_first_char = FALSE;
6354
0
BOOL has_req_char = FALSE;
6355
0
pcre_uchar first_char = 0;
6356
0
pcre_uchar first_char2 = 0;
6357
0
pcre_uchar req_char = 0;
6358
0
pcre_uchar req_char2 = 0;
6359
0
match_data match_block;
6360
0
match_data *md = &match_block;
6361
0
const pcre_uint8 *tables;
6362
0
const pcre_uint8 *start_bits = NULL;
6363
0
PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
6364
0
PCRE_PUCHAR end_subject;
6365
0
PCRE_PUCHAR start_partial = NULL;
6366
0
/* req_char_ptr remembers where the required character was last found. It
starts one position before the first start point so that the first test of
"p > req_char_ptr" in the main loop always triggers a search. */
PCRE_PUCHAR req_char_ptr = start_match - 1;
6367
6368
0
const pcre_study_data *study;
6369
0
const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
6370
6371
#ifdef NO_RECURSE
6372
heapframe frame_zero;
6373
frame_zero.Xprevframe = NULL;            /* Marks the top level */
6374
frame_zero.Xnextframe = NULL;            /* None are allocated yet */
6375
md->match_frames_base = &frame_zero;
6376
#endif
6377
6378
/* Check for the special magic call that measures the size of the stack used
6379
per recursive call of match(). Without the funny casting for sizeof, a Windows
6380
compiler gave this error: "unary minus operator applied to unsigned type,
6381
result still unsigned". Hopefully the cast fixes that. */
6382
6383
0
if (re == NULL && extra_data == NULL && subject == NULL && length == -999 &&
6384
0
    start_offset == -999)
6385
#ifdef NO_RECURSE
6386
  return -((int)sizeof(heapframe));
6387
#else
6388
0
  return match(NULL, NULL, NULL, 0, NULL, NULL, 0);
6389
0
#endif
6390
6391
/* Plausibility checks */
6392
6393
0
if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
6394
0
if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))
6395
0
  return PCRE_ERROR_NULL;
6396
0
if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
6397
0
if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
6398
6399
/* Check that the first field in the block is the magic number. If it is not,
6400
return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
6401
REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
6402
means that the pattern is likely compiled with different endianness. */
6403
6404
0
if (re->magic_number != MAGIC_NUMBER)
6405
0
  return re->magic_number == REVERSED_MAGIC_NUMBER?
6406
0
    PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
6407
0
if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
6408
6409
/* These two settings are used in the code for checking a UTF-8 string that
6410
follows immediately afterwards. Other values in the md block are used only
6411
during "normal" pcre_exec() processing, not when the JIT support is in use,
6412
so they are set up later. */
6413
6414
/* PCRE_UTF16 has the same value as PCRE_UTF8. */
6415
0
utf = md->utf = (re->options & PCRE_UTF8) != 0;
6416
0
/* md->partial encodes the partial matching mode: 2 for PCRE_PARTIAL_HARD,
1 for PCRE_PARTIAL_SOFT, and 0 when neither option is set. */
md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
6417
0
              ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
6418
6419
/* Check a UTF-8 string if required. Pass back the character offset and error
6420
code for an invalid string if a results vector is available. */
6421
6422
0
#ifdef SUPPORT_UTF
6423
0
if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
6424
0
  {
6425
0
  int erroroffset;
6426
0
  int errorcode = PRIV(valid_utf)((PCRE_PUCHAR)subject, length, &erroroffset);
6427
0
  if (errorcode != 0)
6428
0
    {
6429
0
    if (offsetcount >= 2)
6430
0
      {
6431
0
      offsets[0] = erroroffset;
6432
0
      offsets[1] = errorcode;
6433
0
      }
6434
#ifdef COMPILE_PCRE16
6435
    return (errorcode <= PCRE_UTF16_ERR1 && md->partial > 1)?
6436
      PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
6437
#else
6438
0
    return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
6439
0
      PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
6440
0
#endif
6441
0
    }
6442
6443
  /* Check that a start_offset points to the start of a UTF character. */
6444
0
  if (start_offset > 0 && start_offset < length &&
6445
0
      NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
6446
0
    return PCRE_ERROR_BADUTF8_OFFSET;
6447
0
  }
6448
0
#endif
6449
6450
/* If the pattern was successfully studied with JIT support, run the JIT
6451
executable instead of the rest of this function. Most options must be set at
6452
compile time for the JIT code to be usable. Fallback to the normal code path if
6453
an unsupported flag is set. */
6454
6455
#ifdef SUPPORT_JIT
6456
if (extra_data != NULL
6457
    && (extra_data->flags & (PCRE_EXTRA_EXECUTABLE_JIT |
6458
                             PCRE_EXTRA_TABLES)) == PCRE_EXTRA_EXECUTABLE_JIT
6459
    && extra_data->executable_jit != NULL
6460
    && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |
6461
                    PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART |
6462
                    PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD)) == 0)
6463
  {
6464
  rc = PRIV(jit_exec)(re, extra_data, (const pcre_uchar *)subject, length,
6465
       start_offset, options, offsets, offsetcount);
6466
6467
  /* PCRE_ERROR_NULL means that the selected normal or partial matching
6468
  mode is not compiled. In this case we simply fallback to interpreter. */
6469
6470
  if (rc != PCRE_ERROR_NULL) return rc;
6471
  }
6472
#endif
6473
6474
/* Carry on with non-JIT matching. This information is for finding all the
6475
numbers associated with a given name, for condition testing. */
6476
6477
0
md->name_table = (pcre_uchar *)re + re->name_table_offset;
6478
0
md->name_count = re->name_count;
6479
0
md->name_entry_size = re->name_entry_size;
6480
6481
/* Fish out the optional data from the extra_data structure, first setting
6482
the default values. */
6483
6484
0
study = NULL;
6485
0
md->match_limit = MATCH_LIMIT;
6486
0
md->match_limit_recursion = MATCH_LIMIT_RECURSION;
6487
0
md->callout_data = NULL;
6488
6489
/* The table pointer is always in native byte order. */
6490
6491
0
tables = re->tables;
6492
6493
0
if (extra_data != NULL)
6494
0
  {
6495
0
  unsigned int flags = extra_data->flags;
6496
0
  if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
6497
0
    study = (const pcre_study_data *)extra_data->study_data;
6498
0
  if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
6499
0
    md->match_limit = extra_data->match_limit;
6500
0
  if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
6501
0
    md->match_limit_recursion = extra_data->match_limit_recursion;
6502
0
  if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
6503
0
    md->callout_data = extra_data->callout_data;
6504
0
  if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
6505
0
  }
6506
6507
/* If the exec call supplied NULL for tables, use the inbuilt ones. This
6508
is a feature that makes it possible to save compiled regex and re-use them
6509
in other programs later. */
6510
6511
0
if (tables == NULL) tables = PRIV(default_tables);
6512
6513
/* Set up other data */
6514
6515
0
anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
6516
0
startline = (re->flags & PCRE_STARTLINE) != 0;
6517
0
firstline = (re->options & PCRE_FIRSTLINE) != 0;
6518
6519
/* The code starts after the real_pcre block and the capture name table. */
6520
6521
0
md->start_code = (const pcre_uchar *)re + re->name_table_offset +
6522
0
  re->name_count * re->name_entry_size;
6523
6524
0
md->start_subject = (PCRE_PUCHAR)subject;
6525
0
md->start_offset = start_offset;
6526
0
md->end_subject = md->start_subject + length;
6527
0
end_subject = md->end_subject;
6528
6529
0
md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
6530
0
md->use_ucp = (re->options & PCRE_UCP) != 0;
6531
0
md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
6532
0
md->ignore_skip_arg = FALSE;
6533
6534
/* Some options are unpacked into BOOL variables in the hope that testing
6535
them will be faster than individual option bits. */
6536
6537
0
md->notbol = (options & PCRE_NOTBOL) != 0;
6538
0
md->noteol = (options & PCRE_NOTEOL) != 0;
6539
0
md->notempty = (options & PCRE_NOTEMPTY) != 0;
6540
0
md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
6541
6542
0
md->hitend = FALSE;
6543
0
md->mark = md->nomatch_mark = NULL;     /* In case never set */
6544
6545
0
md->recursive = NULL;                   /* No recursion at top level */
6546
0
md->hasthen = (re->flags & PCRE_HASTHEN) != 0;
6547
6548
0
md->lcc = tables + lcc_offset;
6549
0
md->fcc = tables + fcc_offset;
6550
0
md->ctypes = tables + ctypes_offset;
6551
6552
/* Handle different \R options. */
6553
6554
0
switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
6555
0
  {
6556
0
  case 0:
6557
0
  if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
6558
0
    md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
6559
0
  else
6560
#ifdef BSR_ANYCRLF
6561
  md->bsr_anycrlf = TRUE;
6562
#else
6563
0
  md->bsr_anycrlf = FALSE;
6564
0
#endif
6565
0
  break;
6566
6567
0
  case PCRE_BSR_ANYCRLF:
6568
0
  md->bsr_anycrlf = TRUE;
6569
0
  break;
6570
6571
0
  case PCRE_BSR_UNICODE:
6572
0
  md->bsr_anycrlf = FALSE;
6573
0
  break;
6574
6575
0
  default: return PCRE_ERROR_BADNEWLINE;
6576
0
  }
6577
6578
/* Handle different types of newline. The three bits give eight cases. If
6579
nothing is set at run time, whatever was used at compile time applies. */
6580
6581
0
switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
6582
0
        (pcre_uint32)options) & PCRE_NEWLINE_BITS)
6583
0
  {
6584
0
  case 0: newline = NEWLINE; break;   /* Compile-time default */
6585
0
  case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
6586
0
  case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
6587
0
  case PCRE_NEWLINE_CR+
6588
0
       PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
6589
0
  case PCRE_NEWLINE_ANY: newline = -1; break;
6590
0
  case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
6591
0
  default: return PCRE_ERROR_BADNEWLINE;
6592
0
  }
6593
6594
0
/* Translate the newline code chosen above: -2 selects NLTYPE_ANYCRLF, any
other negative value selects NLTYPE_ANY, and a non-negative value is a fixed
sequence. A value above 255 is a two-character sequence with the first
character in the high byte and the second in the low byte. */
if (newline == -2)
6595
0
  {
6596
0
  md->nltype = NLTYPE_ANYCRLF;
6597
0
  }
6598
0
else if (newline < 0)
6599
0
  {
6600
0
  md->nltype = NLTYPE_ANY;
6601
0
  }
6602
0
else
6603
0
  {
6604
0
  md->nltype = NLTYPE_FIXED;
6605
0
  if (newline > 255)
6606
0
    {
6607
0
    md->nllen = 2;
6608
0
    md->nl[0] = (newline >> 8) & 255;
6609
0
    md->nl[1] = newline & 255;
6610
0
    }
6611
0
  else
6612
0
    {
6613
0
    md->nllen = 1;
6614
0
    md->nl[0] = newline;
6615
0
    }
6616
0
  }
6617
6618
/* Partial matching was originally supported only for a restricted set of
6619
regexes; from release 8.00 there are no restrictions, but the bits are still
6620
defined (though never set). So there's no harm in leaving this code. */
6621
6622
0
if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
6623
0
  return PCRE_ERROR_BADPARTIAL;
6624
6625
/* If the expression has got more back references than the offsets supplied can
6626
hold, we get a temporary chunk of working store to use during the matching.
6627
Otherwise, we can use the vector supplied, rounding down its size to a multiple
6628
of 3. */
6629
6630
0
ocount = offsetcount - (offsetcount % 3);
6631
0
/* arg_offset_max is derived from the caller's vector size before ocount may be
replaced by the size of a temporary vector below. It is used after matching to
limit how much is copied back and to detect overflow of the caller's vector. */
arg_offset_max = (2*ocount)/3;
6632
6633
0
if (re->top_backref > 0 && re->top_backref >= ocount/3)
6634
0
  {
6635
0
  ocount = re->top_backref * 3 + 3;
6636
0
  md->offset_vector = (int *)(PUBL(malloc))(ocount * sizeof(int));
6637
0
  if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
6638
0
  using_temporary_offsets = TRUE;
6639
0
  DPRINTF(("Got memory to hold back references\n"));
6640
0
  }
6641
0
else md->offset_vector = offsets;
6642
6643
0
md->offset_end = ocount;
6644
0
md->offset_max = (2*ocount)/3;
6645
0
md->offset_overflow = FALSE;
6646
0
md->capture_last = -1;
6647
6648
/* Reset the working variable associated with each extraction. These should
6649
never be used unless previously set, but they get saved and restored, and so we
6650
initialize them to avoid reading uninitialized locations. Also, unset the
6651
offsets for the matched string. This is really just for tidiness with callouts,
6652
in case they inspect these fields. */
6653
6654
0
if (md->offset_vector != NULL)
6655
0
  {
6656
0
  int *iptr = md->offset_vector + ocount;
6657
0
  int *iend = iptr - re->top_bracket;
6658
0
  if (iend < md->offset_vector + 2) iend = md->offset_vector + 2;
6659
0
  while (--iptr >= iend) *iptr = -1;
6660
0
  md->offset_vector[0] = md->offset_vector[1] = -1;
6661
0
  }
6662
6663
/* Set up the first character to match, if available. The first_char value is
6664
never set for an anchored regular expression, but the anchoring may be forced
6665
at run time, so we have to test for anchoring. The first char may be unset for
6666
an unanchored pattern, of course. If there's no first char and the pattern was
6667
studied, there may be a bitmap of possible first characters. */
6668
6669
0
if (!anchored)
6670
0
  {
6671
0
  if ((re->flags & PCRE_FIRSTSET) != 0)
6672
0
    {
6673
0
    has_first_char = TRUE;
6674
0
    first_char = first_char2 = (pcre_uchar)(re->first_char);
6675
0
    if ((re->flags & PCRE_FCH_CASELESS) != 0)
6676
0
      {
6677
0
      first_char2 = TABLE_GET(first_char, md->fcc, first_char);
6678
#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
6679
      if (utf && first_char > 127)
6680
        first_char2 = UCD_OTHERCASE(first_char);
6681
#endif
6682
0
      }
6683
0
    }
6684
0
  else
6685
0
    if (!startline && study != NULL &&
6686
0
      (study->flags & PCRE_STUDY_MAPPED) != 0)
6687
0
        start_bits = study->start_bits;
6688
0
  }
6689
6690
/* For anchored or unanchored matches, there may be a "last known required
6691
character" set. */
6692
6693
0
if ((re->flags & PCRE_REQCHSET) != 0)
6694
0
  {
6695
0
  has_req_char = TRUE;
6696
0
  req_char = req_char2 = (pcre_uchar)(re->req_char);
6697
0
  if ((re->flags & PCRE_RCH_CASELESS) != 0)
6698
0
    {
6699
0
    req_char2 = TABLE_GET(req_char, md->fcc, req_char);
6700
#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
6701
    if (utf && req_char > 127)
6702
      req_char2 = UCD_OTHERCASE(req_char);
6703
#endif
6704
0
    }
6705
0
  }
6706
6707
6708
/* ==========================================================================*/
6709
6710
/* Loop for handling unanchored repeated matching attempts; for anchored regexes
6711
the loop runs just once. */
6712
6713
0
for(;;)
6714
0
  {
6715
0
  PCRE_PUCHAR save_end_subject = end_subject;
6716
0
  PCRE_PUCHAR new_start_match;
6717
6718
  /* If firstline is TRUE, the start of the match is constrained to the first
6719
  line of a multiline string. That is, the match must be before or at the first
6720
  newline. Implement this by temporarily adjusting end_subject so that we stop
6721
  scanning at a newline. If the match fails at the newline, later code breaks
6722
  this loop. */
6723
6724
0
  if (firstline)
6725
0
    {
6726
0
    PCRE_PUCHAR t = start_match;
6727
0
#ifdef SUPPORT_UTF
6728
0
    if (utf)
6729
0
      {
6730
0
      while (t < md->end_subject && !IS_NEWLINE(t))
6731
0
        {
6732
0
        t++;
6733
0
        ACROSSCHAR(t < end_subject, *t, t++);
6734
0
        }
6735
0
      }
6736
0
    else
6737
0
#endif
6738
0
    while (t < md->end_subject && !IS_NEWLINE(t)) t++;
6739
0
    end_subject = t;
6740
0
    }
6741
6742
  /* There are some optimizations that avoid running the match if a known
6743
  starting point is not found, or if a known later character is not present.
6744
  However, there is an option that disables these, for testing and for ensuring
6745
  that all callouts do actually occur. The option can be set in the regex by
6746
  (*NO_START_OPT) or passed in match-time options. */
6747
6748
0
  if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
6749
0
    {
6750
    /* Advance to a unique first char if there is one. */
6751
6752
0
    if (has_first_char)
6753
0
      {
6754
0
      if (first_char != first_char2)
6755
0
        while (start_match < end_subject &&
6756
0
            *start_match != first_char && *start_match != first_char2)
6757
0
          start_match++;
6758
0
      else
6759
0
        while (start_match < end_subject && *start_match != first_char)
6760
0
          start_match++;
6761
0
      }
6762
6763
    /* Or to just after a linebreak for a multiline match */
6764
6765
0
    else if (startline)
6766
0
      {
6767
0
      if (start_match > md->start_subject + start_offset)
6768
0
        {
6769
0
#ifdef SUPPORT_UTF
6770
0
        if (utf)
6771
0
          {
6772
0
          while (start_match < end_subject && !WAS_NEWLINE(start_match))
6773
0
            {
6774
0
            start_match++;
6775
0
            ACROSSCHAR(start_match < end_subject, *start_match,
6776
0
              start_match++);
6777
0
            }
6778
0
          }
6779
0
        else
6780
0
#endif
6781
0
        while (start_match < end_subject && !WAS_NEWLINE(start_match))
6782
0
          start_match++;
6783
6784
        /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
6785
        and we are now at a LF, advance the match position by one more character.
6786
        */
6787
6788
0
        if (start_match[-1] == CHAR_CR &&
6789
0
             (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
6790
0
             start_match < end_subject &&
6791
0
             *start_match == CHAR_NL)
6792
0
          start_match++;
6793
0
        }
6794
0
      }
6795
6796
    /* Or to a non-unique first byte after study */
6797
6798
0
    else if (start_bits != NULL)
6799
0
      {
6800
0
      while (start_match < end_subject)
6801
0
        {
6802
0
        unsigned int c = *start_match;
6803
#ifndef COMPILE_PCRE8
6804
        if (c > 255) c = 255;
6805
#endif
6806
0
        if ((start_bits[c/8] & (1 << (c&7))) == 0)
6807
0
          {
6808
0
          start_match++;
6809
0
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
6810
          /* In non 8-bit mode, the iteration will stop for
6811
          characters > 255 at the beginning or not stop at all. */
6812
0
          if (utf)
6813
0
            ACROSSCHAR(start_match < end_subject, *start_match,
6814
0
              start_match++);
6815
0
#endif
6816
0
          }
6817
0
        else break;
6818
0
        }
6819
0
      }
6820
0
    }   /* Starting optimizations */
6821
6822
  /* Restore fudged end_subject */
6823
6824
0
  end_subject = save_end_subject;
6825
6826
  /* The following two optimizations are disabled for partial matching or if
6827
  disabling is explicitly requested. */
6828
6829
0
  if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
6830
0
    {
6831
    /* If the pattern was studied, a minimum subject length may be set. This is
6832
    a lower bound; no actual string of that length may actually match the
6833
    pattern. Although the value is, strictly, in characters, we treat it as
6834
    bytes to avoid spending too much time in this optimization. */
6835
6836
0
    if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
6837
0
        (pcre_uint32)(end_subject - start_match) < study->minlength)
6838
0
      {
6839
0
      rc = MATCH_NOMATCH;
6840
0
      break;
6841
0
      }
6842
6843
    /* If req_char is set, we know that that character must appear in the
6844
    subject for the match to succeed. If the first character is set, req_char
6845
    must be later in the subject; otherwise the test starts at the match point.
6846
    This optimization can save a huge amount of backtracking in patterns with
6847
    nested unlimited repeats that aren't going to match. Writing separate code
6848
    for cased/caseless versions makes it go faster, as does using an
6849
    autoincrement and backing off on a match.
6850
6851
    HOWEVER: when the subject string is very, very long, searching to its end
6852
    can take a long time, and give bad performance on quite ordinary patterns.
6853
    This showed up when somebody was matching something like /^\d+C/ on a
6854
    32-megabyte string... so we don't do this when the string is sufficiently
6855
    long. */
6856
6857
0
    if (has_req_char && end_subject - start_match < REQ_BYTE_MAX)
6858
0
      {
6859
0
      PCRE_PUCHAR p = start_match + (has_first_char? 1:0);
6860
6861
      /* We don't need to repeat the search if we haven't yet reached the
6862
      place we found it at last time. */
6863
6864
0
      if (p > req_char_ptr)
6865
0
        {
6866
0
        if (req_char != req_char2)
6867
0
          {
6868
0
          while (p < end_subject)
6869
0
            {
6870
0
            int pp = *p++;
6871
0
            if (pp == req_char || pp == req_char2) { p--; break; }
6872
0
            }
6873
0
          }
6874
0
        else
6875
0
          {
6876
0
          while (p < end_subject)
6877
0
            {
6878
0
            if (*p++ == req_char) { p--; break; }
6879
0
            }
6880
0
          }
6881
6882
        /* If we can't find the required character, break the matching loop,
6883
        forcing a match failure. */
6884
6885
0
        if (p >= end_subject)
6886
0
          {
6887
0
          rc = MATCH_NOMATCH;
6888
0
          break;
6889
0
          }
6890
6891
        /* If we have found the required character, save the point where we
6892
        found it, so that we don't search again next time round the loop if
6893
        the start hasn't passed this character yet. */
6894
6895
0
        req_char_ptr = p;
6896
0
        }
6897
0
      }
6898
0
    }
6899
6900
#ifdef PCRE_DEBUG  /* Sigh. Some compilers never learn. */
6901
  printf(">>>> Match against: ");
6902
  pchars(start_match, end_subject - start_match, TRUE, md);
6903
  printf("\n");
6904
#endif
6905
6906
  /* OK, we can now run the match. If "hitend" is set afterwards, remember the
6907
  first starting point for which a partial match was found. */
6908
6909
0
  md->start_match_ptr = start_match;
6910
0
  md->start_used_ptr = start_match;
6911
0
  md->match_call_count = 0;
6912
0
  md->match_function_type = 0;
6913
0
  md->end_offset_top = 0;
6914
0
  /* NOTE(review): the literal 2 passed as the fourth argument is presumably the
  initial offset_top value (slots 0 and 1 being reserved for the whole match);
  confirm against the definition of match(). */
  rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);
6915
0
  if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
6916
6917
0
  switch(rc)
6918
0
    {
6919
    /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
6920
    the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP
6921
    entirely. The only way we can do that is to re-do the match at the same
6922
    point, with a flag to force SKIP with an argument to be ignored. Just
6923
    treating this case as NOMATCH does not work because it does not check other
6924
    alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. */
6925
6926
0
    case MATCH_SKIP_ARG:
6927
0
    new_start_match = start_match;
6928
0
    md->ignore_skip_arg = TRUE;
6929
0
    break;
6930
6931
    /* SKIP passes back the next starting point explicitly, but if it is the
6932
    same as the match we have just done, treat it as NOMATCH. */
6933
6934
0
    case MATCH_SKIP:
6935
0
    if (md->start_match_ptr != start_match)
6936
0
      {
6937
0
      new_start_match = md->start_match_ptr;
6938
0
      break;
6939
0
      }
6940
    /* Fall through */
6941
6942
    /* NOMATCH and PRUNE advance by one character. THEN at this level acts
6943
    exactly like PRUNE. Unset the ignore SKIP-with-argument flag. */
6944
6945
0
    case MATCH_NOMATCH:
6946
0
    case MATCH_PRUNE:
6947
0
    case MATCH_THEN:
6948
0
    md->ignore_skip_arg = FALSE;
6949
0
    new_start_match = start_match + 1;
6950
0
#ifdef SUPPORT_UTF
6951
0
    if (utf)
6952
0
      ACROSSCHAR(new_start_match < end_subject, *new_start_match,
6953
0
        new_start_match++);
6954
0
#endif
6955
0
    break;
6956
6957
    /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
6958
6959
0
    case MATCH_COMMIT:
6960
0
    rc = MATCH_NOMATCH;
6961
0
    goto ENDLOOP;
6962
6963
    /* Any other return is either a match, or some kind of error. */
6964
6965
0
    default:
6966
0
    goto ENDLOOP;
6967
0
    }
6968
6969
  /* Control reaches here for the various types of "no match at this point"
6970
  result. Reset the code to MATCH_NOMATCH for subsequent checking. */
6971
6972
0
  rc = MATCH_NOMATCH;
6973
6974
  /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
6975
  newline in the subject (though it may continue over the newline). Therefore,
6976
  if we have just failed to match, starting at a newline, do not continue. */
6977
6978
0
  if (firstline && IS_NEWLINE(start_match)) break;
6979
6980
  /* Advance to new matching position */
6981
6982
0
  start_match = new_start_match;
6983
6984
  /* Break the loop if the pattern is anchored or if we have passed the end of
6985
  the subject. */
6986
6987
0
  if (anchored || start_match > end_subject) break;
6988
6989
  /* If we have just passed a CR and we are now at a LF, and the pattern does
6990
  not contain any explicit matches for \r or \n, and the newline option is CRLF
6991
  or ANY or ANYCRLF, advance the match position by one more character. In
6992
  normal matching start_match will always be greater than the first position at
6993
  this stage, but a failed *SKIP can cause a return at the same point, which is
6994
  why the first test exists. */
6995
6996
0
  if (start_match > (PCRE_PUCHAR)subject + start_offset &&
6997
0
      start_match[-1] == CHAR_CR &&
6998
0
      start_match < end_subject &&
6999
0
      *start_match == CHAR_NL &&
7000
0
      (re->flags & PCRE_HASCRORLF) == 0 &&
7001
0
        (md->nltype == NLTYPE_ANY ||
7002
0
         md->nltype == NLTYPE_ANYCRLF ||
7003
0
         md->nllen == 2))
7004
0
    start_match++;
7005
7006
0
  md->mark = NULL;   /* Reset for start of next match attempt */
7007
0
  }                  /* End of for(;;) "bumpalong" loop */
7008
7009
/* ==========================================================================*/
7010
7011
/* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
7012
conditions is true:
7013
7014
(1) The pattern is anchored or the match was failed by (*COMMIT);
7015
7016
(2) We are past the end of the subject;
7017
7018
(3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
7019
    this option requests that a match occur at or before the first newline in
7020
    the subject.
7021
7022
When we have a match and the offset vector is big enough to deal with any
7023
backreferences, captured substring offsets will already be set up. In the case
7024
where we had to get some local store to hold offsets for backreference
7025
processing, copy those that we can. In this case there need not be overflow if
7026
certain parts of the pattern were not used, even though there are more
7027
capturing parentheses than vector slots. */
7028
7029
0
ENDLOOP:
7030
7031
0
if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
7032
0
  {
7033
0
  if (using_temporary_offsets)
7034
0
    {
7035
0
    if (arg_offset_max >= 4)
7036
0
      {
7037
0
      memcpy(offsets + 2, md->offset_vector + 2,
7038
0
        (arg_offset_max - 2) * sizeof(int));
7039
0
      DPRINTF(("Copied offsets from temporary memory\n"));
7040
0
      }
7041
0
    if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE;
7042
0
    DPRINTF(("Freeing temporary memory\n"));
7043
0
    (PUBL(free))(md->offset_vector);
7044
0
    }
7045
7046
  /* Set the return code to the number of captured strings, or 0 if there were
7047
  too many to fit into the vector. */
7048
7049
0
  rc = (md->offset_overflow && md->end_offset_top >= arg_offset_max)?
7050
0
    0 : md->end_offset_top/2;
7051
7052
  /* If there is space in the offset vector, set any unused pairs at the end of
7053
  the pattern to -1 for backwards compatibility. It is documented that this
7054
  happens. In earlier versions, the whole set of potential capturing offsets
7055
  was set to -1 each time round the loop, but this is handled differently now.
7056
  "Gaps" are set to -1 dynamically instead (this fixes a bug). Thus, it is only
7057
  those at the end that need unsetting here. We can't just unset them all at
7058
  the start of the whole thing because they may get set in one branch that is
7059
  not the final matching branch. */
7060
7061
0
  if (md->end_offset_top/2 <= re->top_bracket && offsets != NULL)
7062
0
    {
7063
0
    int *iptr, *iend;
7064
0
    int resetcount = 2 + re->top_bracket * 2;
7065
0
    if (resetcount > offsetcount) resetcount = offsetcount;
7066
0
    iptr = offsets + md->end_offset_top;
7067
0
    iend = offsets + resetcount;
7068
0
    while (iptr < iend) *iptr++ = -1;
7069
0
    }
7070
7071
  /* If there is space, set up the whole thing as substring 0. The value of
7072
  md->start_match_ptr might be modified if \K was encountered on the success
7073
  matching path. */
7074
7075
0
  if (offsetcount < 2) rc = 0; else
7076
0
    {
7077
0
    offsets[0] = (int)(md->start_match_ptr - md->start_subject);
7078
0
    offsets[1] = (int)(md->end_match_ptr - md->start_subject);
7079
0
    }
7080
7081
  /* Return MARK data if requested */
7082
7083
0
  if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
7084
0
    *(extra_data->mark) = (pcre_uchar *)md->mark;
7085
0
  DPRINTF((">>>> returning %d\n", rc));
7086
#ifdef NO_RECURSE
7087
  release_match_heapframes(&frame_zero);
7088
#endif
7089
0
  return rc;
7090
0
  }
7091
7092
/* Control gets here if there has been an error, or if the overall match
7093
attempt has failed at all permitted starting positions. */
7094
7095
0
if (using_temporary_offsets)
7096
0
  {
7097
0
  DPRINTF(("Freeing temporary memory\n"));
7098
0
  (PUBL(free))(md->offset_vector);
7099
0
  }
7100
7101
/* For anything other than nomatch or partial match, just return the code. */
7102
7103
0
if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
7104
0
  {
7105
0
  DPRINTF((">>>> error: returning %d\n", rc));
7106
#ifdef NO_RECURSE
7107
  release_match_heapframes(&frame_zero);
7108
#endif
7109
0
  return rc;
7110
0
  }
7111
7112
/* Handle partial matches - disable any mark data */
7113
7114
0
if (start_partial != NULL)
7115
0
  {
7116
0
  DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
7117
0
  md->mark = NULL;
7118
0
  if (offsetcount > 1)
7119
0
    {
7120
0
    offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);
7121
0
    offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
7122
0
    }
7123
0
  rc = PCRE_ERROR_PARTIAL;
7124
0
  }
7125
7126
/* This is the classic nomatch case */
7127
7128
0
else
7129
0
  {
7130
0
  DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
7131
0
  rc = PCRE_ERROR_NOMATCH;
7132
0
  }
7133
7134
/* Return the MARK data if it has been requested. */
7135
7136
0
if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
7137
0
  *(extra_data->mark) = (pcre_uchar *)md->nomatch_mark;
7138
#ifdef NO_RECURSE
7139
  release_match_heapframes(&frame_zero);
7140
#endif
7141
0
return rc;
7142
0
}
7143
7144
/* End of pcre_exec.c */