Coverage Report

Created: 2026-02-26 06:43

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/pcre2/src/pcre2_auto_possess.c
Line
Count
Source
1
/*************************************************
2
*      Perl-Compatible Regular Expressions       *
3
*************************************************/
4
5
/* PCRE is a library of functions to support regular expressions whose syntax
6
and semantics are as close as possible to those of the Perl 5 language.
7
8
                       Written by Philip Hazel
9
     Original API code Copyright (c) 1997-2012 University of Cambridge
10
          New API code Copyright (c) 2016-2024 University of Cambridge
11
12
-----------------------------------------------------------------------------
13
Redistribution and use in source and binary forms, with or without
14
modification, are permitted provided that the following conditions are met:
15
16
    * Redistributions of source code must retain the above copyright notice,
17
      this list of conditions and the following disclaimer.
18
19
    * Redistributions in binary form must reproduce the above copyright
20
      notice, this list of conditions and the following disclaimer in the
21
      documentation and/or other materials provided with the distribution.
22
23
    * Neither the name of the University of Cambridge nor the names of its
24
      contributors may be used to endorse or promote products derived from
25
      this software without specific prior written permission.
26
27
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37
POSSIBILITY OF SUCH DAMAGE.
38
-----------------------------------------------------------------------------
39
*/
40
41
42
/* This module contains functions that scan a compiled pattern and change
43
repeats into possessive repeats where possible. */
44
45
46
#include "pcre2_internal.h"
47
48
49
50
/* This macro represents the max size of list[] and that is used to keep
51
track of UCD info in several places; it should be kept in sync with the
52
value used by GenerateUcd.py */
53
1.83M
#define MAX_LIST 8
54
55
/*************************************************
56
*        Tables for auto-possessification        *
57
*************************************************/
58
59
/* This table is used to check whether auto-possessification is possible
60
between adjacent character-type opcodes. The left-hand (repeated) opcode is
61
used to select the row, and the right-hand opcode is used to select the column.
62
A value of 1 means that auto-possessification is OK. For example, the second
63
value in the first row means that \D+\d can be turned into \D++\d.
64
65
The Unicode property types (\P and \p) have to be present to fill out the table
66
because of what their opcode values are, but the table values should always be
67
zero because property types are handled separately in the code. The last four
68
columns apply to items that cannot be repeated, so there is no need to have
69
rows for them. Note that OP_DIGIT etc. are generated only when PCRE2_UCP is
70
*not* set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
71
72
#define APTROWS (LAST_AUTOTAB_LEFT_OP - FIRST_AUTOTAB_OP + 1)
73
#define APTCOLS (LAST_AUTOTAB_RIGHT_OP - FIRST_AUTOTAB_OP + 1)
74
75
static const uint8_t autoposstab[APTROWS][APTCOLS] = {
76
/* \D \d \S \s \W \w  . .+ \C \P \p \R \H \h \V \v \X \Z \z  $ $M */
77
  { 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* \D */
78
  { 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 },  /* \d */
79
  { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 },  /* \S */
80
  { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* \s */
81
  { 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* \W */
82
  { 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 },  /* \w */
83
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* .  */
84
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* .+ */
85
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* \C */
86
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },  /* \P */
87
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },  /* \p */
88
  { 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 },  /* \R */
89
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 },  /* \H */
90
  { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0 },  /* \h */
91
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0 },  /* \V */
92
  { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0 },  /* \v */
93
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }   /* \X */
94
};
95
96
#ifdef SUPPORT_UNICODE
97
/* This table is used to check whether auto-possessification is possible
98
between adjacent Unicode property opcodes (OP_PROP and OP_NOTPROP). The
99
left-hand (repeated) opcode is used to select the row, and the right-hand
100
opcode is used to select the column. The values are as follows:
101
102
  0   Always return FALSE (never auto-possessify)
103
  1   Character groups are distinct (possessify if both are OP_PROP)
104
  2   Check character categories in the same group (general or particular)
105
  3   TRUE if the two opcodes are not the same (PROP vs NOTPROP)
106
107
  4   Check left general category vs right particular category
108
  5   Check right general category vs left particular category
109
110
  6   Left alphanum vs right general category
111
  7   Left space vs right general category
112
  8   Left word vs right general category
113
114
  9   Right alphanum vs left general category
115
 10   Right space vs left general category
116
 11   Right word vs left general category
117
118
 12   Left alphanum vs right particular category
119
 13   Left space vs right particular category
120
 14   Left word vs right particular category
121
122
 15   Right alphanum vs left particular category
123
 16   Right space vs left particular category
124
 17   Right word vs left particular category
125
*/
126
127
static const uint8_t propposstab[PT_TABSIZE][PT_TABSIZE] = {
128
/* LAMP GC  PC  SC  SCX ALNUM SPACE PXSPACE WORD CLIST UCNC BIDICL BOOL */
129
  { 3,  0,  0,  0,   0,    3,    1,      1,   0,    0,   0,    0,    0 },  /* PT_LAMP */
130
  { 0,  2,  4,  0,   0,    9,   10,     10,  11,    0,   0,    0,    0 },  /* PT_GC */
131
  { 0,  5,  2,  0,   0,   15,   16,     16,  17,    0,   0,    0,    0 },  /* PT_PC */
132
  { 0,  0,  0,  2,   2,    0,    0,      0,   0,    0,   0,    0,    0 },  /* PT_SC */
133
  { 0,  0,  0,  2,   2,    0,    0,      0,   0,    0,   0,    0,    0 },  /* PT_SCX */
134
  { 3,  6, 12,  0,   0,    3,    1,      1,   0,    0,   0,    0,    0 },  /* PT_ALNUM */
135
  { 1,  7, 13,  0,   0,    1,    3,      3,   1,    0,   0,    0,    0 },  /* PT_SPACE */
136
  { 1,  7, 13,  0,   0,    1,    3,      3,   1,    0,   0,    0,    0 },  /* PT_PXSPACE */
137
  { 0,  8, 14,  0,   0,    0,    1,      1,   3,    0,   0,    0,    0 },  /* PT_WORD */
138
  { 0,  0,  0,  0,   0,    0,    0,      0,   0,    0,   0,    0,    0 },  /* PT_CLIST */
139
  { 0,  0,  0,  0,   0,    0,    0,      0,   0,    0,   3,    0,    0 },  /* PT_UCNC */
140
  { 0,  0,  0,  0,   0,    0,    0,      0,   0,    0,   0,    0,    0 },  /* PT_BIDICL */
141
  { 0,  0,  0,  0,   0,    0,    0,      0,   0,    0,   0,    0,    0 }   /* PT_BOOL */
142
  /* PT_ANY does not need a record. */
143
};
144
145
/* This table is used to check whether auto-possessification is possible
146
between adjacent Unicode property opcodes (OP_PROP and OP_NOTPROP) when one
147
specifies a general category and the other specifies a particular category. The
148
row is selected by the general category and the column by the particular
149
category. The value is 1 if the particular category is not part of the general
150
category. */
151
152
static const uint8_t catposstab[7][30] = {
153
/* Cc Cf Cn Co Cs Ll Lm Lo Lt Lu Mc Me Mn Nd Nl No Pc Pd Pe Pf Pi Po Ps Sc Sk Sm So Zl Zp Zs */
154
  { 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },  /* C */
155
  { 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },  /* L */
156
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },  /* M */
157
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },  /* N */
158
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1 },  /* P */
159
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1 },  /* S */
160
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0 }   /* Z */
161
};
162
163
/* This table is used when checking ALNUM, (PX)SPACE, SPACE, and WORD against
164
a general or particular category. The properties in each row are those
165
that apply to the character set in question. Duplication means that a little
166
unnecessary work is done when checking, but this keeps things much simpler
167
because they can all use the same code. For more details see the comment where
168
this table is used.
169
170
Note: SPACE and PXSPACE used to be different because Perl excluded VT from
171
"space", but from Perl 5.18 it's included, so both categories are treated the
172
same here. */
173
174
static const uint8_t posspropstab[3][4] = {
175
  { ucp_L, ucp_N, ucp_N, ucp_Nl },  /* ALNUM, 3rd and 4th values redundant */
176
  { ucp_Z, ucp_Z, ucp_C, ucp_Cc },  /* SPACE and PXSPACE, 2nd value redundant */
177
  { ucp_L, ucp_N, ucp_P, ucp_Po }   /* WORD */
178
};
179
#endif  /* SUPPORT_UNICODE */
180
181
182
183
#ifdef SUPPORT_UNICODE
184
/*************************************************
185
*        Check a character and a property        *
186
*************************************************/
187
188
/* This function is called by compare_opcodes() when a property item is
189
adjacent to a fixed character.
190
191
Arguments:
192
  c            the character
193
  ptype        the property type
194
  pdata        the data for the type
195
  negated      TRUE if it's a negated property (\P or \p{^)
196
197
Returns:       TRUE if auto-possessifying is OK
198
*/
199
200
static BOOL
201
check_char_prop(uint32_t c, unsigned int ptype, unsigned int pdata,
202
  BOOL negated)
203
998k
{
204
998k
BOOL ok, rc;
205
998k
const uint32_t *p;
206
998k
const ucd_record *prop = GET_UCD(c);
207
208
998k
switch(ptype)
209
998k
  {
210
35.5k
  case PT_LAMP:
211
35.5k
  return (prop->chartype == ucp_Lu ||
212
24.6k
          prop->chartype == ucp_Ll ||
213
14.1k
          prop->chartype == ucp_Lt) == negated;
214
215
79.2k
  case PT_GC:
216
79.2k
  return (pdata == PRIV(ucp_gentype)[prop->chartype]) == negated;
217
218
118k
  case PT_PC:
219
118k
  return (pdata == prop->chartype) == negated;
220
221
32.7k
  case PT_SC:
222
32.7k
  return (pdata == prop->script) == negated;
223
224
82.2k
  case PT_SCX:
225
82.2k
  ok = (pdata == prop->script
226
72.2k
        || MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), pdata) != 0);
227
82.2k
  return ok == negated;
228
229
  /* These are specials */
230
231
10.2k
  case PT_ALNUM:
232
10.2k
  return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
233
6.93k
          PRIV(ucp_gentype)[prop->chartype] == ucp_N) == negated;
234
235
  /* Perl space used to exclude VT, but from Perl 5.18 it is included, which
236
  means that Perl space and POSIX space are now identical. PCRE was changed
237
  at release 8.34. */
238
239
354k
  case PT_SPACE:    /* Perl space */
240
359k
  case PT_PXSPACE:  /* POSIX space */
241
359k
  switch(c)
242
359k
    {
243
1.09M
    HSPACE_CASES:
244
1.09M
    VSPACE_CASES:
245
155k
    rc = negated;
246
155k
    break;
247
248
203k
    default:
249
203k
    rc = (PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == negated;
250
359k
    }
251
359k
  return rc;
252
253
150k
  case PT_WORD:
254
150k
  return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
255
77.7k
          PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
256
69.1k
          c == CHAR_UNDERSCORE) == negated;
257
258
0
  case PT_CLIST:
259
0
  p = PRIV(ucd_caseless_sets) + prop->caseset;
260
0
  for (;;)
261
0
    {
262
0
    if (c < *p) return !negated;
263
0
    if (c == *p++) return negated;
264
0
    }
265
  /* LCOV_EXCL_START */
266
0
  PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */
267
0
  break;
268
  /* LCOV_EXCL_STOP */
269
270
  /* Haven't yet thought these through. */
271
272
20.9k
  case PT_BIDICL:
273
20.9k
  return FALSE;
274
275
48.8k
  case PT_BOOL:
276
48.8k
  return FALSE;
277
998k
  }
278
279
59.8k
return FALSE;
280
998k
}
pcre2_auto_possess.c:check_char_prop
Line
Count
Source
203
661k
{
204
661k
BOOL ok, rc;
205
661k
const uint32_t *p;
206
661k
const ucd_record *prop = GET_UCD(c);
207
208
661k
switch(ptype)
209
661k
  {
210
27.1k
  case PT_LAMP:
211
27.1k
  return (prop->chartype == ucp_Lu ||
212
19.0k
          prop->chartype == ucp_Ll ||
213
10.4k
          prop->chartype == ucp_Lt) == negated;
214
215
46.2k
  case PT_GC:
216
46.2k
  return (pdata == PRIV(ucp_gentype)[prop->chartype]) == negated;
217
218
76.6k
  case PT_PC:
219
76.6k
  return (pdata == prop->chartype) == negated;
220
221
28.3k
  case PT_SC:
222
28.3k
  return (pdata == prop->script) == negated;
223
224
50.6k
  case PT_SCX:
225
50.6k
  ok = (pdata == prop->script
226
41.9k
        || MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), pdata) != 0);
227
50.6k
  return ok == negated;
228
229
  /* These are specials */
230
231
7.37k
  case PT_ALNUM:
232
7.37k
  return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
233
4.57k
          PRIV(ucp_gentype)[prop->chartype] == ucp_N) == negated;
234
235
  /* Perl space used to exclude VT, but from Perl 5.18 it is included, which
236
  means that Perl space and POSIX space are now identical. PCRE was changed
237
  at release 8.34. */
238
239
224k
  case PT_SPACE:    /* Perl space */
240
227k
  case PT_PXSPACE:  /* POSIX space */
241
227k
  switch(c)
242
227k
    {
243
752k
    HSPACE_CASES:
244
752k
    VSPACE_CASES:
245
108k
    rc = negated;
246
108k
    break;
247
248
119k
    default:
249
119k
    rc = (PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == negated;
250
227k
    }
251
227k
  return rc;
252
253
100k
  case PT_WORD:
254
100k
  return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
255
51.3k
          PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
256
44.2k
          c == CHAR_UNDERSCORE) == negated;
257
258
0
  case PT_CLIST:
259
0
  p = PRIV(ucd_caseless_sets) + prop->caseset;
260
0
  for (;;)
261
0
    {
262
0
    if (c < *p) return !negated;
263
0
    if (c == *p++) return negated;
264
0
    }
265
  /* LCOV_EXCL_START */
266
0
  PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */
267
0
  break;
268
  /* LCOV_EXCL_STOP */
269
270
  /* Haven't yet thought these through. */
271
272
15.5k
  case PT_BIDICL:
273
15.5k
  return FALSE;
274
275
21.9k
  case PT_BOOL:
276
21.9k
  return FALSE;
277
661k
  }
278
279
59.4k
return FALSE;
280
661k
}
pcre2_auto_possess.c:check_char_prop
Line
Count
Source
203
336k
{
204
336k
BOOL ok, rc;
205
336k
const uint32_t *p;
206
336k
const ucd_record *prop = GET_UCD(c);
207
208
336k
switch(ptype)
209
336k
  {
210
8.45k
  case PT_LAMP:
211
8.45k
  return (prop->chartype == ucp_Lu ||
212
5.58k
          prop->chartype == ucp_Ll ||
213
3.78k
          prop->chartype == ucp_Lt) == negated;
214
215
33.0k
  case PT_GC:
216
33.0k
  return (pdata == PRIV(ucp_gentype)[prop->chartype]) == negated;
217
218
41.9k
  case PT_PC:
219
41.9k
  return (pdata == prop->chartype) == negated;
220
221
4.48k
  case PT_SC:
222
4.48k
  return (pdata == prop->script) == negated;
223
224
31.6k
  case PT_SCX:
225
31.6k
  ok = (pdata == prop->script
226
30.2k
        || MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), pdata) != 0);
227
31.6k
  return ok == negated;
228
229
  /* These are specials */
230
231
2.90k
  case PT_ALNUM:
232
2.90k
  return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
233
2.35k
          PRIV(ucp_gentype)[prop->chartype] == ucp_N) == negated;
234
235
  /* Perl space used to exclude VT, but from Perl 5.18 it is included, which
236
  means that Perl space and POSIX space are now identical. PCRE was changed
237
  at release 8.34. */
238
239
130k
  case PT_SPACE:    /* Perl space */
240
131k
  case PT_PXSPACE:  /* POSIX space */
241
131k
  switch(c)
242
131k
    {
243
345k
    HSPACE_CASES:
244
345k
    VSPACE_CASES:
245
47.3k
    rc = negated;
246
47.3k
    break;
247
248
84.5k
    default:
249
84.5k
    rc = (PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == negated;
250
131k
    }
251
131k
  return rc;
252
253
49.9k
  case PT_WORD:
254
49.9k
  return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
255
26.4k
          PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
256
24.8k
          c == CHAR_UNDERSCORE) == negated;
257
258
0
  case PT_CLIST:
259
0
  p = PRIV(ucd_caseless_sets) + prop->caseset;
260
0
  for (;;)
261
0
    {
262
0
    if (c < *p) return !negated;
263
0
    if (c == *p++) return negated;
264
0
    }
265
  /* LCOV_EXCL_START */
266
0
  PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */
267
0
  break;
268
  /* LCOV_EXCL_STOP */
269
270
  /* Haven't yet thought these through. */
271
272
5.36k
  case PT_BIDICL:
273
5.36k
  return FALSE;
274
275
26.9k
  case PT_BOOL:
276
26.9k
  return FALSE;
277
336k
  }
278
279
364
return FALSE;
280
336k
}
281
#endif  /* SUPPORT_UNICODE */
282
283
284
285
/*************************************************
286
*        Base opcode of repeated opcodes         *
287
*************************************************/
288
289
/* Returns the base opcode for repeated single character type opcodes. If the
290
opcode is not a repeated character type, it returns with the original value.
291
292
Arguments:  c opcode
293
Returns:    base opcode for the type
294
*/
295
296
static PCRE2_UCHAR
297
get_repeat_base(PCRE2_UCHAR c)
298
129M
{
299
129M
return (c > OP_TYPEPOSUPTO)? c :
300
129M
       (c >= OP_TYPESTAR)?   OP_TYPESTAR :
301
129M
       (c >= OP_NOTSTARI)?   OP_NOTSTARI :
302
86.5M
       (c >= OP_NOTSTAR)?    OP_NOTSTAR :
303
85.6M
       (c >= OP_STARI)?      OP_STARI :
304
82.6M
                             OP_STAR;
305
129M
}
306
307
308
/*************************************************
309
*        Fill the character property list        *
310
*************************************************/
311
312
/* Checks whether the code points to an opcode that can take part in auto-
313
possessification, and if so, fills a list with its properties.
314
315
Arguments:
316
  code        points to start of expression
317
  utf         TRUE if in UTF mode
318
  ucp         TRUE if in UCP mode
319
  fcc         points to the case-flipping table
320
  list        points to output list
321
              list[0] will be filled with the opcode
322
              list[1] will be non-zero if this opcode
323
                can match an empty character string
324
              list[2..7] depends on the opcode
325
326
Returns:      points to the start of the next opcode if *code is accepted
327
              NULL if *code is not accepted
328
*/
329
330
static PCRE2_SPTR
331
get_chr_property_list(PCRE2_SPTR code, BOOL utf, BOOL ucp, const uint8_t *fcc,
332
  uint32_t *list)
333
89.4M
{
334
89.4M
PCRE2_UCHAR c = *code;
335
89.4M
PCRE2_UCHAR base;
336
89.4M
PCRE2_SPTR end;
337
89.4M
PCRE2_SPTR class_end;
338
89.4M
uint32_t chr;
339
340
89.4M
#ifdef SUPPORT_UNICODE
341
89.4M
uint32_t *clist_dest;
342
89.4M
const uint32_t *clist_src;
343
#else
344
(void)utf;    /* Suppress "unused parameter" compiler warnings */
345
(void)ucp;
346
#endif
347
348
89.4M
list[0] = c;
349
89.4M
list[1] = FALSE;
350
89.4M
code++;
351
352
89.4M
if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
353
64.2M
  {
354
64.2M
  base = get_repeat_base(c);
355
64.2M
  c -= (base - OP_STAR);
356
357
64.2M
  if (c == OP_UPTO || c == OP_MINUPTO || c == OP_EXACT || c == OP_POSUPTO)
358
3.00M
    code += IMM2_SIZE;
359
360
64.2M
  list[1] = (c != OP_PLUS && c != OP_MINPLUS && c != OP_EXACT &&
361
46.5M
             c != OP_POSPLUS);
362
363
64.2M
  switch(base)
364
64.2M
    {
365
33.3M
    case OP_STAR:
366
33.3M
    list[0] = OP_CHAR;
367
33.3M
    break;
368
369
7.73M
    case OP_STARI:
370
7.73M
    list[0] = OP_CHARI;
371
7.73M
    break;
372
373
1.41M
    case OP_NOTSTAR:
374
1.41M
    list[0] = OP_NOT;
375
1.41M
    break;
376
377
420k
    case OP_NOTSTARI:
378
420k
    list[0] = OP_NOTI;
379
420k
    break;
380
381
21.3M
    case OP_TYPESTAR:
382
21.3M
    list[0] = *code;
383
21.3M
    code++;
384
21.3M
    break;
385
64.2M
    }
386
64.2M
  c = list[0];
387
64.2M
  }
388
389
89.4M
switch(c)
390
89.4M
  {
391
486k
  case OP_NOT_DIGIT:
392
3.19M
  case OP_DIGIT:
393
5.18M
  case OP_NOT_WHITESPACE:
394
5.65M
  case OP_WHITESPACE:
395
7.30M
  case OP_NOT_WORDCHAR:
396
8.41M
  case OP_WORDCHAR:
397
11.6M
  case OP_ANY:
398
12.3M
  case OP_ALLANY:
399
13.9M
  case OP_ANYNL:
400
15.1M
  case OP_NOT_HSPACE:
401
16.2M
  case OP_HSPACE:
402
17.0M
  case OP_NOT_VSPACE:
403
17.4M
  case OP_VSPACE:
404
18.7M
  case OP_EXTUNI:
405
18.8M
  case OP_EODN:
406
18.9M
  case OP_EOD:
407
19.1M
  case OP_DOLL:
408
19.2M
  case OP_DOLLM:
409
19.2M
  return code;
410
411
45.7M
  case OP_CHAR:
412
47.1M
  case OP_NOT:
413
47.1M
  GETCHARINCTEST(chr, code);
414
47.1M
  list[2] = chr;
415
47.1M
  list[3] = NOTACHAR;
416
47.1M
  return code;
417
418
11.6M
  case OP_CHARI:
419
12.0M
  case OP_NOTI:
420
12.0M
  list[0] = (c == OP_CHARI) ? OP_CHAR : OP_NOT;
421
12.0M
  GETCHARINCTEST(chr, code);
422
12.0M
  list[2] = chr;
423
424
12.0M
#ifdef SUPPORT_UNICODE
425
12.0M
  if (chr < 128 || (chr < 256 && !utf && !ucp))
426
7.41M
    list[3] = fcc[chr];
427
4.65M
  else
428
4.65M
    list[3] = UCD_OTHERCASE(chr);
429
#elif defined SUPPORT_WIDE_CHARS
430
  list[3] = (chr < 256) ? fcc[chr] : chr;
431
#else
432
  list[3] = fcc[chr];
433
#endif
434
435
  /* The othercase might be the same value. */
436
437
12.0M
  if (chr == list[3])
438
9.17M
    list[3] = NOTACHAR;
439
2.89M
  else
440
2.89M
    list[4] = NOTACHAR;
441
12.0M
  return code;
442
443
0
#ifdef SUPPORT_UNICODE
444
1.95M
  case OP_PROP:
445
4.35M
  case OP_NOTPROP:
446
4.35M
  if (code[0] != PT_CLIST)
447
3.89M
    {
448
3.89M
    list[2] = code[0];
449
3.89M
    list[3] = code[1];
450
3.89M
    return code + 2;
451
3.89M
    }
452
453
  /* Convert only if we have enough space. */
454
455
456k
  clist_src = PRIV(ucd_caseless_sets) + code[1];
456
456k
  clist_dest = list + 2;
457
456k
  code += 2;
458
459
1.83M
  do {
460
1.83M
     if (clist_dest >= list + MAX_LIST)
461
0
       {
462
       /* Early return if there is not enough space. GenerateUcd.py
463
       generated a list with more than 5 characters and something
464
       must be done about that going forward. */
465
0
       PCRE2_DEBUG_UNREACHABLE();   /* Remove if it ever triggers */
466
0
       list[2] = code[0];
467
0
       list[3] = code[1];
468
0
       return code;
469
0
       }
470
1.83M
     *clist_dest++ = *clist_src;
471
1.83M
     }
472
1.83M
  while(*clist_src++ != NOTACHAR);
473
474
  /* All characters are stored. The terminating NOTACHAR is copied from the
475
  clist itself. */
476
477
456k
  list[0] = (c == OP_PROP) ? OP_CHAR : OP_NOT;
478
456k
  return code;
479
0
#endif
480
481
761k
  case OP_NCLASS:
482
2.51M
  case OP_CLASS:
483
2.51M
#ifdef SUPPORT_WIDE_CHARS
484
4.25M
  case OP_XCLASS:
485
4.45M
  case OP_ECLASS:
486
4.45M
  if (c == OP_XCLASS || c == OP_ECLASS)
487
1.93M
    end = code + GET(code, 0) - 1;
488
2.51M
  else
489
2.51M
#endif
490
2.51M
    end = code + 32 / sizeof(PCRE2_UCHAR);
491
4.45M
  class_end = end;
492
493
4.45M
  switch(*end)
494
4.45M
    {
495
506k
    case OP_CRSTAR:
496
625k
    case OP_CRMINSTAR:
497
1.44M
    case OP_CRQUERY:
498
1.61M
    case OP_CRMINQUERY:
499
1.62M
    case OP_CRPOSSTAR:
500
1.62M
    case OP_CRPOSQUERY:
501
1.62M
    list[1] = TRUE;
502
1.62M
    end++;
503
1.62M
    break;
504
505
1.27M
    case OP_CRPLUS:
506
1.55M
    case OP_CRMINPLUS:
507
1.57M
    case OP_CRPOSPLUS:
508
1.57M
    end++;
509
1.57M
    break;
510
511
558k
    case OP_CRRANGE:
512
793k
    case OP_CRMINRANGE:
513
805k
    case OP_CRPOSRANGE:
514
805k
    list[1] = (GET2(end, 1) == 0);
515
805k
    end += 1 + 2 * IMM2_SIZE;
516
805k
    break;
517
4.45M
    }
518
4.45M
  list[2] = (uint32_t)(end - code);
519
4.45M
  list[3] = (uint32_t)(end - class_end);
520
4.45M
  return end;
521
89.4M
  }
522
523
2.15M
return NULL;    /* Opcode not accepted */
524
89.4M
}
pcre2_auto_possess.c:get_chr_property_list
Line
Count
Source
333
44.8M
{
334
44.8M
PCRE2_UCHAR c = *code;
335
44.8M
PCRE2_UCHAR base;
336
44.8M
PCRE2_SPTR end;
337
44.8M
PCRE2_SPTR class_end;
338
44.8M
uint32_t chr;
339
340
44.8M
#ifdef SUPPORT_UNICODE
341
44.8M
uint32_t *clist_dest;
342
44.8M
const uint32_t *clist_src;
343
#else
344
(void)utf;    /* Suppress "unused parameter" compiler warnings */
345
(void)ucp;
346
#endif
347
348
44.8M
list[0] = c;
349
44.8M
list[1] = FALSE;
350
44.8M
code++;
351
352
44.8M
if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
353
33.9M
  {
354
33.9M
  base = get_repeat_base(c);
355
33.9M
  c -= (base - OP_STAR);
356
357
33.9M
  if (c == OP_UPTO || c == OP_MINUPTO || c == OP_EXACT || c == OP_POSUPTO)
358
1.38M
    code += IMM2_SIZE;
359
360
33.9M
  list[1] = (c != OP_PLUS && c != OP_MINPLUS && c != OP_EXACT &&
361
24.9M
             c != OP_POSPLUS);
362
363
33.9M
  switch(base)
364
33.9M
    {
365
19.7M
    case OP_STAR:
366
19.7M
    list[0] = OP_CHAR;
367
19.7M
    break;
368
369
3.90M
    case OP_STARI:
370
3.90M
    list[0] = OP_CHARI;
371
3.90M
    break;
372
373
688k
    case OP_NOTSTAR:
374
688k
    list[0] = OP_NOT;
375
688k
    break;
376
377
258k
    case OP_NOTSTARI:
378
258k
    list[0] = OP_NOTI;
379
258k
    break;
380
381
9.36M
    case OP_TYPESTAR:
382
9.36M
    list[0] = *code;
383
9.36M
    code++;
384
9.36M
    break;
385
33.9M
    }
386
33.9M
  c = list[0];
387
33.9M
  }
388
389
44.8M
switch(c)
390
44.8M
  {
391
269k
  case OP_NOT_DIGIT:
392
720k
  case OP_DIGIT:
393
1.45M
  case OP_NOT_WHITESPACE:
394
1.65M
  case OP_WHITESPACE:
395
2.07M
  case OP_NOT_WORDCHAR:
396
2.86M
  case OP_WORDCHAR:
397
5.45M
  case OP_ANY:
398
5.74M
  case OP_ALLANY:
399
6.19M
  case OP_ANYNL:
400
6.89M
  case OP_NOT_HSPACE:
401
7.67M
  case OP_HSPACE:
402
7.81M
  case OP_NOT_VSPACE:
403
7.91M
  case OP_VSPACE:
404
8.49M
  case OP_EXTUNI:
405
8.52M
  case OP_EODN:
406
8.54M
  case OP_EOD:
407
8.66M
  case OP_DOLL:
408
8.69M
  case OP_DOLLM:
409
8.69M
  return code;
410
411
24.9M
  case OP_CHAR:
412
25.6M
  case OP_NOT:
413
25.6M
  GETCHARINCTEST(chr, code);
414
25.6M
  list[2] = chr;
415
25.6M
  list[3] = NOTACHAR;
416
25.6M
  return code;
417
418
5.42M
  case OP_CHARI:
419
5.72M
  case OP_NOTI:
420
5.72M
  list[0] = (c == OP_CHARI) ? OP_CHAR : OP_NOT;
421
5.72M
  GETCHARINCTEST(chr, code);
422
5.72M
  list[2] = chr;
423
424
5.72M
#ifdef SUPPORT_UNICODE
425
5.72M
  if (chr < 128 || (chr < 256 && !utf && !ucp))
426
4.88M
    list[3] = fcc[chr];
427
845k
  else
428
845k
    list[3] = UCD_OTHERCASE(chr);
429
#elif defined SUPPORT_WIDE_CHARS
430
  list[3] = (chr < 256) ? fcc[chr] : chr;
431
#else
432
  list[3] = fcc[chr];
433
#endif
434
435
  /* The othercase might be the same value. */
436
437
5.72M
  if (chr == list[3])
438
3.93M
    list[3] = NOTACHAR;
439
1.79M
  else
440
1.79M
    list[4] = NOTACHAR;
441
5.72M
  return code;
442
443
0
#ifdef SUPPORT_UNICODE
444
595k
  case OP_PROP:
445
1.65M
  case OP_NOTPROP:
446
1.65M
  if (code[0] != PT_CLIST)
447
1.40M
    {
448
1.40M
    list[2] = code[0];
449
1.40M
    list[3] = code[1];
450
1.40M
    return code + 2;
451
1.40M
    }
452
453
  /* Convert only if we have enough space. */
454
455
253k
  clist_src = PRIV(ucd_caseless_sets) + code[1];
456
253k
  clist_dest = list + 2;
457
253k
  code += 2;
458
459
1.01M
  do {
460
1.01M
     if (clist_dest >= list + MAX_LIST)
461
0
       {
462
       /* Early return if there is not enough space. GenerateUcd.py
463
       generated a list with more than 5 characters and something
464
       must be done about that going forward. */
465
0
       PCRE2_DEBUG_UNREACHABLE();   /* Remove if it ever triggers */
466
0
       list[2] = code[0];
467
0
       list[3] = code[1];
468
0
       return code;
469
0
       }
470
1.01M
     *clist_dest++ = *clist_src;
471
1.01M
     }
472
1.01M
  while(*clist_src++ != NOTACHAR);
473
474
  /* All characters are stored. The terminating NOTACHAR is copied from the
475
  clist itself. */
476
477
253k
  list[0] = (c == OP_PROP) ? OP_CHAR : OP_NOT;
478
253k
  return code;
479
0
#endif
480
481
593k
  case OP_NCLASS:
482
1.97M
  case OP_CLASS:
483
1.97M
#ifdef SUPPORT_WIDE_CHARS
484
2.30M
  case OP_XCLASS:
485
2.42M
  case OP_ECLASS:
486
2.42M
  if (c == OP_XCLASS || c == OP_ECLASS)
487
453k
    end = code + GET(code, 0) - 1;
488
1.97M
  else
489
1.97M
#endif
490
1.97M
    end = code + 32 / sizeof(PCRE2_UCHAR);
491
2.42M
  class_end = end;
492
493
2.42M
  switch(*end)
494
2.42M
    {
495
248k
    case OP_CRSTAR:
496
286k
    case OP_CRMINSTAR:
497
667k
    case OP_CRQUERY:
498
767k
    case OP_CRMINQUERY:
499
770k
    case OP_CRPOSSTAR:
500
774k
    case OP_CRPOSQUERY:
501
774k
    list[1] = TRUE;
502
774k
    end++;
503
774k
    break;
504
505
890k
    case OP_CRPLUS:
506
1.07M
    case OP_CRMINPLUS:
507
1.08M
    case OP_CRPOSPLUS:
508
1.08M
    end++;
509
1.08M
    break;
510
511
230k
    case OP_CRRANGE:
512
345k
    case OP_CRMINRANGE:
513
350k
    case OP_CRPOSRANGE:
514
350k
    list[1] = (GET2(end, 1) == 0);
515
350k
    end += 1 + 2 * IMM2_SIZE;
516
350k
    break;
517
2.42M
    }
518
2.42M
  list[2] = (uint32_t)(end - code);
519
2.42M
  list[3] = (uint32_t)(end - class_end);
520
2.42M
  return end;
521
44.8M
  }
522
523
694k
return NULL;    /* Opcode not accepted */
524
44.8M
}
pcre2_auto_possess.c:get_chr_property_list
Line
Count
Source
333
17.6M
{
334
17.6M
PCRE2_UCHAR c = *code;
335
17.6M
PCRE2_UCHAR base;
336
17.6M
PCRE2_SPTR end;
337
17.6M
PCRE2_SPTR class_end;
338
17.6M
uint32_t chr;
339
340
17.6M
#ifdef SUPPORT_UNICODE
341
17.6M
uint32_t *clist_dest;
342
17.6M
const uint32_t *clist_src;
343
#else
344
(void)utf;    /* Suppress "unused parameter" compiler warnings */
345
(void)ucp;
346
#endif
347
348
17.6M
list[0] = c;
349
17.6M
list[1] = FALSE;
350
17.6M
code++;
351
352
17.6M
if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
353
11.2M
  {
354
11.2M
  base = get_repeat_base(c);
355
11.2M
  c -= (base - OP_STAR);
356
357
11.2M
  if (c == OP_UPTO || c == OP_MINUPTO || c == OP_EXACT || c == OP_POSUPTO)
358
652k
    code += IMM2_SIZE;
359
360
11.2M
  list[1] = (c != OP_PLUS && c != OP_MINPLUS && c != OP_EXACT &&
361
8.17M
             c != OP_POSPLUS);
362
363
11.2M
  switch(base)
364
11.2M
    {
365
5.67M
    case OP_STAR:
366
5.67M
    list[0] = OP_CHAR;
367
5.67M
    break;
368
369
2.09M
    case OP_STARI:
370
2.09M
    list[0] = OP_CHARI;
371
2.09M
    break;
372
373
212k
    case OP_NOTSTAR:
374
212k
    list[0] = OP_NOT;
375
212k
    break;
376
377
73.2k
    case OP_NOTSTARI:
378
73.2k
    list[0] = OP_NOTI;
379
73.2k
    break;
380
381
3.17M
    case OP_TYPESTAR:
382
3.17M
    list[0] = *code;
383
3.17M
    code++;
384
3.17M
    break;
385
11.2M
    }
386
11.2M
  c = list[0];
387
11.2M
  }
388
389
17.6M
switch(c)
390
17.6M
  {
391
64.4k
  case OP_NOT_DIGIT:
392
201k
  case OP_DIGIT:
393
780k
  case OP_NOT_WHITESPACE:
394
893k
  case OP_WHITESPACE:
395
998k
  case OP_NOT_WORDCHAR:
396
1.12M
  case OP_WORDCHAR:
397
1.25M
  case OP_ANY:
398
1.29M
  case OP_ALLANY:
399
1.72M
  case OP_ANYNL:
400
1.89M
  case OP_NOT_HSPACE:
401
2.05M
  case OP_HSPACE:
402
2.27M
  case OP_NOT_VSPACE:
403
2.34M
  case OP_VSPACE:
404
2.72M
  case OP_EXTUNI:
405
2.74M
  case OP_EODN:
406
2.76M
  case OP_EOD:
407
2.80M
  case OP_DOLL:
408
2.84M
  case OP_DOLLM:
409
2.84M
  return code;
410
411
8.95M
  case OP_CHAR:
412
9.16M
  case OP_NOT:
413
9.16M
  GETCHARINCTEST(chr, code);
414
9.16M
  list[2] = chr;
415
9.16M
  list[3] = NOTACHAR;
416
9.16M
  return code;
417
418
3.34M
  case OP_CHARI:
419
3.41M
  case OP_NOTI:
420
3.41M
  list[0] = (c == OP_CHARI) ? OP_CHAR : OP_NOT;
421
3.41M
  GETCHARINCTEST(chr, code);
422
3.41M
  list[2] = chr;
423
424
3.41M
#ifdef SUPPORT_UNICODE
425
3.41M
  if (chr < 128 || (chr < 256 && !utf && !ucp))
426
1.23M
    list[3] = fcc[chr];
427
2.18M
  else
428
2.18M
    list[3] = UCD_OTHERCASE(chr);
429
#elif defined SUPPORT_WIDE_CHARS
430
  list[3] = (chr < 256) ? fcc[chr] : chr;
431
#else
432
  list[3] = fcc[chr];
433
#endif
434
435
  /* The othercase might be the same value. */
436
437
3.41M
  if (chr == list[3])
438
2.86M
    list[3] = NOTACHAR;
439
549k
  else
440
549k
    list[4] = NOTACHAR;
441
3.41M
  return code;
442
443
0
#ifdef SUPPORT_UNICODE
444
447k
  case OP_PROP:
445
839k
  case OP_NOTPROP:
446
839k
  if (code[0] != PT_CLIST)
447
748k
    {
448
748k
    list[2] = code[0];
449
748k
    list[3] = code[1];
450
748k
    return code + 2;
451
748k
    }
452
453
  /* Convert only if we have enough space. */
454
455
91.0k
  clist_src = PRIV(ucd_caseless_sets) + code[1];
456
91.0k
  clist_dest = list + 2;
457
91.0k
  code += 2;
458
459
365k
  do {
460
365k
     if (clist_dest >= list + MAX_LIST)
461
0
       {
462
       /* Early return if there is not enough space. GenerateUcd.py
463
       generated a list with more than 5 characters and something
464
       must be done about that going forward. */
465
0
       PCRE2_DEBUG_UNREACHABLE();   /* Remove if it ever triggers */
466
0
       list[2] = code[0];
467
0
       list[3] = code[1];
468
0
       return code;
469
0
       }
470
365k
     *clist_dest++ = *clist_src;
471
365k
     }
472
365k
  while(*clist_src++ != NOTACHAR);
473
474
  /* All characters are stored. The terminating NOTACHAR is copied from the
475
  clist itself. */
476
477
91.0k
  list[0] = (c == OP_PROP) ? OP_CHAR : OP_NOT;
478
91.0k
  return code;
479
0
#endif
480
481
70.6k
  case OP_NCLASS:
482
215k
  case OP_CLASS:
483
215k
#ifdef SUPPORT_WIDE_CHARS
484
652k
  case OP_XCLASS:
485
688k
  case OP_ECLASS:
486
688k
  if (c == OP_XCLASS || c == OP_ECLASS)
487
472k
    end = code + GET(code, 0) - 1;
488
215k
  else
489
215k
#endif
490
215k
    end = code + 32 / sizeof(PCRE2_UCHAR);
491
688k
  class_end = end;
492
493
688k
  switch(*end)
494
688k
    {
495
114k
    case OP_CRSTAR:
496
164k
    case OP_CRMINSTAR:
497
265k
    case OP_CRQUERY:
498
302k
    case OP_CRMINQUERY:
499
304k
    case OP_CRPOSSTAR:
500
305k
    case OP_CRPOSQUERY:
501
305k
    list[1] = TRUE;
502
305k
    end++;
503
305k
    break;
504
505
113k
    case OP_CRPLUS:
506
139k
    case OP_CRMINPLUS:
507
144k
    case OP_CRPOSPLUS:
508
144k
    end++;
509
144k
    break;
510
511
123k
    case OP_CRRANGE:
512
162k
    case OP_CRMINRANGE:
513
163k
    case OP_CRPOSRANGE:
514
163k
    list[1] = (GET2(end, 1) == 0);
515
163k
    end += 1 + 2 * IMM2_SIZE;
516
163k
    break;
517
688k
    }
518
688k
  list[2] = (uint32_t)(end - code);
519
688k
  list[3] = (uint32_t)(end - class_end);
520
688k
  return end;
521
17.6M
  }
522
523
708k
return NULL;    /* Opcode not accepted */
524
17.6M
}
pcre2_auto_possess.c:get_chr_property_list
Line
Count
Source
333
26.8M
{
334
26.8M
PCRE2_UCHAR c = *code;
335
26.8M
PCRE2_UCHAR base;
336
26.8M
PCRE2_SPTR end;
337
26.8M
PCRE2_SPTR class_end;
338
26.8M
uint32_t chr;
339
340
26.8M
#ifdef SUPPORT_UNICODE
341
26.8M
uint32_t *clist_dest;
342
26.8M
const uint32_t *clist_src;
343
#else
344
(void)utf;    /* Suppress "unused parameter" compiler warnings */
345
(void)ucp;
346
#endif
347
348
26.8M
list[0] = c;
349
26.8M
list[1] = FALSE;
350
26.8M
code++;
351
352
26.8M
if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
353
19.0M
  {
354
19.0M
  base = get_repeat_base(c);
355
19.0M
  c -= (base - OP_STAR);
356
357
19.0M
  if (c == OP_UPTO || c == OP_MINUPTO || c == OP_EXACT || c == OP_POSUPTO)
358
970k
    code += IMM2_SIZE;
359
360
19.0M
  list[1] = (c != OP_PLUS && c != OP_MINPLUS && c != OP_EXACT &&
361
13.4M
             c != OP_POSPLUS);
362
363
19.0M
  switch(base)
364
19.0M
    {
365
7.86M
    case OP_STAR:
366
7.86M
    list[0] = OP_CHAR;
367
7.86M
    break;
368
369
1.73M
    case OP_STARI:
370
1.73M
    list[0] = OP_CHARI;
371
1.73M
    break;
372
373
517k
    case OP_NOTSTAR:
374
517k
    list[0] = OP_NOT;
375
517k
    break;
376
377
88.9k
    case OP_NOTSTARI:
378
88.9k
    list[0] = OP_NOTI;
379
88.9k
    break;
380
381
8.84M
    case OP_TYPESTAR:
382
8.84M
    list[0] = *code;
383
8.84M
    code++;
384
8.84M
    break;
385
19.0M
    }
386
19.0M
  c = list[0];
387
19.0M
  }
388
389
26.8M
switch(c)
390
26.8M
  {
391
152k
  case OP_NOT_DIGIT:
392
2.27M
  case OP_DIGIT:
393
2.94M
  case OP_NOT_WHITESPACE:
394
3.11M
  case OP_WHITESPACE:
395
4.23M
  case OP_NOT_WORDCHAR:
396
4.42M
  case OP_WORDCHAR:
397
4.95M
  case OP_ANY:
398
5.32M
  case OP_ALLANY:
399
6.02M
  case OP_ANYNL:
400
6.36M
  case OP_NOT_HSPACE:
401
6.56M
  case OP_HSPACE:
402
6.93M
  case OP_NOT_VSPACE:
403
7.17M
  case OP_VSPACE:
404
7.57M
  case OP_EXTUNI:
405
7.59M
  case OP_EODN:
406
7.60M
  case OP_EOD:
407
7.67M
  case OP_DOLL:
408
7.68M
  case OP_DOLLM:
409
7.68M
  return code;
410
411
11.8M
  case OP_CHAR:
412
12.3M
  case OP_NOT:
413
12.3M
  GETCHARINCTEST(chr, code);
414
12.3M
  list[2] = chr;
415
12.3M
  list[3] = NOTACHAR;
416
12.3M
  return code;
417
418
2.82M
  case OP_CHARI:
419
2.92M
  case OP_NOTI:
420
2.92M
  list[0] = (c == OP_CHARI) ? OP_CHAR : OP_NOT;
421
2.92M
  GETCHARINCTEST(chr, code);
422
2.92M
  list[2] = chr;
423
424
2.92M
#ifdef SUPPORT_UNICODE
425
2.92M
  if (chr < 128 || (chr < 256 && !utf && !ucp))
426
1.30M
    list[3] = fcc[chr];
427
1.62M
  else
428
1.62M
    list[3] = UCD_OTHERCASE(chr);
429
#elif defined SUPPORT_WIDE_CHARS
430
  list[3] = (chr < 256) ? fcc[chr] : chr;
431
#else
432
  list[3] = fcc[chr];
433
#endif
434
435
  /* The othercase might be the same value. */
436
437
2.92M
  if (chr == list[3])
438
2.36M
    list[3] = NOTACHAR;
439
552k
  else
440
552k
    list[4] = NOTACHAR;
441
2.92M
  return code;
442
443
0
#ifdef SUPPORT_UNICODE
444
915k
  case OP_PROP:
445
1.85M
  case OP_NOTPROP:
446
1.85M
  if (code[0] != PT_CLIST)
447
1.74M
    {
448
1.74M
    list[2] = code[0];
449
1.74M
    list[3] = code[1];
450
1.74M
    return code + 2;
451
1.74M
    }
452
453
  /* Convert only if we have enough space. */
454
455
112k
  clist_src = PRIV(ucd_caseless_sets) + code[1];
456
112k
  clist_dest = list + 2;
457
112k
  code += 2;
458
459
454k
  do {
460
454k
     if (clist_dest >= list + MAX_LIST)
461
0
       {
462
       /* Early return if there is not enough space. GenerateUcd.py
463
       generated a list with more than 5 characters and something
464
       must be done about that going forward. */
465
0
       PCRE2_DEBUG_UNREACHABLE();   /* Remove if it ever triggers */
466
0
       list[2] = code[0];
467
0
       list[3] = code[1];
468
0
       return code;
469
0
       }
470
454k
     *clist_dest++ = *clist_src;
471
454k
     }
472
454k
  while(*clist_src++ != NOTACHAR);
473
474
  /* All characters are stored. The terminating NOTACHAR is copied from the
475
  clist itself. */
476
477
112k
  list[0] = (c == OP_PROP) ? OP_CHAR : OP_NOT;
478
112k
  return code;
479
0
#endif
480
481
97.8k
  case OP_NCLASS:
482
328k
  case OP_CLASS:
483
328k
#ifdef SUPPORT_WIDE_CHARS
484
1.28M
  case OP_XCLASS:
485
1.34M
  case OP_ECLASS:
486
1.34M
  if (c == OP_XCLASS || c == OP_ECLASS)
487
1.01M
    end = code + GET(code, 0) - 1;
488
328k
  else
489
328k
#endif
490
328k
    end = code + 32 / sizeof(PCRE2_UCHAR);
491
1.34M
  class_end = end;
492
493
1.34M
  switch(*end)
494
1.34M
    {
495
143k
    case OP_CRSTAR:
496
174k
    case OP_CRMINSTAR:
497
507k
    case OP_CRQUERY:
498
545k
    case OP_CRMINQUERY:
499
547k
    case OP_CRPOSSTAR:
500
549k
    case OP_CRPOSQUERY:
501
549k
    list[1] = TRUE;
502
549k
    end++;
503
549k
    break;
504
505
272k
    case OP_CRPLUS:
506
344k
    case OP_CRMINPLUS:
507
346k
    case OP_CRPOSPLUS:
508
346k
    end++;
509
346k
    break;
510
511
203k
    case OP_CRRANGE:
512
285k
    case OP_CRMINRANGE:
513
291k
    case OP_CRPOSRANGE:
514
291k
    list[1] = (GET2(end, 1) == 0);
515
291k
    end += 1 + 2 * IMM2_SIZE;
516
291k
    break;
517
1.34M
    }
518
1.34M
  list[2] = (uint32_t)(end - code);
519
1.34M
  list[3] = (uint32_t)(end - class_end);
520
1.34M
  return end;
521
26.8M
  }
522
523
750k
return NULL;    /* Opcode not accepted */
524
26.8M
}
525
526
527
528
/*************************************************
529
*    Scan further character sets for match       *
530
*************************************************/
531
532
/* Checks whether the base and the current opcode have a common character, in
533
which case the base cannot be possessified.
534
535
Arguments:
536
  code        points to the byte code
537
  utf         TRUE in UTF mode
538
  ucp         TRUE in UCP mode
539
  cb          compile data block
540
  base_list   the data list of the base opcode
541
  base_end    the end of the base opcode
542
  rec_limit   points to recursion depth counter
543
544
Returns:      TRUE if the auto-possessification is possible
545
*/
546
547
static BOOL
548
compare_opcodes(PCRE2_SPTR code, BOOL utf, BOOL ucp, const compile_block *cb,
549
  const uint32_t *base_list, PCRE2_SPTR base_end, int *rec_limit)
550
67.3M
{
551
67.3M
PCRE2_UCHAR c;
552
67.3M
uint32_t list[MAX_LIST];
553
67.3M
const uint32_t *chr_ptr;
554
67.3M
const uint32_t *ochr_ptr;
555
67.3M
const uint32_t *list_ptr;
556
67.3M
PCRE2_SPTR next_code;
557
67.3M
#ifdef SUPPORT_WIDE_CHARS
558
67.3M
PCRE2_SPTR xclass_flags;
559
67.3M
#endif
560
67.3M
const uint8_t *class_bitset;
561
67.3M
const uint8_t *set1, *set2, *set_end;
562
67.3M
uint32_t chr;
563
67.3M
BOOL accepted, invert_bits;
564
67.3M
BOOL entered_a_group = FALSE;
565
566
67.3M
if (--(*rec_limit) <= 0) return FALSE;  /* Recursion has gone too deep */
567
568
/* Note: the base_list[1] contains whether the current opcode has a greedy
569
(represented by a non-zero value) quantifier. This is a different from
570
other character type lists, which store here that the character iterator
571
matches to an empty string (also represented by a non-zero value). */
572
573
26.3M
for(;;)
574
76.2M
  {
575
76.2M
  PCRE2_SPTR bracode;
576
577
  /* All operations move the code pointer forward.
578
  Therefore infinite recursions are not possible. */
579
580
76.2M
  c = *code;
581
582
  /* Skip over callouts */
583
584
76.2M
  if (c == OP_CALLOUT)
585
3.36M
    {
586
3.36M
    code += PRIV(OP_lengths)[c];
587
3.36M
    continue;
588
3.36M
    }
589
590
72.9M
  if (c == OP_CALLOUT_STR)
591
20.5k
    {
592
20.5k
    code += GET(code, 1 + 2*LINK_SIZE);
593
20.5k
    continue;
594
20.5k
    }
595
596
  /* At the end of a branch, skip to the end of the group and process it. */
597
598
72.8M
  if (c == OP_ALT)
599
1.85M
    {
600
8.95M
    do code += GET(code, 1); while (*code == OP_ALT);
601
1.85M
    c = *code;
602
1.85M
    }
603
604
  /* Inspect the next opcode. */
605
606
72.8M
  switch(c)
607
72.8M
    {
608
    /* We can always possessify a greedy iterator at the end of the pattern,
609
    which is reached after skipping over the final OP_KET. A non-greedy
610
    iterator must never be possessified. */
611
612
289k
    case OP_END:
613
289k
    return base_list[1] != 0;
614
615
    /* When an iterator is at the end of certain kinds of group we can inspect
616
    what follows the group by skipping over the closing ket. Note that this
617
    does not apply to OP_KETRMAX or OP_KETRMIN because what follows any given
618
    iteration is variable (could be another iteration or could be the next
619
    item). As these two opcodes are not listed in the next switch, they will
620
    end up as the next code to inspect, and return FALSE by virtue of being
621
    unsupported. */
622
623
46.1M
    case OP_KET:
624
46.3M
    case OP_KETRPOS:
625
    /* The non-greedy case cannot be converted to a possessive form. */
626
627
46.3M
    if (base_list[1] == 0) return FALSE;
628
629
    /* If the bracket is capturing it might be referenced by an OP_RECURSE
630
    so its last iterator can never be possessified if the pattern contains
631
    recursions. (This could be improved by keeping a list of group numbers that
632
    are called by recursion.) */
633
634
45.3M
    bracode = code - GET(code, 1);
635
45.3M
    switch(*bracode)
636
45.3M
      {
637
2.89M
      case OP_CBRA:
638
2.89M
      case OP_SCBRA:
639
2.93M
      case OP_CBRAPOS:
640
3.00M
      case OP_SCBRAPOS:
641
3.00M
      if (cb->had_recurse) return FALSE;
642
2.58M
      break;
643
644
      /* A script run might have to backtrack if the iterated item can match
645
      characters from more than one script. So give up unless repeating an
646
      explicit character. */
647
648
2.58M
      case OP_SCRIPT_RUN:
649
73.0k
      if (base_list[0] != OP_CHAR && base_list[0] != OP_CHARI)
650
19.4k
        return FALSE;
651
53.6k
      break;
652
653
      /* Atomic sub-patterns and forward assertions can always auto-possessify
654
      their last iterator. However, if the group was entered as a result of
655
      checking a previous iterator, this is not possible. */
656
657
275k
      case OP_ASSERT:
658
426k
      case OP_ASSERT_NOT:
659
518k
      case OP_ONCE:
660
518k
      return !entered_a_group;
661
662
      /* Fixed-length lookbehinds can be treated the same way, but variable
663
      length lookbehinds must not auto-possessify their last iterator. Note
664
      that in order to identify a variable length lookbehind we must check
665
      through all branches, because some may be of fixed length. */
666
667
152k
      case OP_ASSERTBACK:
668
264k
      case OP_ASSERTBACK_NOT:
669
264k
      do
670
293k
        {
671
293k
        if (bracode[1+LINK_SIZE] == OP_VREVERSE) return FALSE;  /* Variable */
672
42.5k
        bracode += GET(bracode, 1);
673
42.5k
        }
674
264k
      while (*bracode == OP_ALT);
675
14.1k
      return !entered_a_group;  /* Not variable length */
676
677
      /* Non-atomic assertions - don't possessify last iterator. This needs
678
      more thought. */
679
680
170k
      case OP_ASSERT_NA:
681
546k
      case OP_ASSERTBACK_NA:
682
546k
      return FALSE;
683
45.3M
      }
684
685
    /* Skip over the bracket and inspect what comes next. */
686
687
43.6M
    code += PRIV(OP_lengths)[c];
688
43.6M
    continue;
689
690
    /* Handle cases where the next item is a group. */
691
692
36.1k
    case OP_ONCE:
693
678k
    case OP_BRA:
694
2.09M
    case OP_CBRA:
695
2.09M
    next_code = code + GET(code, 1);
696
2.09M
    code += PRIV(OP_lengths)[c];
697
698
    /* Check each branch. We have to recurse a level for all but the last
699
    branch. */
700
701
2.75M
    while (*next_code == OP_ALT)
702
1.07M
      {
703
1.07M
      if (!compare_opcodes(code, utf, ucp, cb, base_list, base_end, rec_limit))
704
424k
        return FALSE;
705
653k
      code = next_code + 1 + LINK_SIZE;
706
653k
      next_code += GET(next_code, 1);
707
653k
      }
708
709
1.67M
    entered_a_group = TRUE;
710
1.67M
    continue;
711
712
517k
    case OP_BRAZERO:
713
559k
    case OP_BRAMINZERO:
714
715
559k
    next_code = code + 1;
716
559k
    if (*next_code != OP_BRA && *next_code != OP_CBRA &&
717
75.1k
        *next_code != OP_ONCE) return FALSE;
718
719
556k
    do next_code += GET(next_code, 1); while (*next_code == OP_ALT);
720
721
    /* The bracket content will be checked by the OP_BRA/OP_CBRA case above. */
722
723
502k
    next_code += 1 + LINK_SIZE;
724
502k
    if (!compare_opcodes(next_code, utf, ucp, cb, base_list, base_end,
725
502k
         rec_limit))
726
55.6k
      return FALSE;
727
728
446k
    code += PRIV(OP_lengths)[c];
729
446k
    continue;
730
731
    /* The next opcode does not need special handling; fall through and use it
732
    to see if the base can be possessified. */
733
734
23.6M
    default:
735
23.6M
    break;
736
72.8M
    }
737
738
  /* We now have the next appropriate opcode to compare with the base. Check
739
  for a supported opcode, and load its properties. */
740
741
23.6M
  code = get_chr_property_list(code, utf, ucp, cb->fcc, list);
742
23.6M
  if (code == NULL) return FALSE;    /* Unsupported */
743
744
  /* If either opcode is a small character list, set pointers for comparing
745
  characters from that list with another list, or with a property. */
746
747
21.4M
  if (base_list[0] == OP_CHAR)
748
13.4M
    {
749
13.4M
    chr_ptr = base_list + 2;
750
13.4M
    list_ptr = list;
751
13.4M
    }
752
8.00M
  else if (list[0] == OP_CHAR)
753
6.37M
    {
754
6.37M
    chr_ptr = list + 2;
755
6.37M
    list_ptr = base_list;
756
6.37M
    }
757
758
  /* Character bitsets can also be compared to certain opcodes. */
759
760
1.63M
  else if (base_list[0] == OP_CLASS || list[0] == OP_CLASS
761
#if PCRE2_CODE_UNIT_WIDTH == 8
762
      /* In 8 bit, non-UTF mode, OP_CLASS and OP_NCLASS are the same. */
763
590k
      || (!utf && (base_list[0] == OP_NCLASS || list[0] == OP_NCLASS))
764
#endif
765
1.63M
      )
766
312k
    {
767
#if PCRE2_CODE_UNIT_WIDTH == 8
768
208k
    if (base_list[0] == OP_CLASS || (!utf && base_list[0] == OP_NCLASS))
769
#else
770
104k
    if (base_list[0] == OP_CLASS)
771
68.5k
#endif
772
191k
      {
773
191k
      set1 = (const uint8_t *)(base_end - base_list[2]);
774
191k
      list_ptr = list;
775
191k
      }
776
120k
    else
777
120k
      {
778
120k
      set1 = (const uint8_t *)(code - list[2]);
779
120k
      list_ptr = base_list;
780
120k
      }
781
782
312k
    invert_bits = FALSE;
783
312k
    switch(list_ptr[0])
784
312k
      {
785
45.0k
      case OP_CLASS:
786
65.8k
      case OP_NCLASS:
787
65.8k
      set2 = (const uint8_t *)
788
65.8k
        ((list_ptr == list ? code : base_end) - list_ptr[2]);
789
65.8k
      break;
790
791
0
#ifdef SUPPORT_WIDE_CHARS
792
44.1k
      case OP_XCLASS:
793
44.1k
      xclass_flags = (list_ptr == list ? code : base_end) -
794
44.1k
        list_ptr[2] + LINK_SIZE;
795
44.1k
      if ((*xclass_flags & XCL_HASPROP) != 0) return FALSE;
796
33.2k
      if ((*xclass_flags & XCL_MAP) == 0)
797
12.0k
        {
798
        /* No bits are set for characters < 256. */
799
12.0k
        if (list[1] == 0) return (*xclass_flags & XCL_NOT) == 0;
800
        /* Might be an empty repeat. */
801
4.93k
        continue;
802
12.0k
        }
803
21.2k
      set2 = (const uint8_t *)(xclass_flags + 1);
804
21.2k
      break;
805
0
#endif
806
807
15.6k
      case OP_NOT_DIGIT:
808
15.6k
      invert_bits = TRUE;
809
15.6k
      PCRE2_FALLTHROUGH /* Fall through */
810
26.6k
      case OP_DIGIT:
811
26.6k
      set2 = (const uint8_t *)(cb->cbits + cbit_digit);
812
26.6k
      break;
813
814
11.8k
      case OP_NOT_WHITESPACE:
815
11.8k
      invert_bits = TRUE;
816
11.8k
      PCRE2_FALLTHROUGH /* Fall through */
817
23.7k
      case OP_WHITESPACE:
818
23.7k
      set2 = (const uint8_t *)(cb->cbits + cbit_space);
819
23.7k
      break;
820
821
12.3k
      case OP_NOT_WORDCHAR:
822
12.3k
      invert_bits = TRUE;
823
12.3k
      PCRE2_FALLTHROUGH /* Fall through */
824
26.9k
      case OP_WORDCHAR:
825
26.9k
      set2 = (const uint8_t *)(cb->cbits + cbit_word);
826
26.9k
      break;
827
828
125k
      default:
829
125k
      return FALSE;
830
312k
      }
831
832
    /* Because the bit sets are unaligned bytes, we need to perform byte
833
    comparison here. */
834
835
164k
    set_end = set1 + 32;
836
164k
    if (invert_bits)
837
39.8k
      {
838
39.8k
      do
839
561k
        {
840
561k
        if ((*set1++ & ~(*set2++)) != 0) return FALSE;
841
561k
        }
842
533k
      while (set1 < set_end);
843
39.8k
      }
844
124k
    else
845
124k
      {
846
124k
      do
847
2.20M
        {
848
2.20M
        if ((*set1++ & *set2++) != 0) return FALSE;
849
2.20M
        }
850
2.13M
      while (set1 < set_end);
851
124k
      }
852
853
64.4k
    if (list[1] == 0) return TRUE;
854
    /* Might be an empty repeat. */
855
20.3k
    continue;
856
64.4k
    }
857
858
  /* Some property combinations also acceptable. Unicode property opcodes are
859
  processed specially; the rest can be handled with a lookup table. */
860
861
1.32M
  else
862
1.32M
    {
863
1.32M
    uint32_t leftop, rightop;
864
865
1.32M
    leftop = base_list[0];
866
1.32M
    rightop = list[0];
867
868
1.32M
#ifdef SUPPORT_UNICODE
869
1.32M
    accepted = FALSE; /* Always set in non-unicode case. */
870
1.32M
    if (leftop == OP_PROP || leftop == OP_NOTPROP)
871
306k
      {
872
306k
      if (rightop == OP_EOD)
873
9.22k
        accepted = TRUE;
874
296k
      else if (rightop == OP_PROP || rightop == OP_NOTPROP)
875
218k
        {
876
218k
        int n;
877
218k
        const uint8_t *p;
878
218k
        BOOL same = leftop == rightop;
879
218k
        BOOL lisprop = leftop == OP_PROP;
880
218k
        BOOL risprop = rightop == OP_PROP;
881
218k
        BOOL bothprop = lisprop && risprop;
882
883
        /* There's a table that specifies how each combination is to be
884
        processed:
885
          0   Always return FALSE (never auto-possessify)
886
          1   Character groups are distinct (possessify if both are OP_PROP)
887
          2   Check character categories in the same group (general or particular)
888
          3   Return TRUE if the two opcodes are not the same
889
          ... see comments below
890
        */
891
892
218k
        n = propposstab[base_list[2]][list[2]];
893
218k
        switch(n)
894
218k
          {
895
7.64k
          case 0: break;
896
11.6k
          case 1: accepted = bothprop; break;
897
16.1k
          case 2: accepted = (base_list[3] == list[3]) != same; break;
898
16.3k
          case 3: accepted = !same; break;
899
900
16.6k
          case 4:  /* Left general category, right particular category */
901
16.6k
          accepted = risprop && catposstab[base_list[3]][list[3]] == same;
902
16.6k
          break;
903
904
13.8k
          case 5:  /* Right general category, left particular category */
905
13.8k
          accepted = lisprop && catposstab[list[3]][base_list[3]] == same;
906
13.8k
          break;
907
908
          /* This code is logically tricky. Think hard before fiddling with it.
909
          The posspropstab table has four entries per row. Each row relates to
910
          one of PCRE's special properties such as ALNUM or SPACE or WORD.
911
          Only WORD actually needs all four entries, but using repeats for the
912
          others means they can all use the same code below.
913
914
          The first two entries in each row are Unicode general categories, and
915
          apply always, because all the characters they include are part of the
916
          PCRE character set. The third and fourth entries are a general and a
917
          particular category, respectively, that include one or more relevant
918
          characters. One or the other is used, depending on whether the check
919
          is for a general or a particular category. However, in both cases the
920
          category contains more characters than the specials that are defined
921
          for the property being tested against. Therefore, it cannot be used
922
          in a NOTPROP case.
923
924
          Example: the row for WORD contains ucp_L, ucp_N, ucp_P, ucp_Po.
925
          Underscore is covered by ucp_P or ucp_Po. */
926
927
4.58k
          case 6:  /* Left alphanum vs right general category */
928
18.2k
          case 7:  /* Left space vs right general category */
929
36.6k
          case 8:  /* Left word vs right general category */
930
36.6k
          p = posspropstab[n-6];
931
36.6k
          accepted = risprop && lisprop ==
932
27.1k
            (list[3] != p[0] &&
933
20.7k
             list[3] != p[1] &&
934
15.3k
            (list[3] != p[2] || !lisprop));
935
36.6k
          break;
936
937
1.87k
          case 9:   /* Right alphanum vs left general category */
938
14.0k
          case 10:  /* Right space vs left general category */
939
28.9k
          case 11:  /* Right word vs left general category */
940
28.9k
          p = posspropstab[n-9];
941
28.9k
          accepted = lisprop && risprop ==
942
21.0k
            (base_list[3] != p[0] &&
943
16.7k
             base_list[3] != p[1] &&
944
10.9k
            (base_list[3] != p[2] || !risprop));
945
28.9k
          break;
946
947
2.44k
          case 12:  /* Left alphanum vs right particular category */
948
26.3k
          case 13:  /* Left space vs right particular category */
949
38.3k
          case 14:  /* Left word vs right particular category */
950
38.3k
          p = posspropstab[n-12];
951
38.3k
          accepted = risprop && lisprop ==
952
26.2k
            (catposstab[p[0]][list[3]] &&
953
20.5k
             catposstab[p[1]][list[3]] &&
954
12.8k
            (list[3] != p[3] || !lisprop));
955
38.3k
          break;
956
957
4.04k
          case 15:  /* Right alphanum vs left particular category */
958
19.6k
          case 16:  /* Right space vs left particular category */
959
31.8k
          case 17:  /* Right word vs left particular category */
960
31.8k
          p = posspropstab[n-15];
961
31.8k
          accepted = lisprop && risprop ==
962
22.6k
            (catposstab[p[0]][base_list[3]] &&
963
19.0k
             catposstab[p[1]][base_list[3]] &&
964
9.58k
            (base_list[3] != p[3] || !risprop));
965
31.8k
          break;
966
218k
          }
967
218k
        }
968
306k
      }
969
970
1.02M
    else
971
1.02M
#endif  /* SUPPORT_UNICODE */
972
973
1.02M
    accepted = leftop >= FIRST_AUTOTAB_OP && leftop <= LAST_AUTOTAB_LEFT_OP &&
974
786k
           rightop >= FIRST_AUTOTAB_OP && rightop <= LAST_AUTOTAB_RIGHT_OP &&
975
744k
           autoposstab[leftop - FIRST_AUTOTAB_OP][rightop - FIRST_AUTOTAB_OP];
976
977
1.32M
    if (!accepted) return FALSE;
978
979
211k
    if (list[1] == 0) return TRUE;
980
    /* Might be an empty repeat. */
981
53.5k
    continue;
982
211k
    }
983
984
  /* Control reaches here only if one of the items is a small character list.
985
  All characters are checked against the other side. */
986
987
19.8M
  do
988
20.9M
    {
989
20.9M
    chr = *chr_ptr;
990
991
20.9M
    switch(list_ptr[0])
992
20.9M
      {
993
12.0M
      case OP_CHAR:
994
12.0M
      ochr_ptr = list_ptr + 2;
995
12.0M
      do
996
13.0M
        {
997
13.0M
        if (chr == *ochr_ptr) return FALSE;
998
12.3M
        ochr_ptr++;
999
12.3M
        }
1000
12.3M
      while(*ochr_ptr != NOTACHAR);
1001
11.4M
      break;
1002
1003
11.4M
      case OP_NOT:
1004
793k
      ochr_ptr = list_ptr + 2;
1005
793k
      do
1006
874k
        {
1007
874k
        if (chr == *ochr_ptr)
1008
70.7k
          break;
1009
803k
        ochr_ptr++;
1010
803k
        }
1011
803k
      while(*ochr_ptr != NOTACHAR);
1012
793k
      if (*ochr_ptr == NOTACHAR) return FALSE;   /* Not found */
1013
70.7k
      break;
1014
1015
      /* Note that OP_DIGIT etc. are generated only when PCRE2_UCP is *not*
1016
      set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
1017
1018
225k
      case OP_DIGIT:
1019
225k
      if (chr < 256 && (cb->ctypes[chr] & ctype_digit) != 0) return FALSE;
1020
204k
      break;
1021
1022
204k
      case OP_NOT_DIGIT:
1023
146k
      if (chr > 255 || (cb->ctypes[chr] & ctype_digit) == 0) return FALSE;
1024
8.45k
      break;
1025
1026
237k
      case OP_WHITESPACE:
1027
237k
      if (chr < 256 && (cb->ctypes[chr] & ctype_space) != 0) return FALSE;
1028
221k
      break;
1029
1030
539k
      case OP_NOT_WHITESPACE:
1031
539k
      if (chr > 255 || (cb->ctypes[chr] & ctype_space) == 0) return FALSE;
1032
32.9k
      break;
1033
1034
210k
      case OP_WORDCHAR:
1035
210k
      if (chr < 255 && (cb->ctypes[chr] & ctype_word) != 0) return FALSE;
1036
150k
      break;
1037
1038
225k
      case OP_NOT_WORDCHAR:
1039
225k
      if (chr > 255 || (cb->ctypes[chr] & ctype_word) == 0) return FALSE;
1040
53.6k
      break;
1041
1042
408k
      case OP_HSPACE:
1043
408k
      switch(chr)
1044
408k
        {
1045
160k
        HSPACE_CASES: return FALSE;
1046
247k
        default: break;
1047
408k
        }
1048
247k
      break;
1049
1050
379k
      case OP_NOT_HSPACE:
1051
379k
      switch(chr)
1052
379k
        {
1053
143k
        HSPACE_CASES: break;
1054
236k
        default: return FALSE;
1055
379k
        }
1056
143k
      break;
1057
1058
625k
      case OP_ANYNL:
1059
728k
      case OP_VSPACE:
1060
728k
      switch(chr)
1061
728k
        {
1062
127k
        VSPACE_CASES: return FALSE;
1063
601k
        default: break;
1064
728k
        }
1065
601k
      break;
1066
1067
601k
      case OP_NOT_VSPACE:
1068
264k
      switch(chr)
1069
264k
        {
1070
57.9k
        VSPACE_CASES: break;
1071
206k
        default: return FALSE;
1072
264k
        }
1073
57.9k
      break;
1074
1075
162k
      case OP_DOLL:
1076
211k
      case OP_EODN:
1077
211k
      switch (chr)
1078
211k
        {
1079
9.66k
        case CHAR_CR:
1080
20.2k
        case CHAR_LF:
1081
28.4k
        case CHAR_VT:
1082
35.9k
        case CHAR_FF:
1083
42.4k
        case CHAR_NEL:
1084
42.4k
#ifndef EBCDIC
1085
50.8k
        case 0x2028:
1086
57.2k
        case 0x2029:
1087
57.2k
#endif  /* Not EBCDIC */
1088
57.2k
        return FALSE;
1089
211k
        }
1090
153k
      break;
1091
1092
153k
      case OP_EOD:    /* Can always possessify before \z */
1093
26.0k
      break;
1094
1095
0
#ifdef SUPPORT_UNICODE
1096
390k
      case OP_PROP:
1097
998k
      case OP_NOTPROP:
1098
998k
      if (!check_char_prop(chr, list_ptr[2], list_ptr[3],
1099
998k
            list_ptr[0] == OP_NOTPROP))
1100
563k
        return FALSE;
1101
434k
      break;
1102
434k
#endif
1103
1104
434k
      case OP_NCLASS:
1105
362k
      if (chr > 255) return FALSE;
1106
289k
      PCRE2_FALLTHROUGH /* Fall through */
1107
289k
1108
907k
      case OP_CLASS:
1109
907k
      if (chr > 255) break;
1110
804k
      class_bitset = (const uint8_t *)
1111
804k
        ((list_ptr == list ? code : base_end) - list_ptr[2]);
1112
804k
      if ((class_bitset[chr >> 3] & (1u << (chr & 7))) != 0) return FALSE;
1113
450k
      break;
1114
1115
450k
#ifdef SUPPORT_WIDE_CHARS
1116
1.02M
      case OP_XCLASS:
1117
1.02M
      if (PRIV(xclass)(chr, (list_ptr == list ? code : base_end) -
1118
1.02M
          list_ptr[2] + LINK_SIZE, (const uint8_t*)cb->start_code, utf))
1119
378k
        return FALSE;
1120
641k
      break;
1121
1122
641k
      case OP_ECLASS:
1123
133k
      if (PRIV(eclass)(chr,
1124
133k
          (list_ptr == list ? code : base_end) - list_ptr[2] + LINK_SIZE,
1125
133k
          (list_ptr == list ? code : base_end) - list_ptr[3],
1126
133k
          (const uint8_t*)cb->start_code, utf))
1127
62.4k
        return FALSE;
1128
70.5k
      break;
1129
70.5k
#endif /* SUPPORT_WIDE_CHARS */
1130
1131
1.35M
      default:
1132
1.35M
      return FALSE;
1133
20.9M
      }
1134
1135
15.1M
    chr_ptr++;
1136
15.1M
    }
1137
19.8M
  while(*chr_ptr != NOTACHAR);
1138
1139
  /* At least one character must be matched from this opcode. */
1140
1141
13.9M
  if (list[1] == 0) return TRUE;
1142
13.9M
  }
1143
1144
/* LCOV_EXCL_START */
1145
0
PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */
1146
0
return FALSE;              /* Avoid compiler warnings */
1147
/* LCOV_EXCL_STOP */
1148
26.3M
}
pcre2_auto_possess.c:compare_opcodes
Line
Count
Source
550
35.7M
{
/* Body of compare_opcodes(): scans forward from "code" and decides whether the
iterator described by "base_list" can be made possessive, that is, whether
nothing that can follow the iterator is able to match a character that the
iterator itself can match.

Inputs, as they are used below:
  code        position of the opcode at which inspection starts
  utf         passed to get_chr_property_list(), to the extended class checks
                and to recursive calls; in the 8-bit library it also decides
                whether OP_NCLASS is treated like OP_CLASS
  ucp         passed to get_chr_property_list() and to recursive calls
  cb          compile block; the fields fcc, cbits, ctypes, had_recurse and
                start_code are read
  base_list   property list of the iterator being tested; base_list[0] is its
                opcode and base_list[1] is non-zero when it is greedy
  base_end    code pointer from which offsets held in base_list are
                subtracted in order to locate the base item's class data
  rec_limit   call budget shared with recursive calls; it is decremented once
                per call

Returns TRUE if the base iterator may be possessified and FALSE otherwise,
including when the budget in *rec_limit has been used up. */

PCRE2_UCHAR c;
uint32_t list[MAX_LIST];
const uint32_t *chr_ptr;
const uint32_t *ochr_ptr;
const uint32_t *list_ptr;
PCRE2_SPTR next_code;
#ifdef SUPPORT_WIDE_CHARS
PCRE2_SPTR xclass_flags;
#endif
const uint8_t *class_bitset;
const uint8_t *set1, *set2, *set_end;
uint32_t chr;
BOOL accepted, invert_bits;
BOOL entered_a_group = FALSE;

if (--(*rec_limit) <= 0) return FALSE;  /* Recursion has gone too deep */

/* Note: base_list[1] records whether the current opcode has a greedy
quantifier (represented by a non-zero value). This differs from the other
character type lists, which use the same slot to record that the character
iterator can match an empty string (also represented by a non-zero value). */

for(;;)
  {
  PCRE2_SPTR bracode;

  /* All operations move the code pointer forward.
  Therefore infinite recursions are not possible. */

  c = *code;

  /* Skip over callouts */

  if (c == OP_CALLOUT)
    {
    code += PRIV(OP_lengths)[c];
    continue;
    }

  if (c == OP_CALLOUT_STR)
    {
    code += GET(code, 1 + 2*LINK_SIZE);
    continue;
    }

  /* At the end of a branch, skip to the end of the group and process it. */

  if (c == OP_ALT)
    {
    do code += GET(code, 1); while (*code == OP_ALT);
    c = *code;
    }

  /* Inspect the next opcode. */

  switch(c)
    {
    /* We can always possessify a greedy iterator at the end of the pattern,
    which is reached after skipping over the final OP_KET. A non-greedy
    iterator must never be possessified. */

    case OP_END:
    return base_list[1] != 0;

    /* When an iterator is at the end of certain kinds of group we can inspect
    what follows the group by skipping over the closing ket. Note that this
    does not apply to OP_KETRMAX or OP_KETRMIN because what follows any given
    iteration is variable (could be another iteration or could be the next
    item). As these two opcodes are not listed in the next switch, they will
    end up as the next code to inspect, and return FALSE by virtue of being
    unsupported. */

    case OP_KET:
    case OP_KETRPOS:
    /* The non-greedy case cannot be converted to a possessive form. */

    if (base_list[1] == 0) return FALSE;

    /* If the bracket is capturing it might be referenced by an OP_RECURSE
    so its last iterator can never be possessified if the pattern contains
    recursions. (This could be improved by keeping a list of group numbers that
    are called by recursion.) */

    bracode = code - GET(code, 1);
    switch(*bracode)
      {
      case OP_CBRA:
      case OP_SCBRA:
      case OP_CBRAPOS:
      case OP_SCBRAPOS:
      if (cb->had_recurse) return FALSE;
      break;

      /* A script run might have to backtrack if the iterated item can match
      characters from more than one script. So give up unless repeating an
      explicit character. */

      case OP_SCRIPT_RUN:
      if (base_list[0] != OP_CHAR && base_list[0] != OP_CHARI)
        return FALSE;
      break;

      /* Atomic sub-patterns and forward assertions can always auto-possessify
      their last iterator. However, if the group was entered as a result of
      checking a previous iterator, this is not possible. */

      case OP_ASSERT:
      case OP_ASSERT_NOT:
      case OP_ONCE:
      return !entered_a_group;

      /* Fixed-length lookbehinds can be treated the same way, but variable
      length lookbehinds must not auto-possessify their last iterator. Note
      that in order to identify a variable length lookbehind we must check
      through all branches, because some may be of fixed length. */

      case OP_ASSERTBACK:
      case OP_ASSERTBACK_NOT:
      do
        {
        if (bracode[1+LINK_SIZE] == OP_VREVERSE) return FALSE;  /* Variable */
        bracode += GET(bracode, 1);
        }
      while (*bracode == OP_ALT);
      return !entered_a_group;  /* Not variable length */

      /* Non-atomic assertions - don't possessify last iterator. This needs
      more thought. */

      case OP_ASSERT_NA:
      case OP_ASSERTBACK_NA:
      return FALSE;
      }

    /* Skip over the bracket and inspect what comes next. */

    code += PRIV(OP_lengths)[c];
    continue;

    /* Handle cases where the next item is a group. */

    case OP_ONCE:
    case OP_BRA:
    case OP_CBRA:
    next_code = code + GET(code, 1);
    code += PRIV(OP_lengths)[c];

    /* Check each branch. We have to recurse a level for all but the last
    branch. */

    while (*next_code == OP_ALT)
      {
      if (!compare_opcodes(code, utf, ucp, cb, base_list, base_end, rec_limit))
        return FALSE;
      code = next_code + 1 + LINK_SIZE;
      next_code += GET(next_code, 1);
      }

    entered_a_group = TRUE;
    continue;

    case OP_BRAZERO:
    case OP_BRAMINZERO:

    next_code = code + 1;
    if (*next_code != OP_BRA && *next_code != OP_CBRA &&
        *next_code != OP_ONCE) return FALSE;

    do next_code += GET(next_code, 1); while (*next_code == OP_ALT);

    /* The bracket content will be checked by the OP_BRA/OP_CBRA case above. */

    next_code += 1 + LINK_SIZE;
    if (!compare_opcodes(next_code, utf, ucp, cb, base_list, base_end,
         rec_limit))
      return FALSE;

    code += PRIV(OP_lengths)[c];
    continue;

    /* The next opcode does not need special handling; fall through and use it
    to see if the base can be possessified. */

    default:
    break;
    }

  /* We now have the next appropriate opcode to compare with the base. Check
  for a supported opcode, and load its properties. */

  code = get_chr_property_list(code, utf, ucp, cb->fcc, list);
  if (code == NULL) return FALSE;    /* Unsupported */

  /* If either opcode is a small character list, set pointers for comparing
  characters from that list with another list, or with a property. */

  if (base_list[0] == OP_CHAR)
    {
    chr_ptr = base_list + 2;
    list_ptr = list;
    }
  else if (list[0] == OP_CHAR)
    {
    chr_ptr = list + 2;
    list_ptr = base_list;
    }

  /* Character bitsets can also be compared to certain opcodes. */

  else if (base_list[0] == OP_CLASS || list[0] == OP_CLASS
#if PCRE2_CODE_UNIT_WIDTH == 8
      /* In 8 bit, non-UTF mode, OP_CLASS and OP_NCLASS are the same. */
      || (!utf && (base_list[0] == OP_NCLASS || list[0] == OP_NCLASS))
#endif
      )
    {
    /* set1 is the bitmap of whichever side is the plain class; list_ptr is
    pointed at the other side, which supplies set2 below. */

#if PCRE2_CODE_UNIT_WIDTH == 8
    if (base_list[0] == OP_CLASS || (!utf && base_list[0] == OP_NCLASS))
#else
    if (base_list[0] == OP_CLASS)
#endif
      {
      set1 = (const uint8_t *)(base_end - base_list[2]);
      list_ptr = list;
      }
    else
      {
      set1 = (const uint8_t *)(code - list[2]);
      list_ptr = base_list;
      }

    invert_bits = FALSE;
    switch(list_ptr[0])
      {
      case OP_CLASS:
      case OP_NCLASS:
      set2 = (const uint8_t *)
        ((list_ptr == list ? code : base_end) - list_ptr[2]);
      break;

#ifdef SUPPORT_WIDE_CHARS
      case OP_XCLASS:
      xclass_flags = (list_ptr == list ? code : base_end) -
        list_ptr[2] + LINK_SIZE;
      if ((*xclass_flags & XCL_HASPROP) != 0) return FALSE;
      if ((*xclass_flags & XCL_MAP) == 0)
        {
        /* No bits are set for characters < 256. */
        if (list[1] == 0) return (*xclass_flags & XCL_NOT) == 0;
        /* Might be an empty repeat. */
        continue;
        }
      set2 = (const uint8_t *)(xclass_flags + 1);
      break;
#endif

      case OP_NOT_DIGIT:
      invert_bits = TRUE;
      PCRE2_FALLTHROUGH /* Fall through */
      case OP_DIGIT:
      set2 = (const uint8_t *)(cb->cbits + cbit_digit);
      break;

      case OP_NOT_WHITESPACE:
      invert_bits = TRUE;
      PCRE2_FALLTHROUGH /* Fall through */
      case OP_WHITESPACE:
      set2 = (const uint8_t *)(cb->cbits + cbit_space);
      break;

      case OP_NOT_WORDCHAR:
      invert_bits = TRUE;
      PCRE2_FALLTHROUGH /* Fall through */
      case OP_WORDCHAR:
      set2 = (const uint8_t *)(cb->cbits + cbit_word);
      break;

      default:
      return FALSE;
      }

    /* Because the bit sets are unaligned bytes, we need to perform byte
    comparison here. The two sets must not share any bit; when invert_bits is
    set, the second set is complemented before the comparison. */

    set_end = set1 + 32;
    if (invert_bits)
      {
      do
        {
        if ((*set1++ & ~(*set2++)) != 0) return FALSE;
        }
      while (set1 < set_end);
      }
    else
      {
      do
        {
        if ((*set1++ & *set2++) != 0) return FALSE;
        }
      while (set1 < set_end);
      }

    if (list[1] == 0) return TRUE;
    /* Might be an empty repeat. */
    continue;
    }

  /* Some property combinations are also acceptable. Unicode property opcodes
  are processed specially; the rest can be handled with a lookup table. */

  else
    {
    uint32_t leftop, rightop;

    leftop = base_list[0];
    rightop = list[0];

#ifdef SUPPORT_UNICODE
    accepted = FALSE; /* Always set in non-unicode case. */
    if (leftop == OP_PROP || leftop == OP_NOTPROP)
      {
      if (rightop == OP_EOD)
        accepted = TRUE;
      else if (rightop == OP_PROP || rightop == OP_NOTPROP)
        {
        int n;
        const uint8_t *p;
        BOOL same = leftop == rightop;
        BOOL lisprop = leftop == OP_PROP;
        BOOL risprop = rightop == OP_PROP;
        BOOL bothprop = lisprop && risprop;

        /* There's a table that specifies how each combination is to be
        processed:
          0   Always return FALSE (never auto-possessify)
          1   Character groups are distinct (possessify if both are OP_PROP)
          2   Check character categories in the same group (general or particular)
          3   Return TRUE if the two opcodes are not the same
          ... see comments below
        */

        n = propposstab[base_list[2]][list[2]];
        switch(n)
          {
          case 0: break;
          case 1: accepted = bothprop; break;
          case 2: accepted = (base_list[3] == list[3]) != same; break;
          case 3: accepted = !same; break;

          case 4:  /* Left general category, right particular category */
          accepted = risprop && catposstab[base_list[3]][list[3]] == same;
          break;

          case 5:  /* Right general category, left particular category */
          accepted = lisprop && catposstab[list[3]][base_list[3]] == same;
          break;

          /* This code is logically tricky. Think hard before fiddling with it.
          The posspropstab table has four entries per row. Each row relates to
          one of PCRE's special properties such as ALNUM or SPACE or WORD.
          Only WORD actually needs all four entries, but using repeats for the
          others means they can all use the same code below.

          The first two entries in each row are Unicode general categories, and
          apply always, because all the characters they include are part of the
          PCRE character set. The third and fourth entries are a general and a
          particular category, respectively, that include one or more relevant
          characters. One or the other is used, depending on whether the check
          is for a general or a particular category. However, in both cases the
          category contains more characters than the specials that are defined
          for the property being tested against. Therefore, it cannot be used
          in a NOTPROP case.

          Example: the row for WORD contains ucp_L, ucp_N, ucp_P, ucp_Po.
          Underscore is covered by ucp_P or ucp_Po. */

          case 6:  /* Left alphanum vs right general category */
          case 7:  /* Left space vs right general category */
          case 8:  /* Left word vs right general category */
          p = posspropstab[n-6];
          accepted = risprop && lisprop ==
            (list[3] != p[0] &&
             list[3] != p[1] &&
            (list[3] != p[2] || !lisprop));
          break;

          case 9:   /* Right alphanum vs left general category */
          case 10:  /* Right space vs left general category */
          case 11:  /* Right word vs left general category */
          p = posspropstab[n-9];
          accepted = lisprop && risprop ==
            (base_list[3] != p[0] &&
             base_list[3] != p[1] &&
            (base_list[3] != p[2] || !risprop));
          break;

          case 12:  /* Left alphanum vs right particular category */
          case 13:  /* Left space vs right particular category */
          case 14:  /* Left word vs right particular category */
          p = posspropstab[n-12];
          accepted = risprop && lisprop ==
            (catposstab[p[0]][list[3]] &&
             catposstab[p[1]][list[3]] &&
            (list[3] != p[3] || !lisprop));
          break;

          case 15:  /* Right alphanum vs left particular category */
          case 16:  /* Right space vs left particular category */
          case 17:  /* Right word vs left particular category */
          p = posspropstab[n-15];
          accepted = lisprop && risprop ==
            (catposstab[p[0]][base_list[3]] &&
             catposstab[p[1]][base_list[3]] &&
            (base_list[3] != p[3] || !risprop));
          break;
          }
        }
      }

    else
#endif  /* SUPPORT_UNICODE */

    accepted = leftop >= FIRST_AUTOTAB_OP && leftop <= LAST_AUTOTAB_LEFT_OP &&
           rightop >= FIRST_AUTOTAB_OP && rightop <= LAST_AUTOTAB_RIGHT_OP &&
           autoposstab[leftop - FIRST_AUTOTAB_OP][rightop - FIRST_AUTOTAB_OP];

    if (!accepted) return FALSE;

    if (list[1] == 0) return TRUE;
    /* Might be an empty repeat. */
    continue;
    }

  /* Control reaches here only if one of the items is a small character list.
  All characters are checked against the other side. */

  do
    {
    chr = *chr_ptr;

    switch(list_ptr[0])
      {
      case OP_CHAR:
      ochr_ptr = list_ptr + 2;
      do
        {
        if (chr == *ochr_ptr) return FALSE;
        ochr_ptr++;
        }
      while(*ochr_ptr != NOTACHAR);
      break;

      case OP_NOT:
      ochr_ptr = list_ptr + 2;
      do
        {
        if (chr == *ochr_ptr)
          break;
        ochr_ptr++;
        }
      while(*ochr_ptr != NOTACHAR);
      if (*ochr_ptr == NOTACHAR) return FALSE;   /* Not found */
      break;

      /* Note that OP_DIGIT etc. are generated only when PCRE2_UCP is *not*
      set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */

      case OP_DIGIT:
      if (chr < 256 && (cb->ctypes[chr] & ctype_digit) != 0) return FALSE;
      break;

      case OP_NOT_DIGIT:
      if (chr > 255 || (cb->ctypes[chr] & ctype_digit) == 0) return FALSE;
      break;

      case OP_WHITESPACE:
      if (chr < 256 && (cb->ctypes[chr] & ctype_space) != 0) return FALSE;
      break;

      case OP_NOT_WHITESPACE:
      if (chr > 255 || (cb->ctypes[chr] & ctype_space) == 0) return FALSE;
      break;

      /* The bound is 256, as for OP_DIGIT and OP_WHITESPACE above, so that
      code point 255 is also looked up in ctypes. A bound of 255 would let a
      character 255 that is flagged as a word character slip through and
      allow possessification in front of \w. */

      case OP_WORDCHAR:
      if (chr < 256 && (cb->ctypes[chr] & ctype_word) != 0) return FALSE;
      break;

      case OP_NOT_WORDCHAR:
      if (chr > 255 || (cb->ctypes[chr] & ctype_word) == 0) return FALSE;
      break;

      case OP_HSPACE:
      switch(chr)
        {
        HSPACE_CASES: return FALSE;
        default: break;
        }
      break;

      case OP_NOT_HSPACE:
      switch(chr)
        {
        HSPACE_CASES: break;
        default: return FALSE;
        }
      break;

      case OP_ANYNL:
      case OP_VSPACE:
      switch(chr)
        {
        VSPACE_CASES: return FALSE;
        default: break;
        }
      break;

      case OP_NOT_VSPACE:
      switch(chr)
        {
        VSPACE_CASES: break;
        default: return FALSE;
        }
      break;

      case OP_DOLL:
      case OP_EODN:
      switch (chr)
        {
        case CHAR_CR:
        case CHAR_LF:
        case CHAR_VT:
        case CHAR_FF:
        case CHAR_NEL:
#ifndef EBCDIC
        case 0x2028:
        case 0x2029:
#endif  /* Not EBCDIC */
        return FALSE;
        }
      break;

      case OP_EOD:    /* Can always possessify before \z */
      break;

#ifdef SUPPORT_UNICODE
      case OP_PROP:
      case OP_NOTPROP:
      if (!check_char_prop(chr, list_ptr[2], list_ptr[3],
            list_ptr[0] == OP_NOTPROP))
        return FALSE;
      break;
#endif

      case OP_NCLASS:
      if (chr > 255) return FALSE;
      PCRE2_FALLTHROUGH /* Fall through */

      case OP_CLASS:
      if (chr > 255) break;
      class_bitset = (const uint8_t *)
        ((list_ptr == list ? code : base_end) - list_ptr[2]);
      if ((class_bitset[chr >> 3] & (1u << (chr & 7))) != 0) return FALSE;
      break;

#ifdef SUPPORT_WIDE_CHARS
      case OP_XCLASS:
      if (PRIV(xclass)(chr, (list_ptr == list ? code : base_end) -
          list_ptr[2] + LINK_SIZE, (const uint8_t*)cb->start_code, utf))
        return FALSE;
      break;

      case OP_ECLASS:
      if (PRIV(eclass)(chr,
          (list_ptr == list ? code : base_end) - list_ptr[2] + LINK_SIZE,
          (list_ptr == list ? code : base_end) - list_ptr[3],
          (const uint8_t*)cb->start_code, utf))
        return FALSE;
      break;
#endif /* SUPPORT_WIDE_CHARS */

      default:
      return FALSE;
      }

    chr_ptr++;
    }
  while(*chr_ptr != NOTACHAR);

  /* At least one character must be matched from this opcode. */

  if (list[1] == 0) return TRUE;
  }

/* LCOV_EXCL_START */
PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */
return FALSE;              /* Avoid compiler warnings */
/* LCOV_EXCL_STOP */
}
pcre2_auto_possess.c:compare_opcodes
Line
Count
Source
550
31.6M
{
551
31.6M
PCRE2_UCHAR c;
552
31.6M
uint32_t list[MAX_LIST];
553
31.6M
const uint32_t *chr_ptr;
554
31.6M
const uint32_t *ochr_ptr;
555
31.6M
const uint32_t *list_ptr;
556
31.6M
PCRE2_SPTR next_code;
557
31.6M
#ifdef SUPPORT_WIDE_CHARS
558
31.6M
PCRE2_SPTR xclass_flags;
559
31.6M
#endif
560
31.6M
const uint8_t *class_bitset;
561
31.6M
const uint8_t *set1, *set2, *set_end;
562
31.6M
uint32_t chr;
563
31.6M
BOOL accepted, invert_bits;
564
31.6M
BOOL entered_a_group = FALSE;
565
566
31.6M
if (--(*rec_limit) <= 0) return FALSE;  /* Recursion has gone too deep */
567
568
/* Note: the base_list[1] contains whether the current opcode has a greedy
569
(represented by a non-zero value) quantifier. This is a different from
570
other character type lists, which store here that the character iterator
571
matches to an empty string (also represented by a non-zero value). */
572
573
15.6M
for(;;)
574
48.6M
  {
575
48.6M
  PCRE2_SPTR bracode;
576
577
  /* All operations move the code pointer forward.
578
  Therefore infinite recursions are not possible. */
579
580
48.6M
  c = *code;
581
582
  /* Skip over callouts */
583
584
48.6M
  if (c == OP_CALLOUT)
585
2.20M
    {
586
2.20M
    code += PRIV(OP_lengths)[c];
587
2.20M
    continue;
588
2.20M
    }
589
590
46.4M
  if (c == OP_CALLOUT_STR)
591
9.53k
    {
592
9.53k
    code += GET(code, 1 + 2*LINK_SIZE);
593
9.53k
    continue;
594
9.53k
    }
595
596
  /* At the end of a branch, skip to the end of the group and process it. */
597
598
46.4M
  if (c == OP_ALT)
599
1.21M
    {
600
3.04M
    do code += GET(code, 1); while (*code == OP_ALT);
601
1.21M
    c = *code;
602
1.21M
    }
603
604
  /* Inspect the next opcode. */
605
606
46.4M
  switch(c)
607
46.4M
    {
608
    /* We can always possessify a greedy iterator at the end of the pattern,
609
    which is reached after skipping over the final OP_KET. A non-greedy
610
    iterator must never be possessified. */
611
612
188k
    case OP_END:
613
188k
    return base_list[1] != 0;
614
615
    /* When an iterator is at the end of certain kinds of group we can inspect
616
    what follows the group by skipping over the closing ket. Note that this
617
    does not apply to OP_KETRMAX or OP_KETRMIN because what follows any given
618
    iteration is variable (could be another iteration or could be the next
619
    item). As these two opcodes are not listed in the next switch, they will
620
    end up as the next code to inspect, and return FALSE by virtue of being
621
    unsupported. */
622
623
30.6M
    case OP_KET:
624
30.7M
    case OP_KETRPOS:
625
    /* The non-greedy case cannot be converted to a possessive form. */
626
627
30.7M
    if (base_list[1] == 0) return FALSE;
628
629
    /* If the bracket is capturing it might be referenced by an OP_RECURSE
630
    so its last iterator can never be possessified if the pattern contains
631
    recursions. (This could be improved by keeping a list of group numbers that
632
    are called by recursion.) */
633
634
30.1M
    bracode = code - GET(code, 1);
635
30.1M
    switch(*bracode)
636
30.1M
      {
637
1.87M
      case OP_CBRA:
638
1.87M
      case OP_SCBRA:
639
1.90M
      case OP_CBRAPOS:
640
1.91M
      case OP_SCBRAPOS:
641
1.91M
      if (cb->had_recurse) return FALSE;
642
1.62M
      break;
643
644
      /* A script run might have to backtrack if the iterated item can match
645
      characters from more than one script. So give up unless repeating an
646
      explicit character. */
647
648
1.62M
      case OP_SCRIPT_RUN:
649
37.4k
      if (base_list[0] != OP_CHAR && base_list[0] != OP_CHARI)
650
10.4k
        return FALSE;
651
27.0k
      break;
652
653
      /* Atomic sub-patterns and forward assertions can always auto-possessify
654
      their last iterator. However, if the group was entered as a result of
655
      checking a previous iterator, this is not possible. */
656
657
156k
      case OP_ASSERT:
658
227k
      case OP_ASSERT_NOT:
659
278k
      case OP_ONCE:
660
278k
      return !entered_a_group;
661
662
      /* Fixed-length lookbehinds can be treated the same way, but variable
663
      length lookbehinds must not auto-possessify their last iterator. Note
664
      that in order to identify a variable length lookbehind we must check
665
      through all branches, because some may be of fixed length. */
666
667
89.3k
      case OP_ASSERTBACK:
668
140k
      case OP_ASSERTBACK_NOT:
669
140k
      do
670
156k
        {
671
156k
        if (bracode[1+LINK_SIZE] == OP_VREVERSE) return FALSE;  /* Variable */
672
24.6k
        bracode += GET(bracode, 1);
673
24.6k
        }
674
140k
      while (*bracode == OP_ALT);
675
8.05k
      return !entered_a_group;  /* Not variable length */
676
677
      /* Non-atomic assertions - don't possessify last iterator. This needs
678
      more thought. */
679
680
79.3k
      case OP_ASSERT_NA:
681
412k
      case OP_ASSERTBACK_NA:
682
412k
      return FALSE;
683
30.1M
      }
684
685
    /* Skip over the bracket and inspect what comes next. */
686
687
28.9M
    code += PRIV(OP_lengths)[c];
688
28.9M
    continue;
689
690
    /* Handle cases where the next item is a group. */
691
692
17.8k
    case OP_ONCE:
693
437k
    case OP_BRA:
694
1.23M
    case OP_CBRA:
695
1.23M
    next_code = code + GET(code, 1);
696
1.23M
    code += PRIV(OP_lengths)[c];
697
698
    /* Check each branch. We have to recurse a level for all but the last
699
    branch. */
700
701
1.66M
    while (*next_code == OP_ALT)
702
691k
      {
703
691k
      if (!compare_opcodes(code, utf, ucp, cb, base_list, base_end, rec_limit))
704
259k
        return FALSE;
705
432k
      code = next_code + 1 + LINK_SIZE;
706
432k
      next_code += GET(next_code, 1);
707
432k
      }
708
709
971k
    entered_a_group = TRUE;
710
971k
    continue;
711
712
335k
    case OP_BRAZERO:
713
350k
    case OP_BRAMINZERO:
714
715
350k
    next_code = code + 1;
716
350k
    if (*next_code != OP_BRA && *next_code != OP_CBRA &&
717
29.5k
        *next_code != OP_ONCE) return FALSE;
718
719
372k
    do next_code += GET(next_code, 1); while (*next_code == OP_ALT);
720
721
    /* The bracket content will be checked by the OP_BRA/OP_CBRA case above. */
722
723
329k
    next_code += 1 + LINK_SIZE;
724
329k
    if (!compare_opcodes(next_code, utf, ucp, cb, base_list, base_end,
725
329k
         rec_limit))
726
26.8k
      return FALSE;
727
728
302k
    code += PRIV(OP_lengths)[c];
729
302k
    continue;
730
731
    /* The next opcode does not need special handling; fall through and use it
732
    to see if the base can be possessified. */
733
734
13.9M
    default:
735
13.9M
    break;
736
46.4M
    }
737
738
  /* We now have the next appropriate opcode to compare with the base. Check
739
  for a supported opcode, and load its properties. */
740
741
13.9M
  code = get_chr_property_list(code, utf, ucp, cb->fcc, list);
742
13.9M
  if (code == NULL) return FALSE;    /* Unsupported */
743
744
  /* If either opcode is a small character list, set pointers for comparing
745
  characters from that list with another list, or with a property. */
746
747
12.4M
  if (base_list[0] == OP_CHAR)
748
7.43M
    {
749
7.43M
    chr_ptr = base_list + 2;
750
7.43M
    list_ptr = list;
751
7.43M
    }
752
5.02M
  else if (list[0] == OP_CHAR)
753
4.11M
    {
754
4.11M
    chr_ptr = list + 2;
755
4.11M
    list_ptr = base_list;
756
4.11M
    }
757
758
  /* Character bitsets can also be compared to certain opcodes. */
759
760
916k
  else if (base_list[0] == OP_CLASS || list[0] == OP_CLASS
761
#if PCRE2_CODE_UNIT_WIDTH == 8
762
      /* In 8 bit, non-UTF mode, OP_CLASS and OP_NCLASS are the same. */
763
      || (!utf && (base_list[0] == OP_NCLASS || list[0] == OP_NCLASS))
764
#endif
765
916k
      )
766
104k
    {
767
#if PCRE2_CODE_UNIT_WIDTH == 8
768
    if (base_list[0] == OP_CLASS || (!utf && base_list[0] == OP_NCLASS))
769
#else
770
104k
    if (base_list[0] == OP_CLASS)
771
68.5k
#endif
772
68.5k
      {
773
68.5k
      set1 = (const uint8_t *)(base_end - base_list[2]);
774
68.5k
      list_ptr = list;
775
68.5k
      }
776
35.7k
    else
777
35.7k
      {
778
35.7k
      set1 = (const uint8_t *)(code - list[2]);
779
35.7k
      list_ptr = base_list;
780
35.7k
      }
781
782
104k
    invert_bits = FALSE;
783
104k
    switch(list_ptr[0])
784
104k
      {
785
18.3k
      case OP_CLASS:
786
26.5k
      case OP_NCLASS:
787
26.5k
      set2 = (const uint8_t *)
788
26.5k
        ((list_ptr == list ? code : base_end) - list_ptr[2]);
789
26.5k
      break;
790
791
0
#ifdef SUPPORT_WIDE_CHARS
792
32.7k
      case OP_XCLASS:
793
32.7k
      xclass_flags = (list_ptr == list ? code : base_end) -
794
32.7k
        list_ptr[2] + LINK_SIZE;
795
32.7k
      if ((*xclass_flags & XCL_HASPROP) != 0) return FALSE;
796
25.8k
      if ((*xclass_flags & XCL_MAP) == 0)
797
8.43k
        {
798
        /* No bits are set for characters < 256. */
799
8.43k
        if (list[1] == 0) return (*xclass_flags & XCL_NOT) == 0;
800
        /* Might be an empty repeat. */
801
2.92k
        continue;
802
8.43k
        }
803
17.4k
      set2 = (const uint8_t *)(xclass_flags + 1);
804
17.4k
      break;
805
0
#endif
806
807
7.79k
      case OP_NOT_DIGIT:
808
7.79k
      invert_bits = TRUE;
809
7.79k
      PCRE2_FALLTHROUGH /* Fall through */
810
12.0k
      case OP_DIGIT:
811
12.0k
      set2 = (const uint8_t *)(cb->cbits + cbit_digit);
812
12.0k
      break;
813
814
6.94k
      case OP_NOT_WHITESPACE:
815
6.94k
      invert_bits = TRUE;
816
6.94k
      PCRE2_FALLTHROUGH /* Fall through */
817
12.9k
      case OP_WHITESPACE:
818
12.9k
      set2 = (const uint8_t *)(cb->cbits + cbit_space);
819
12.9k
      break;
820
821
8.06k
      case OP_NOT_WORDCHAR:
822
8.06k
      invert_bits = TRUE;
823
8.06k
      PCRE2_FALLTHROUGH /* Fall through */
824
12.0k
      case OP_WORDCHAR:
825
12.0k
      set2 = (const uint8_t *)(cb->cbits + cbit_word);
826
12.0k
      break;
827
828
7.86k
      default:
829
7.86k
      return FALSE;
830
104k
      }
831
832
    /* Because the bit sets are unaligned bytes, we need to perform byte
833
    comparison here. */
834
835
81.1k
    set_end = set1 + 32;
836
81.1k
    if (invert_bits)
837
22.7k
      {
838
22.7k
      do
839
382k
        {
840
382k
        if ((*set1++ & ~(*set2++)) != 0) return FALSE;
841
382k
        }
842
367k
      while (set1 < set_end);
843
22.7k
      }
844
58.3k
    else
845
58.3k
      {
846
58.3k
      do
847
1.16M
        {
848
1.16M
        if ((*set1++ & *set2++) != 0) return FALSE;
849
1.16M
        }
850
1.13M
      while (set1 < set_end);
851
58.3k
      }
852
853
38.8k
    if (list[1] == 0) return TRUE;
854
    /* Might be an empty repeat. */
855
12.1k
    continue;
856
38.8k
    }
857
858
  /* Some property combinations also acceptable. Unicode property opcodes are
859
  processed specially; the rest can be handled with a lookup table. */
860
861
811k
  else
862
811k
    {
863
811k
    uint32_t leftop, rightop;
864
865
811k
    leftop = base_list[0];
866
811k
    rightop = list[0];
867
868
811k
#ifdef SUPPORT_UNICODE
869
811k
    accepted = FALSE; /* Always set in non-unicode case. */
870
811k
    if (leftop == OP_PROP || leftop == OP_NOTPROP)
871
196k
      {
872
196k
      if (rightop == OP_EOD)
873
5.16k
        accepted = TRUE;
874
191k
      else if (rightop == OP_PROP || rightop == OP_NOTPROP)
875
142k
        {
876
142k
        int n;
877
142k
        const uint8_t *p;
878
142k
        BOOL same = leftop == rightop;
879
142k
        BOOL lisprop = leftop == OP_PROP;
880
142k
        BOOL risprop = rightop == OP_PROP;
881
142k
        BOOL bothprop = lisprop && risprop;
882
883
        /* There's a table that specifies how each combination is to be
884
        processed:
885
          0   Always return FALSE (never auto-possessify)
886
          1   Character groups are distinct (possessify if both are OP_PROP)
887
          2   Check character categories in the same group (general or particular)
888
          3   Return TRUE if the two opcodes are not the same
889
          ... see comments below
890
        */
891
892
142k
        n = propposstab[base_list[2]][list[2]];
893
142k
        switch(n)
894
142k
          {
895
5.61k
          case 0: break;
896
7.50k
          case 1: accepted = bothprop; break;
897
11.0k
          case 2: accepted = (base_list[3] == list[3]) != same; break;
898
9.83k
          case 3: accepted = !same; break;
899
900
11.4k
          case 4:  /* Left general category, right particular category */
901
11.4k
          accepted = risprop && catposstab[base_list[3]][list[3]] == same;
902
11.4k
          break;
903
904
10.5k
          case 5:  /* Right general category, left particular category */
905
10.5k
          accepted = lisprop && catposstab[list[3]][base_list[3]] == same;
906
10.5k
          break;
907
908
          /* This code is logically tricky. Think hard before fiddling with it.
909
          The posspropstab table has four entries per row. Each row relates to
910
          one of PCRE's special properties such as ALNUM or SPACE or WORD.
911
          Only WORD actually needs all four entries, but using repeats for the
912
          others means they can all use the same code below.
913
914
          The first two entries in each row are Unicode general categories, and
915
          apply always, because all the characters they include are part of the
916
          PCRE character set. The third and fourth entries are a general and a
917
          particular category, respectively, that include one or more relevant
918
          characters. One or the other is used, depending on whether the check
919
          is for a general or a particular category. However, in both cases the
920
          category contains more characters than the specials that are defined
921
          for the property being tested against. Therefore, it cannot be used
922
          in a NOTPROP case.
923
924
          Example: the row for WORD contains ucp_L, ucp_N, ucp_P, ucp_Po.
925
          Underscore is covered by ucp_P or ucp_Po. */
926
927
1.00k
          case 6:  /* Left alphanum vs right general category */
928
10.0k
          case 7:  /* Left space vs right general category */
929
24.3k
          case 8:  /* Left word vs right general category */
930
24.3k
          p = posspropstab[n-6];
931
24.3k
          accepted = risprop && lisprop ==
932
20.0k
            (list[3] != p[0] &&
933
15.4k
             list[3] != p[1] &&
934
11.3k
            (list[3] != p[2] || !lisprop));
935
24.3k
          break;
936
937
578
          case 9:   /* Right alphanum vs left general category */
938
8.69k
          case 10:  /* Right space vs left general category */
939
19.2k
          case 11:  /* Right word vs left general category */
940
19.2k
          p = posspropstab[n-9];
941
19.2k
          accepted = lisprop && risprop ==
942
14.8k
            (base_list[3] != p[0] &&
943
12.1k
             base_list[3] != p[1] &&
944
8.20k
            (base_list[3] != p[2] || !risprop));
945
19.2k
          break;
946
947
879
          case 12:  /* Left alphanum vs right particular category */
948
14.2k
          case 13:  /* Left space vs right particular category */
949
23.1k
          case 14:  /* Left word vs right particular category */
950
23.1k
          p = posspropstab[n-12];
951
23.1k
          accepted = risprop && lisprop ==
952
17.5k
            (catposstab[p[0]][list[3]] &&
953
13.8k
             catposstab[p[1]][list[3]] &&
954
8.48k
            (list[3] != p[3] || !lisprop));
955
23.1k
          break;
956
957
1.23k
          case 15:  /* Right alphanum vs left particular category */
958
9.96k
          case 16:  /* Right space vs left particular category */
959
19.1k
          case 17:  /* Right word vs left particular category */
960
19.1k
          p = posspropstab[n-15];
961
19.1k
          accepted = lisprop && risprop ==
962
13.0k
            (catposstab[p[0]][base_list[3]] &&
963
10.8k
             catposstab[p[1]][base_list[3]] &&
964
5.52k
            (base_list[3] != p[3] || !risprop));
965
19.1k
          break;
966
142k
          }
967
142k
        }
968
196k
      }
969
970
615k
    else
971
615k
#endif  /* SUPPORT_UNICODE */
972
973
615k
    accepted = leftop >= FIRST_AUTOTAB_OP && leftop <= LAST_AUTOTAB_LEFT_OP &&
974
450k
           rightop >= FIRST_AUTOTAB_OP && rightop <= LAST_AUTOTAB_RIGHT_OP &&
975
430k
           autoposstab[leftop - FIRST_AUTOTAB_OP][rightop - FIRST_AUTOTAB_OP];
976
977
811k
    if (!accepted) return FALSE;
978
979
124k
    if (list[1] == 0) return TRUE;
980
    /* Might be an empty repeat. */
981
29.4k
    continue;
982
124k
    }
983
984
  /* Control reaches here only if one of the items is a small character list.
985
  All characters are checked against the other side. */
986
987
11.5M
  do
988
12.1M
    {
989
12.1M
    chr = *chr_ptr;
990
991
12.1M
    switch(list_ptr[0])
992
12.1M
      {
993
6.61M
      case OP_CHAR:
994
6.61M
      ochr_ptr = list_ptr + 2;
995
6.61M
      do
996
6.92M
        {
997
6.92M
        if (chr == *ochr_ptr) return FALSE;
998
6.63M
        ochr_ptr++;
999
6.63M
        }
1000
6.63M
      while(*ochr_ptr != NOTACHAR);
1001
6.31M
      break;
1002
1003
6.31M
      case OP_NOT:
1004
485k
      ochr_ptr = list_ptr + 2;
1005
485k
      do
1006
531k
        {
1007
531k
        if (chr == *ochr_ptr)
1008
41.8k
          break;
1009
489k
        ochr_ptr++;
1010
489k
        }
1011
489k
      while(*ochr_ptr != NOTACHAR);
1012
485k
      if (*ochr_ptr == NOTACHAR) return FALSE;   /* Not found */
1013
41.8k
      break;
1014
1015
      /* Note that OP_DIGIT etc. are generated only when PCRE2_UCP is *not*
1016
      set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
1017
1018
119k
      case OP_DIGIT:
1019
119k
      if (chr < 256 && (cb->ctypes[chr] & ctype_digit) != 0) return FALSE;
1020
110k
      break;
1021
1022
110k
      case OP_NOT_DIGIT:
1023
86.3k
      if (chr > 255 || (cb->ctypes[chr] & ctype_digit) == 0) return FALSE;
1024
2.89k
      break;
1025
1026
155k
      case OP_WHITESPACE:
1027
155k
      if (chr < 256 && (cb->ctypes[chr] & ctype_space) != 0) return FALSE;
1028
145k
      break;
1029
1030
404k
      case OP_NOT_WHITESPACE:
1031
404k
      if (chr > 255 || (cb->ctypes[chr] & ctype_space) == 0) return FALSE;
1032
21.4k
      break;
1033
1034
129k
      case OP_WORDCHAR:
1035
129k
      if (chr < 255 && (cb->ctypes[chr] & ctype_word) != 0) return FALSE;
1036
98.9k
      break;
1037
1038
152k
      case OP_NOT_WORDCHAR:
1039
152k
      if (chr > 255 || (cb->ctypes[chr] & ctype_word) == 0) return FALSE;
1040
23.2k
      break;
1041
1042
207k
      case OP_HSPACE:
1043
207k
      switch(chr)
1044
207k
        {
1045
126k
        HSPACE_CASES: return FALSE;
1046
80.9k
        default: break;
1047
207k
        }
1048
80.9k
      break;
1049
1050
254k
      case OP_NOT_HSPACE:
1051
254k
      switch(chr)
1052
254k
        {
1053
103k
        HSPACE_CASES: break;
1054
151k
        default: return FALSE;
1055
254k
        }
1056
103k
      break;
1057
1058
461k
      case OP_ANYNL:
1059
532k
      case OP_VSPACE:
1060
532k
      switch(chr)
1061
532k
        {
1062
106k
        VSPACE_CASES: return FALSE;
1063
425k
        default: break;
1064
532k
        }
1065
425k
      break;
1066
1067
425k
      case OP_NOT_VSPACE:
1068
219k
      switch(chr)
1069
219k
        {
1070
41.8k
        VSPACE_CASES: break;
1071
177k
        default: return FALSE;
1072
219k
        }
1073
41.8k
      break;
1074
1075
80.1k
      case OP_DOLL:
1076
112k
      case OP_EODN:
1077
112k
      switch (chr)
1078
112k
        {
1079
4.75k
        case CHAR_CR:
1080
13.6k
        case CHAR_LF:
1081
19.7k
        case CHAR_VT:
1082
24.9k
        case CHAR_FF:
1083
28.4k
        case CHAR_NEL:
1084
28.4k
#ifndef EBCDIC
1085
35.3k
        case 0x2028:
1086
40.2k
        case 0x2029:
1087
40.2k
#endif  /* Not EBCDIC */
1088
40.2k
        return FALSE;
1089
112k
        }
1090
72.0k
      break;
1091
1092
72.0k
      case OP_EOD:    /* Can always possessify before \z */
1093
17.2k
      break;
1094
1095
0
#ifdef SUPPORT_UNICODE
1096
271k
      case OP_PROP:
1097
655k
      case OP_NOTPROP:
1098
655k
      if (!check_char_prop(chr, list_ptr[2], list_ptr[3],
1099
655k
            list_ptr[0] == OP_NOTPROP))
1100
355k
        return FALSE;
1101
299k
      break;
1102
299k
#endif
1103
1104
299k
      case OP_NCLASS:
1105
114k
      if (chr > 255) return FALSE;
1106
44.2k
      PCRE2_FALLTHROUGH /* Fall through */
1107
44.2k
1108
251k
      case OP_CLASS:
1109
251k
      if (chr > 255) break;
1110
155k
      class_bitset = (const uint8_t *)
1111
155k
        ((list_ptr == list ? code : base_end) - list_ptr[2]);
1112
155k
      if ((class_bitset[chr >> 3] & (1u << (chr & 7))) != 0) return FALSE;
1113
96.2k
      break;
1114
1115
96.2k
#ifdef SUPPORT_WIDE_CHARS
1116
908k
      case OP_XCLASS:
1117
908k
      if (PRIV(xclass)(chr, (list_ptr == list ? code : base_end) -
1118
908k
          list_ptr[2] + LINK_SIZE, (const uint8_t*)cb->start_code, utf))
1119
331k
        return FALSE;
1120
577k
      break;
1121
1122
577k
      case OP_ECLASS:
1123
81.7k
      if (PRIV(eclass)(chr,
1124
81.7k
          (list_ptr == list ? code : base_end) - list_ptr[2] + LINK_SIZE,
1125
81.7k
          (list_ptr == list ? code : base_end) - list_ptr[3],
1126
81.7k
          (const uint8_t*)cb->start_code, utf))
1127
28.9k
        return FALSE;
1128
52.7k
      break;
1129
52.7k
#endif /* SUPPORT_WIDE_CHARS */
1130
1131
645k
      default:
1132
645k
      return FALSE;
1133
12.1M
      }
1134
1135
8.62M
    chr_ptr++;
1136
8.62M
    }
1137
11.5M
  while(*chr_ptr != NOTACHAR);
1138
1139
  /* At least one character must be matched from this opcode. */
1140
1141
8.07M
  if (list[1] == 0) return TRUE;
1142
8.07M
  }
1143
1144
/* LCOV_EXCL_START */
1145
0
PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */
1146
0
return FALSE;              /* Avoid compiler warnings */
1147
/* LCOV_EXCL_STOP */
1148
15.6M
}
1149
1150
1151
1152
/*************************************************
1153
*    Scan compiled regex for auto-possession     *
1154
*************************************************/
1155
1156
/* Replaces single character iterations with their possessive alternatives
1157
if appropriate. This function modifies the compiled opcode! Hitting a
1158
non-existent opcode may indicate a bug in PCRE2, but it can also be caused if a
1159
bad UTF string was compiled with PCRE2_NO_UTF_CHECK. The rec_limit catches
1160
overly complicated or large patterns. In these cases, the check just stops,
1161
leaving the remainder of the pattern unpossessified.
1162
1163
Arguments:
1164
  code        points to start of the byte code
1165
  cb          compile data block
1166
1167
Returns:      0 for success
1168
              -1 if a non-existent opcode is encountered
1169
*/
1170
1171
int
1172
PRIV(auto_possessify)(PCRE2_UCHAR *code, const compile_block *cb)
1173
530k
{
1174
530k
PCRE2_UCHAR c;
1175
530k
PCRE2_SPTR end;
1176
530k
PCRE2_UCHAR *repeat_opcode;
1177
530k
uint32_t list[MAX_LIST];
1178
530k
int rec_limit = 1000;  /* Was 10,000 but clang+ASAN uses a lot of stack. */
1179
530k
BOOL utf = (cb->external_options & PCRE2_UTF) != 0;
1180
530k
BOOL ucp = (cb->external_options & PCRE2_UCP) != 0;
1181
/* Walk the compiled code one opcode at a time. The loop is left only by the
return for OP_END, or by the -1 return for an out-of-range opcode value. */
1182
530k
for (;;)
1183
1.41G
  {
1184
1.41G
  c = *code;
1185
1186
  /* LCOV_EXCL_START */
1187
1.41G
  if (c >= OP_TABLE_LENGTH)
1188
0
    {
1189
0
    PCRE2_DEBUG_UNREACHABLE();
1190
0
    return -1;   /* Something gone wrong */
1191
0
    }
1192
  /* LCOV_EXCL_STOP */
1193
  /* Opcodes in the range OP_STAR..OP_TYPEPOSUPTO: c is first rebased so that
  it is relative to OP_STAR instead of to the value returned by
  get_repeat_base(). Only rebased values up to OP_MINUPTO are examined; for
  the others, end is NULL and the comparison below is skipped. list[1] is set
  to 1 for the STAR, PLUS, QUERY and UPTO forms and to 0 otherwise. */
1194
1.41G
  if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
1195
65.0M
    {
1196
65.0M
    c -= get_repeat_base(c) - OP_STAR;
1197
65.0M
    end = (c <= OP_MINUPTO) ?
1198
65.0M
      get_chr_property_list(code, utf, ucp, cb->fcc, list) : NULL;
1199
65.0M
    list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO;
1200
1201
65.0M
    if (end != NULL && compare_opcodes(end, utf, ucp, cb, list, end,
1202
62.0M
        &rec_limit))
1203
12.1M
      {
      /* The switch is on the rebased value of c, but the adjustment is added
      to the opcode stored in *code, which moves it to the possessive opcode
      at the same distance from its own repeat base. */
1204
12.1M
      switch(c)
1205
12.1M
        {
1206
2.86M
        case OP_STAR:
1207
2.86M
        *code += OP_POSSTAR - OP_STAR;
1208
2.86M
        break;
1209
1210
555k
        case OP_MINSTAR:
1211
555k
        *code += OP_POSSTAR - OP_MINSTAR;
1212
555k
        break;
1213
1214
2.80M
        case OP_PLUS:
1215
2.80M
        *code += OP_POSPLUS - OP_PLUS;
1216
2.80M
        break;
1217
1218
514k
        case OP_MINPLUS:
1219
514k
        *code += OP_POSPLUS - OP_MINPLUS;
1220
514k
        break;
1221
1222
3.93M
        case OP_QUERY:
1223
3.93M
        *code += OP_POSQUERY - OP_QUERY;
1224
3.93M
        break;
1225
1226
785k
        case OP_MINQUERY:
1227
785k
        *code += OP_POSQUERY - OP_MINQUERY;
1228
785k
        break;
1229
1230
612k
        case OP_UPTO:
1231
612k
        *code += OP_POSUPTO - OP_UPTO;
1232
612k
        break;
1233
1234
126k
        case OP_MINUPTO:
1235
126k
        *code += OP_POSUPTO - OP_MINUPTO;
1236
126k
        break;
1237
12.1M
        }
1238
12.1M
      }
    /* c was rebased above, so reload the real (possibly rewritten) opcode
    before it is used to advance through the code. */
1239
65.0M
    c = *code;
1240
65.0M
    }
  /* Class opcodes: locate the opcode that follows the class data. For
  OP_XCLASS and OP_ECLASS its offset is read with GET(code, 1); for the other
  class opcodes it follows the opcode itself and 32 bytes' worth of code
  units. It is treated as a repeat only if it lies in the range
  OP_CRSTAR..OP_CRMINRANGE. */
1241
1.34G
  else if (c == OP_CLASS || c == OP_NCLASS
1242
1.34G
#ifdef SUPPORT_WIDE_CHARS
1243
1.34G
           || c == OP_XCLASS || c == OP_ECLASS
1244
1.34G
#endif
1245
1.34G
           )
1246
11.2M
    {
1247
11.2M
#ifdef SUPPORT_WIDE_CHARS
1248
11.2M
    if (c == OP_XCLASS || c == OP_ECLASS)
1249
5.77M
      repeat_opcode = code + GET(code, 1);
1250
5.49M
    else
1251
5.49M
#endif
1252
5.49M
      repeat_opcode = code + 1 + (32 / sizeof(PCRE2_UCHAR));
1253
1254
11.2M
    c = *repeat_opcode;
1255
11.2M
    if (c >= OP_CRSTAR && c <= OP_CRMINRANGE)
1256
3.71M
      {
1257
      /* The return from get_chr_property_list() will never be NULL when
1258
      *code (aka c) is one of the four class opcodes. However, gcc with
1259
      -fanalyzer notes that a NULL return is possible, and grumbles. Hence we
1260
      put in a check. */
1261
      /* list[1] is set when the repeat opcode value is even. NOTE(review):
      presumably the even-valued OP_CR* opcodes are the non-MIN forms; confirm
      against the opcode definitions. */
1262
3.71M
      end = get_chr_property_list(code, utf, ucp, cb->fcc, list);
1263
3.71M
      list[1] = (c & 1) == 0;
1264
1265
3.71M
      if (end != NULL &&
1266
3.71M
          compare_opcodes(end, utf, ucp, cb, list, end, &rec_limit))
1267
943k
        {
        /* Both variants of each repeat are replaced by the same possessive
        opcode. */
1268
943k
        switch (c)
1269
943k
          {
1270
158k
          case OP_CRSTAR:
1271
192k
          case OP_CRMINSTAR:
1272
192k
          *repeat_opcode = OP_CRPOSSTAR;
1273
192k
          break;
1274
1275
222k
          case OP_CRPLUS:
1276
266k
          case OP_CRMINPLUS:
1277
266k
          *repeat_opcode = OP_CRPOSPLUS;
1278
266k
          break;
1279
1280
166k
          case OP_CRQUERY:
1281
197k
          case OP_CRMINQUERY:
1282
197k
          *repeat_opcode = OP_CRPOSQUERY;
1283
197k
          break;
1284
1285
221k
          case OP_CRRANGE:
1286
287k
          case OP_CRMINRANGE:
1287
287k
          *repeat_opcode = OP_CRPOSRANGE;
1288
287k
          break;
1289
943k
          }
1290
943k
        }
1291
3.71M
      }
    /* c held the opcode that follows the class; restore the class opcode
    itself for the switch below. */
1292
11.2M
    c = *code;
1293
11.2M
    }
1294
  /* Advance over any extra data that is not part of the fixed length that is
  added from PRIV(OP_lengths) after this switch. */
1295
1.41G
  switch(c)
1296
1.41G
    {
1297
530k
    case OP_END:
1298
530k
    return 0;
1299
1300
4.34M
    case OP_TYPESTAR:
1301
5.17M
    case OP_TYPEMINSTAR:
1302
9.66M
    case OP_TYPEPLUS:
1303
10.6M
    case OP_TYPEMINPLUS:
1304
16.3M
    case OP_TYPEQUERY:
1305
17.7M
    case OP_TYPEMINQUERY:
1306
18.4M
    case OP_TYPEPOSSTAR:
1307
19.2M
    case OP_TYPEPOSPLUS:
1308
19.6M
    case OP_TYPEPOSQUERY:
1309
19.6M
    if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
1310
19.6M
    break;
1311
1312
656k
    case OP_TYPEUPTO:
1313
801k
    case OP_TYPEMINUPTO:
1314
1.31M
    case OP_TYPEEXACT:
1315
1.63M
    case OP_TYPEPOSUPTO:
1316
1.63M
    if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
1317
222k
      code += 2;
1318
1.63M
    break;
1319
1320
120k
    case OP_CALLOUT_STR:
1321
120k
    code += GET(code, 1 + 2*LINK_SIZE);
1322
120k
    break;
1323
1324
0
#ifdef SUPPORT_WIDE_CHARS
1325
5.11M
    case OP_XCLASS:
1326
5.77M
    case OP_ECLASS:
1327
5.77M
    code += GET(code, 1);
1328
5.77M
    break;
1329
0
#endif
1330
1331
650k
    case OP_MARK:
1332
745k
    case OP_COMMIT_ARG:
1333
818k
    case OP_PRUNE_ARG:
1334
1.45M
    case OP_SKIP_ARG:
1335
1.60M
    case OP_THEN_ARG:
1336
1.60M
    code += code[1];
1337
1.60M
    break;
1338
1.41G
    }
1339
1340
  /* Add in the fixed length from the table */
1341
1342
1.41G
  code += PRIV(OP_lengths)[c];
1343
1344
  /* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
1345
  followed by a multi-byte character. The length in the table is a minimum, so
1346
  we have to arrange to skip the extra code units. */
1347
1348
#ifdef MAYBE_UTF_MULTI
1349
1.06G
  if (utf) switch(c)
1350
151M
    {
1351
28.1M
    case OP_CHAR:
1352
71.1M
    case OP_CHARI:
1353
71.1M
    case OP_NOT:
1354
71.3M
    case OP_NOTI:
1355
71.5M
    case OP_STAR:
1356
71.7M
    case OP_MINSTAR:
1357
71.8M
    case OP_PLUS:
1358
72.0M
    case OP_MINPLUS:
1359
72.3M
    case OP_QUERY:
1360
72.8M
    case OP_MINQUERY:
1361
72.8M
    case OP_UPTO:
1362
72.9M
    case OP_MINUPTO:
1363
72.9M
    case OP_EXACT:
1364
73.0M
    case OP_POSSTAR:
1365
73.1M
    case OP_POSPLUS:
1366
73.3M
    case OP_POSQUERY:
1367
73.3M
    case OP_POSUPTO:
1368
73.4M
    case OP_STARI:
1369
73.7M
    case OP_MINSTARI:
1370
73.9M
    case OP_PLUSI:
1371
74.1M
    case OP_MINPLUSI:
1372
74.3M
    case OP_QUERYI:
1373
74.6M
    case OP_MINQUERYI:
1374
74.7M
    case OP_UPTOI:
1375
74.7M
    case OP_MINUPTOI:
1376
74.7M
    case OP_EXACTI:
1377
74.9M
    case OP_POSSTARI:
1378
75.1M
    case OP_POSPLUSI:
1379
75.3M
    case OP_POSQUERYI:
1380
75.4M
    case OP_POSUPTOI:
1381
75.4M
    case OP_NOTSTAR:
1382
75.4M
    case OP_NOTMINSTAR:
1383
75.4M
    case OP_NOTPLUS:
1384
75.5M
    case OP_NOTMINPLUS:
1385
75.5M
    case OP_NOTQUERY:
1386
75.5M
    case OP_NOTMINQUERY:
1387
75.6M
    case OP_NOTUPTO:
1388
75.6M
    case OP_NOTMINUPTO:
1389
75.6M
    case OP_NOTEXACT:
1390
75.6M
    case OP_NOTPOSSTAR:
1391
75.6M
    case OP_NOTPOSPLUS:
1392
75.6M
    case OP_NOTPOSQUERY:
1393
75.6M
    case OP_NOTPOSUPTO:
1394
75.6M
    case OP_NOTSTARI:
1395
75.7M
    case OP_NOTMINSTARI:
1396
75.7M
    case OP_NOTPLUSI:
1397
75.7M
    case OP_NOTMINPLUSI:
1398
75.8M
    case OP_NOTQUERYI:
1399
75.8M
    case OP_NOTMINQUERYI:
1400
75.8M
    case OP_NOTUPTOI:
1401
75.8M
    case OP_NOTMINUPTOI:
1402
75.8M
    case OP_NOTEXACTI:
1403
75.8M
    case OP_NOTPOSSTARI:
1404
75.8M
    case OP_NOTPOSPLUSI:
1405
75.8M
    case OP_NOTPOSQUERYI:
1406
75.8M
    case OP_NOTPOSUPTOI:
1407
75.8M
    if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
1408
75.8M
    break;
1409
151M
    }
1410
#else
1411
  (void)(utf);  /* Keep compiler happy by referencing function argument */
1412
#endif  /* MAYBE_UTF_MULTI */
1413
1.41G
  }
1414
530k
}
_pcre2_auto_possessify_8
Line
Count
Source
1173
169k
{
1174
169k
PCRE2_UCHAR c;
1175
169k
PCRE2_SPTR end;
1176
169k
PCRE2_UCHAR *repeat_opcode;
1177
169k
uint32_t list[MAX_LIST];
1178
169k
int rec_limit = 1000;  /* Was 10,000 but clang+ASAN uses a lot of stack. */
1179
169k
BOOL utf = (cb->external_options & PCRE2_UTF) != 0;
1180
169k
BOOL ucp = (cb->external_options & PCRE2_UCP) != 0;
1181
1182
169k
for (;;)
1183
644M
  {
1184
644M
  c = *code;
1185
1186
  /* LCOV_EXCL_START */
1187
644M
  if (c >= OP_TABLE_LENGTH)
1188
0
    {
1189
0
    PCRE2_DEBUG_UNREACHABLE();
1190
0
    return -1;   /* Something gone wrong */
1191
0
    }
1192
  /* LCOV_EXCL_STOP */
1193
1194
644M
  if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
1195
34.8M
    {
1196
34.8M
    c -= get_repeat_base(c) - OP_STAR;
1197
34.8M
    end = (c <= OP_MINUPTO) ?
1198
34.8M
      get_chr_property_list(code, utf, ucp, cb->fcc, list) : NULL;
1199
34.8M
    list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO;
1200
1201
34.8M
    if (end != NULL && compare_opcodes(end, utf, ucp, cb, list, end,
1202
33.1M
        &rec_limit))
1203
5.36M
      {
1204
5.36M
      switch(c)
1205
5.36M
        {
1206
1.35M
        case OP_STAR:
1207
1.35M
        *code += OP_POSSTAR - OP_STAR;
1208
1.35M
        break;
1209
1210
256k
        case OP_MINSTAR:
1211
256k
        *code += OP_POSSTAR - OP_MINSTAR;
1212
256k
        break;
1213
1214
1.13M
        case OP_PLUS:
1215
1.13M
        *code += OP_POSPLUS - OP_PLUS;
1216
1.13M
        break;
1217
1218
210k
        case OP_MINPLUS:
1219
210k
        *code += OP_POSPLUS - OP_MINPLUS;
1220
210k
        break;
1221
1222
1.84M
        case OP_QUERY:
1223
1.84M
        *code += OP_POSQUERY - OP_QUERY;
1224
1.84M
        break;
1225
1226
338k
        case OP_MINQUERY:
1227
338k
        *code += OP_POSQUERY - OP_MINQUERY;
1228
338k
        break;
1229
1230
195k
        case OP_UPTO:
1231
195k
        *code += OP_POSUPTO - OP_UPTO;
1232
195k
        break;
1233
1234
41.1k
        case OP_MINUPTO:
1235
41.1k
        *code += OP_POSUPTO - OP_MINUPTO;
1236
41.1k
        break;
1237
5.36M
        }
1238
5.36M
      }
1239
34.8M
    c = *code;
1240
34.8M
    }
1241
609M
  else if (c == OP_CLASS || c == OP_NCLASS
1242
604M
#ifdef SUPPORT_WIDE_CHARS
1243
604M
           || c == OP_XCLASS || c == OP_ECLASS
1244
609M
#endif
1245
609M
           )
1246
5.47M
    {
1247
5.47M
#ifdef SUPPORT_WIDE_CHARS
1248
5.47M
    if (c == OP_XCLASS || c == OP_ECLASS)
1249
1.09M
      repeat_opcode = code + GET(code, 1);
1250
4.37M
    else
1251
4.37M
#endif
1252
4.37M
      repeat_opcode = code + 1 + (32 / sizeof(PCRE2_UCHAR));
1253
1254
5.47M
    c = *repeat_opcode;
1255
5.47M
    if (c >= OP_CRSTAR && c <= OP_CRMINRANGE)
1256
2.05M
      {
1257
      /* The return from get_chr_property_list() will never be NULL when
1258
      *code (aka c) is one of the four class opcodes. However, gcc with
1259
      -fanalyzer notes that a NULL return is possible, and grumbles. Hence we
1260
      put in a check. */
1261
1262
2.05M
      end = get_chr_property_list(code, utf, ucp, cb->fcc, list);
1263
2.05M
      list[1] = (c & 1) == 0;
1264
1265
2.05M
      if (end != NULL &&
1266
2.05M
          compare_opcodes(end, utf, ucp, cb, list, end, &rec_limit))
1267
307k
        {
1268
307k
        switch (c)
1269
307k
          {
1270
48.3k
          case OP_CRSTAR:
1271
59.3k
          case OP_CRMINSTAR:
1272
59.3k
          *repeat_opcode = OP_CRPOSSTAR;
1273
59.3k
          break;
1274
1275
63.1k
          case OP_CRPLUS:
1276
72.3k
          case OP_CRMINPLUS:
1277
72.3k
          *repeat_opcode = OP_CRPOSPLUS;
1278
72.3k
          break;
1279
1280
56.4k
          case OP_CRQUERY:
1281
64.4k
          case OP_CRMINQUERY:
1282
64.4k
          *repeat_opcode = OP_CRPOSQUERY;
1283
64.4k
          break;
1284
1285
74.8k
          case OP_CRRANGE:
1286
111k
          case OP_CRMINRANGE:
1287
111k
          *repeat_opcode = OP_CRPOSRANGE;
1288
111k
          break;
1289
307k
          }
1290
307k
        }
1291
2.05M
      }
1292
5.47M
    c = *code;
1293
5.47M
    }
1294
1295
644M
  switch(c)
1296
644M
    {
1297
169k
    case OP_END:
1298
169k
    return 0;
1299
1300
2.75M
    case OP_TYPESTAR:
1301
3.20M
    case OP_TYPEMINSTAR:
1302
5.03M
    case OP_TYPEPLUS:
1303
5.53M
    case OP_TYPEMINPLUS:
1304
7.46M
    case OP_TYPEQUERY:
1305
8.01M
    case OP_TYPEMINQUERY:
1306
8.27M
    case OP_TYPEPOSSTAR:
1307
8.55M
    case OP_TYPEPOSPLUS:
1308
8.76M
    case OP_TYPEPOSQUERY:
1309
8.76M
    if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
1310
8.76M
    break;
1311
1312
286k
    case OP_TYPEUPTO:
1313
335k
    case OP_TYPEMINUPTO:
1314
559k
    case OP_TYPEEXACT:
1315
686k
    case OP_TYPEPOSUPTO:
1316
686k
    if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
1317
90.8k
      code += 2;
1318
686k
    break;
1319
1320
37.8k
    case OP_CALLOUT_STR:
1321
37.8k
    code += GET(code, 1 + 2*LINK_SIZE);
1322
37.8k
    break;
1323
1324
0
#ifdef SUPPORT_WIDE_CHARS
1325
803k
    case OP_XCLASS:
1326
1.09M
    case OP_ECLASS:
1327
1.09M
    code += GET(code, 1);
1328
1.09M
    break;
1329
0
#endif
1330
1331
354k
    case OP_MARK:
1332
383k
    case OP_COMMIT_ARG:
1333
405k
    case OP_PRUNE_ARG:
1334
730k
    case OP_SKIP_ARG:
1335
790k
    case OP_THEN_ARG:
1336
790k
    code += code[1];
1337
790k
    break;
1338
644M
    }
1339
1340
  /* Add in the fixed length from the table */
1341
1342
643M
  code += PRIV(OP_lengths)[c];
1343
1344
  /* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
1345
  followed by a multi-byte character. The length in the table is a minimum, so
1346
  we have to arrange to skip the extra code units. */
1347
1348
643M
#ifdef MAYBE_UTF_MULTI
1349
643M
  if (utf) switch(c)
1350
56.5M
    {
1351
9.35M
    case OP_CHAR:
1352
23.5M
    case OP_CHARI:
1353
23.6M
    case OP_NOT:
1354
23.7M
    case OP_NOTI:
1355
23.8M
    case OP_STAR:
1356
23.9M
    case OP_MINSTAR:
1357
24.0M
    case OP_PLUS:
1358
24.1M
    case OP_MINPLUS:
1359
24.2M
    case OP_QUERY:
1360
24.3M
    case OP_MINQUERY:
1361
24.4M
    case OP_UPTO:
1362
24.4M
    case OP_MINUPTO:
1363
24.4M
    case OP_EXACT:
1364
24.5M
    case OP_POSSTAR:
1365
24.5M
    case OP_POSPLUS:
1366
24.6M
    case OP_POSQUERY:
1367
24.6M
    case OP_POSUPTO:
1368
24.6M
    case OP_STARI:
1369
24.9M
    case OP_MINSTARI:
1370
24.9M
    case OP_PLUSI:
1371
25.0M
    case OP_MINPLUSI:
1372
25.2M
    case OP_QUERYI:
1373
25.4M
    case OP_MINQUERYI:
1374
25.4M
    case OP_UPTOI:
1375
25.4M
    case OP_MINUPTOI:
1376
25.4M
    case OP_EXACTI:
1377
25.6M
    case OP_POSSTARI:
1378
25.6M
    case OP_POSPLUSI:
1379
25.7M
    case OP_POSQUERYI:
1380
25.8M
    case OP_POSUPTOI:
1381
25.8M
    case OP_NOTSTAR:
1382
25.8M
    case OP_NOTMINSTAR:
1383
25.8M
    case OP_NOTPLUS:
1384
25.9M
    case OP_NOTMINPLUS:
1385
25.9M
    case OP_NOTQUERY:
1386
25.9M
    case OP_NOTMINQUERY:
1387
25.9M
    case OP_NOTUPTO:
1388
26.0M
    case OP_NOTMINUPTO:
1389
26.0M
    case OP_NOTEXACT:
1390
26.0M
    case OP_NOTPOSSTAR:
1391
26.0M
    case OP_NOTPOSPLUS:
1392
26.0M
    case OP_NOTPOSQUERY:
1393
26.0M
    case OP_NOTPOSUPTO:
1394
26.0M
    case OP_NOTSTARI:
1395
26.0M
    case OP_NOTMINSTARI:
1396
26.0M
    case OP_NOTPLUSI:
1397
26.1M
    case OP_NOTMINPLUSI:
1398
26.1M
    case OP_NOTQUERYI:
1399
26.1M
    case OP_NOTMINQUERYI:
1400
26.1M
    case OP_NOTUPTOI:
1401
26.1M
    case OP_NOTMINUPTOI:
1402
26.1M
    case OP_NOTEXACTI:
1403
26.1M
    case OP_NOTPOSSTARI:
1404
26.1M
    case OP_NOTPOSPLUSI:
1405
26.1M
    case OP_NOTPOSQUERYI:
1406
26.1M
    case OP_NOTPOSUPTOI:
1407
26.1M
    if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
1408
26.1M
    break;
1409
56.5M
    }
1410
#else
1411
  (void)(utf);  /* Keep compiler happy by referencing function argument */
1412
#endif  /* SUPPORT_WIDE_CHARS */
1413
643M
  }
1414
169k
}
_pcre2_auto_possessify_32
Line
Count
Source
1173
179k
{
1174
179k
PCRE2_UCHAR c;
1175
179k
PCRE2_SPTR end;
1176
179k
PCRE2_UCHAR *repeat_opcode;
1177
179k
uint32_t list[MAX_LIST];
1178
179k
int rec_limit = 1000;  /* Was 10,000 but clang+ASAN uses a lot of stack. */
1179
179k
BOOL utf = (cb->external_options & PCRE2_UTF) != 0;
1180
179k
BOOL ucp = (cb->external_options & PCRE2_UCP) != 0;
1181
1182
179k
for (;;)
1183
343M
  {
1184
343M
  c = *code;
1185
1186
  /* LCOV_EXCL_START */
1187
343M
  if (c >= OP_TABLE_LENGTH)
1188
0
    {
1189
0
    PCRE2_DEBUG_UNREACHABLE();
1190
0
    return -1;   /* Something gone wrong */
1191
0
    }
1192
  /* LCOV_EXCL_STOP */
1193
1194
343M
  if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
1195
10.9M
    {
1196
10.9M
    c -= get_repeat_base(c) - OP_STAR;
1197
10.9M
    end = (c <= OP_MINUPTO) ?
1198
10.9M
      get_chr_property_list(code, utf, ucp, cb->fcc, list) : NULL;
1199
10.9M
    list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO;
1200
1201
10.9M
    if (end != NULL && compare_opcodes(end, utf, ucp, cb, list, end,
1202
10.4M
        &rec_limit))
1203
3.22M
      {
1204
3.22M
      switch(c)
1205
3.22M
        {
1206
765k
        case OP_STAR:
1207
765k
        *code += OP_POSSTAR - OP_STAR;
1208
765k
        break;
1209
1210
157k
        case OP_MINSTAR:
1211
157k
        *code += OP_POSSTAR - OP_MINSTAR;
1212
157k
        break;
1213
1214
773k
        case OP_PLUS:
1215
773k
        *code += OP_POSPLUS - OP_PLUS;
1216
773k
        break;
1217
1218
141k
        case OP_MINPLUS:
1219
141k
        *code += OP_POSPLUS - OP_MINPLUS;
1220
141k
        break;
1221
1222
977k
        case OP_QUERY:
1223
977k
        *code += OP_POSQUERY - OP_QUERY;
1224
977k
        break;
1225
1226
182k
        case OP_MINQUERY:
1227
182k
        *code += OP_POSQUERY - OP_MINQUERY;
1228
182k
        break;
1229
1230
200k
        case OP_UPTO:
1231
200k
        *code += OP_POSUPTO - OP_UPTO;
1232
200k
        break;
1233
1234
24.6k
        case OP_MINUPTO:
1235
24.6k
        *code += OP_POSUPTO - OP_MINUPTO;
1236
24.6k
        break;
1237
3.22M
        }
1238
3.22M
      }
1239
10.9M
    c = *code;
1240
10.9M
    }
1241
332M
  else if (c == OP_CLASS || c == OP_NCLASS
1242
332M
#ifdef SUPPORT_WIDE_CHARS
1243
332M
           || c == OP_XCLASS || c == OP_ECLASS
1244
332M
#endif
1245
332M
           )
1246
2.24M
    {
1247
2.24M
#ifdef SUPPORT_WIDE_CHARS
1248
2.24M
    if (c == OP_XCLASS || c == OP_ECLASS)
1249
1.85M
      repeat_opcode = code + GET(code, 1);
1250
390k
    else
1251
390k
#endif
1252
390k
      repeat_opcode = code + 1 + (32 / sizeof(PCRE2_UCHAR));
1253
1254
2.24M
    c = *repeat_opcode;
1255
2.24M
    if (c >= OP_CRSTAR && c <= OP_CRMINRANGE)
1256
540k
      {
1257
      /* The return from get_chr_property_list() will never be NULL when
1258
      *code is one of the four class opcodes. However, gcc with
1259
      -fanalyzer notes that a NULL return is possible, and grumbles. Hence we
1260
      put in a check. */
1261
1262
540k
      end = get_chr_property_list(code, utf, ucp, cb->fcc, list);
1263
540k
      list[1] = (c & 1) == 0;
1264
1265
540k
      if (end != NULL &&
1266
540k
          compare_opcodes(end, utf, ucp, cb, list, end, &rec_limit))
1267
235k
        {
1268
235k
        switch (c)
1269
235k
          {
1270
51.4k
          case OP_CRSTAR:
1271
64.6k
          case OP_CRMINSTAR:
1272
64.6k
          *repeat_opcode = OP_CRPOSSTAR;
1273
64.6k
          break;
1274
1275
41.5k
          case OP_CRPLUS:
1276
46.4k
          case OP_CRMINPLUS:
1277
46.4k
          *repeat_opcode = OP_CRPOSPLUS;
1278
46.4k
          break;
1279
1280
48.2k
          case OP_CRQUERY:
1281
57.6k
          case OP_CRMINQUERY:
1282
57.6k
          *repeat_opcode = OP_CRPOSQUERY;
1283
57.6k
          break;
1284
1285
58.1k
          case OP_CRRANGE:
1286
67.3k
          case OP_CRMINRANGE:
1287
67.3k
          *repeat_opcode = OP_CRPOSRANGE;
1288
67.3k
          break;
1289
235k
          }
1290
235k
        }
1291
540k
      }
1292
2.24M
    c = *code;
1293
2.24M
    }
1294
1295
343M
  switch(c)
1296
343M
    {
1297
179k
    case OP_END:
1298
179k
    return 0;
1299
1300
789k
    case OP_TYPESTAR:
1301
977k
    case OP_TYPEMINSTAR:
1302
1.53M
    case OP_TYPEPLUS:
1303
1.73M
    case OP_TYPEMINPLUS:
1304
2.03M
    case OP_TYPEQUERY:
1305
2.08M
    case OP_TYPEMINQUERY:
1306
2.26M
    case OP_TYPEPOSSTAR:
1307
2.46M
    case OP_TYPEPOSPLUS:
1308
2.55M
    case OP_TYPEPOSQUERY:
1309
2.55M
    if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
1310
2.55M
    break;
1311
1312
155k
    case OP_TYPEUPTO:
1313
230k
    case OP_TYPEMINUPTO:
1314
318k
    case OP_TYPEEXACT:
1315
444k
    case OP_TYPEPOSUPTO:
1316
444k
    if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
1317
23.4k
      code += 2;
1318
444k
    break;
1319
1320
22.0k
    case OP_CALLOUT_STR:
1321
22.0k
    code += GET(code, 1 + 2*LINK_SIZE);
1322
22.0k
    break;
1323
1324
0
#ifdef SUPPORT_WIDE_CHARS
1325
1.68M
    case OP_XCLASS:
1326
1.85M
    case OP_ECLASS:
1327
1.85M
    code += GET(code, 1);
1328
1.85M
    break;
1329
0
#endif
1330
1331
123k
    case OP_MARK:
1332
167k
    case OP_COMMIT_ARG:
1333
190k
    case OP_PRUNE_ARG:
1334
315k
    case OP_SKIP_ARG:
1335
337k
    case OP_THEN_ARG:
1336
337k
    code += code[1];
1337
337k
    break;
1338
343M
    }
1339
1340
  /* Add in the fixed length from the table */
1341
1342
343M
  code += PRIV(OP_lengths)[c];
1343
1344
  /* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
1345
  followed by a multi-byte character. The length in the table is a minimum, so
1346
  we have to arrange to skip the extra code units. */
1347
1348
#ifdef MAYBE_UTF_MULTI
1349
  if (utf) switch(c)
1350
    {
1351
    case OP_CHAR:
1352
    case OP_CHARI:
1353
    case OP_NOT:
1354
    case OP_NOTI:
1355
    case OP_STAR:
1356
    case OP_MINSTAR:
1357
    case OP_PLUS:
1358
    case OP_MINPLUS:
1359
    case OP_QUERY:
1360
    case OP_MINQUERY:
1361
    case OP_UPTO:
1362
    case OP_MINUPTO:
1363
    case OP_EXACT:
1364
    case OP_POSSTAR:
1365
    case OP_POSPLUS:
1366
    case OP_POSQUERY:
1367
    case OP_POSUPTO:
1368
    case OP_STARI:
1369
    case OP_MINSTARI:
1370
    case OP_PLUSI:
1371
    case OP_MINPLUSI:
1372
    case OP_QUERYI:
1373
    case OP_MINQUERYI:
1374
    case OP_UPTOI:
1375
    case OP_MINUPTOI:
1376
    case OP_EXACTI:
1377
    case OP_POSSTARI:
1378
    case OP_POSPLUSI:
1379
    case OP_POSQUERYI:
1380
    case OP_POSUPTOI:
1381
    case OP_NOTSTAR:
1382
    case OP_NOTMINSTAR:
1383
    case OP_NOTPLUS:
1384
    case OP_NOTMINPLUS:
1385
    case OP_NOTQUERY:
1386
    case OP_NOTMINQUERY:
1387
    case OP_NOTUPTO:
1388
    case OP_NOTMINUPTO:
1389
    case OP_NOTEXACT:
1390
    case OP_NOTPOSSTAR:
1391
    case OP_NOTPOSPLUS:
1392
    case OP_NOTPOSQUERY:
1393
    case OP_NOTPOSUPTO:
1394
    case OP_NOTSTARI:
1395
    case OP_NOTMINSTARI:
1396
    case OP_NOTPLUSI:
1397
    case OP_NOTMINPLUSI:
1398
    case OP_NOTQUERYI:
1399
    case OP_NOTMINQUERYI:
1400
    case OP_NOTUPTOI:
1401
    case OP_NOTMINUPTOI:
1402
    case OP_NOTEXACTI:
1403
    case OP_NOTPOSSTARI:
1404
    case OP_NOTPOSPLUSI:
1405
    case OP_NOTPOSQUERYI:
1406
    case OP_NOTPOSUPTOI:
1407
    if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
1408
    break;
1409
    }
1410
#else
1411
343M
  (void)(utf);  /* Keep compiler happy by referencing the local variable */
1412
343M
#endif  /* MAYBE_UTF_MULTI */
1413
343M
  }
1414
179k
}
_pcre2_auto_possessify_16
Line
Count
Source
1173
181k
{
1174
181k
PCRE2_UCHAR c;
1175
181k
PCRE2_SPTR end;
1176
181k
PCRE2_UCHAR *repeat_opcode;
1177
181k
uint32_t list[MAX_LIST];
1178
181k
int rec_limit = 1000;  /* Was 10,000 but clang+ASAN uses a lot of stack. */
1179
181k
BOOL utf = (cb->external_options & PCRE2_UTF) != 0;
1180
181k
BOOL ucp = (cb->external_options & PCRE2_UCP) != 0;
1181
1182
181k
for (;;)
1183
424M
  {
1184
424M
  c = *code;
1185
1186
  /* LCOV_EXCL_START */
1187
424M
  if (c >= OP_TABLE_LENGTH)
1188
0
    {
1189
0
    PCRE2_DEBUG_UNREACHABLE();
1190
0
    return -1;   /* Something gone wrong */
1191
0
    }
1192
  /* LCOV_EXCL_STOP */
1193
1194
424M
  if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
1195
19.2M
    {
1196
19.2M
    c -= get_repeat_base(c) - OP_STAR;
1197
19.2M
    end = (c <= OP_MINUPTO) ?
1198
19.2M
      get_chr_property_list(code, utf, ucp, cb->fcc, list) : NULL;
1199
19.2M
    list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO;
1200
1201
19.2M
    if (end != NULL && compare_opcodes(end, utf, ucp, cb, list, end,
1202
18.4M
        &rec_limit))
1203
3.60M
      {
1204
3.60M
      switch(c)
1205
3.60M
        {
1206
746k
        case OP_STAR:
1207
746k
        *code += OP_POSSTAR - OP_STAR;
1208
746k
        break;
1209
1210
140k
        case OP_MINSTAR:
1211
140k
        *code += OP_POSSTAR - OP_MINSTAR;
1212
140k
        break;
1213
1214
899k
        case OP_PLUS:
1215
899k
        *code += OP_POSPLUS - OP_PLUS;
1216
899k
        break;
1217
1218
163k
        case OP_MINPLUS:
1219
163k
        *code += OP_POSPLUS - OP_MINPLUS;
1220
163k
        break;
1221
1222
1.11M
        case OP_QUERY:
1223
1.11M
        *code += OP_POSQUERY - OP_QUERY;
1224
1.11M
        break;
1225
1226
265k
        case OP_MINQUERY:
1227
265k
        *code += OP_POSQUERY - OP_MINQUERY;
1228
265k
        break;
1229
1230
216k
        case OP_UPTO:
1231
216k
        *code += OP_POSUPTO - OP_UPTO;
1232
216k
        break;
1233
1234
60.5k
        case OP_MINUPTO:
1235
60.5k
        *code += OP_POSUPTO - OP_MINUPTO;
1236
60.5k
        break;
1237
3.60M
        }
1238
3.60M
      }
1239
19.2M
    c = *code;
1240
19.2M
    }
1241
405M
  else if (c == OP_CLASS || c == OP_NCLASS
1242
404M
#ifdef SUPPORT_WIDE_CHARS
1243
404M
           || c == OP_XCLASS || c == OP_ECLASS
1244
405M
#endif
1245
405M
           )
1246
3.56M
    {
1247
3.56M
#ifdef SUPPORT_WIDE_CHARS
1248
3.56M
    if (c == OP_XCLASS || c == OP_ECLASS)
1249
2.83M
      repeat_opcode = code + GET(code, 1);
1250
728k
    else
1251
728k
#endif
1252
728k
      repeat_opcode = code + 1 + (32 / sizeof(PCRE2_UCHAR));
1253
1254
3.56M
    c = *repeat_opcode;
1255
3.56M
    if (c >= OP_CRSTAR && c <= OP_CRMINRANGE)
1256
1.12M
      {
1257
      /* The return from get_chr_property_list() will never be NULL when
1258
      *code is one of the four class opcodes. However, gcc with
1259
      -fanalyzer notes that a NULL return is possible, and grumbles. Hence we
1260
      put in a check. */
1261
1262
1.12M
      end = get_chr_property_list(code, utf, ucp, cb->fcc, list);
1263
1.12M
      list[1] = (c & 1) == 0;
1264
1265
1.12M
      if (end != NULL &&
1266
1.12M
          compare_opcodes(end, utf, ucp, cb, list, end, &rec_limit))
1267
400k
        {
1268
400k
        switch (c)
1269
400k
          {
1270
58.5k
          case OP_CRSTAR:
1271
68.4k
          case OP_CRMINSTAR:
1272
68.4k
          *repeat_opcode = OP_CRPOSSTAR;
1273
68.4k
          break;
1274
1275
118k
          case OP_CRPLUS:
1276
147k
          case OP_CRMINPLUS:
1277
147k
          *repeat_opcode = OP_CRPOSPLUS;
1278
147k
          break;
1279
1280
61.5k
          case OP_CRQUERY:
1281
75.3k
          case OP_CRMINQUERY:
1282
75.3k
          *repeat_opcode = OP_CRPOSQUERY;
1283
75.3k
          break;
1284
1285
88.1k
          case OP_CRRANGE:
1286
108k
          case OP_CRMINRANGE:
1287
108k
          *repeat_opcode = OP_CRPOSRANGE;
1288
108k
          break;
1289
400k
          }
1290
400k
        }
1291
1.12M
      }
1292
3.56M
    c = *code;
1293
3.56M
    }
1294
1295
424M
  switch(c)
1296
424M
    {
1297
181k
    case OP_END:
1298
181k
    return 0;
1299
1300
803k
    case OP_TYPESTAR:
1301
1.00M
    case OP_TYPEMINSTAR:
1302
3.09M
    case OP_TYPEPLUS:
1303
3.35M
    case OP_TYPEMINPLUS:
1304
6.83M
    case OP_TYPEQUERY:
1305
7.67M
    case OP_TYPEMINQUERY:
1306
7.92M
    case OP_TYPEPOSSTAR:
1307
8.21M
    case OP_TYPEPOSPLUS:
1308
8.37M
    case OP_TYPEPOSQUERY:
1309
8.37M
    if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
1310
8.37M
    break;
1311
1312
214k
    case OP_TYPEUPTO:
1313
235k
    case OP_TYPEMINUPTO:
1314
435k
    case OP_TYPEEXACT:
1315
505k
    case OP_TYPEPOSUPTO:
1316
505k
    if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
1317
108k
      code += 2;
1318
505k
    break;
1319
1320
60.2k
    case OP_CALLOUT_STR:
1321
60.2k
    code += GET(code, 1 + 2*LINK_SIZE);
1322
60.2k
    break;
1323
1324
0
#ifdef SUPPORT_WIDE_CHARS
1325
2.62M
    case OP_XCLASS:
1326
2.83M
    case OP_ECLASS:
1327
2.83M
    code += GET(code, 1);
1328
2.83M
    break;
1329
0
#endif
1330
1331
173k
    case OP_MARK:
1332
194k
    case OP_COMMIT_ARG:
1333
222k
    case OP_PRUNE_ARG:
1334
408k
    case OP_SKIP_ARG:
1335
472k
    case OP_THEN_ARG:
1336
472k
    code += code[1];
1337
472k
    break;
1338
424M
    }
1339
1340
  /* Add in the fixed length from the table */
1341
1342
424M
  code += PRIV(OP_lengths)[c];
1343
1344
  /* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
1345
  followed by a multi-byte character. The length in the table is a minimum, so
1346
  we have to arrange to skip the extra code units. */
1347
1348
424M
#ifdef MAYBE_UTF_MULTI
1349
424M
  if (utf) switch(c)
1350
95.1M
    {
1351
18.7M
    case OP_CHAR:
1352
47.5M
    case OP_CHARI:
1353
47.5M
    case OP_NOT:
1354
47.6M
    case OP_NOTI:
1355
47.6M
    case OP_STAR:
1356
47.7M
    case OP_MINSTAR:
1357
47.8M
    case OP_PLUS:
1358
47.9M
    case OP_MINPLUS:
1359
48.1M
    case OP_QUERY:
1360
48.4M
    case OP_MINQUERY:
1361
48.4M
    case OP_UPTO:
1362
48.4M
    case OP_MINUPTO:
1363
48.4M
    case OP_EXACT:
1364
48.5M
    case OP_POSSTAR:
1365
48.5M
    case OP_POSPLUS:
1366
48.6M
    case OP_POSQUERY:
1367
48.7M
    case OP_POSUPTO:
1368
48.7M
    case OP_STARI:
1369
48.8M
    case OP_MINSTARI:
1370
48.9M
    case OP_PLUSI:
1371
49.0M
    case OP_MINPLUSI:
1372
49.1M
    case OP_QUERYI:
1373
49.2M
    case OP_MINQUERYI:
1374
49.2M
    case OP_UPTOI:
1375
49.2M
    case OP_MINUPTOI:
1376
49.2M
    case OP_EXACTI:
1377
49.3M
    case OP_POSSTARI:
1378
49.4M
    case OP_POSPLUSI:
1379
49.5M
    case OP_POSQUERYI:
1380
49.6M
    case OP_POSUPTOI:
1381
49.6M
    case OP_NOTSTAR:
1382
49.6M
    case OP_NOTMINSTAR:
1383
49.6M
    case OP_NOTPLUS:
1384
49.6M
    case OP_NOTMINPLUS:
1385
49.6M
    case OP_NOTQUERY:
1386
49.6M
    case OP_NOTMINQUERY:
1387
49.6M
    case OP_NOTUPTO:
1388
49.6M
    case OP_NOTMINUPTO:
1389
49.6M
    case OP_NOTEXACT:
1390
49.6M
    case OP_NOTPOSSTAR:
1391
49.6M
    case OP_NOTPOSPLUS:
1392
49.6M
    case OP_NOTPOSQUERY:
1393
49.6M
    case OP_NOTPOSUPTO:
1394
49.6M
    case OP_NOTSTARI:
1395
49.6M
    case OP_NOTMINSTARI:
1396
49.6M
    case OP_NOTPLUSI:
1397
49.6M
    case OP_NOTMINPLUSI:
1398
49.6M
    case OP_NOTQUERYI:
1399
49.6M
    case OP_NOTMINQUERYI:
1400
49.6M
    case OP_NOTUPTOI:
1401
49.7M
    case OP_NOTMINUPTOI:
1402
49.7M
    case OP_NOTEXACTI:
1403
49.7M
    case OP_NOTPOSSTARI:
1404
49.7M
    case OP_NOTPOSPLUSI:
1405
49.7M
    case OP_NOTPOSQUERYI:
1406
49.7M
    case OP_NOTPOSUPTOI:
1407
49.7M
    if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
1408
49.7M
    break;
1409
95.1M
    }
1410
#else
1411
  (void)(utf);  /* Keep compiler happy by referencing the local variable */
1412
#endif  /* MAYBE_UTF_MULTI */
1413
424M
  }
1414
181k
}
1415
1416
/* End of pcre2_auto_possess.c */