Coverage Report

Created: 2026-03-31 07:45

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/pcre2-10.39/src/pcre2_jit_compile.c
Line
Count
Source
1
/*************************************************
2
*      Perl-Compatible Regular Expressions       *
3
*************************************************/
4
5
/* PCRE is a library of functions to support regular expressions whose syntax
6
and semantics are as close as possible to those of the Perl 5 language.
7
8
                       Written by Philip Hazel
9
                    This module by Zoltan Herczeg
10
     Original API code Copyright (c) 1997-2012 University of Cambridge
11
          New API code Copyright (c) 2016-2019 University of Cambridge
12
13
-----------------------------------------------------------------------------
14
Redistribution and use in source and binary forms, with or without
15
modification, are permitted provided that the following conditions are met:
16
17
    * Redistributions of source code must retain the above copyright notice,
18
      this list of conditions and the following disclaimer.
19
20
    * Redistributions in binary form must reproduce the above copyright
21
      notice, this list of conditions and the following disclaimer in the
22
      documentation and/or other materials provided with the distribution.
23
24
    * Neither the name of the University of Cambridge nor the names of its
25
      contributors may be used to endorse or promote products derived from
26
      this software without specific prior written permission.
27
28
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
-----------------------------------------------------------------------------
40
*/
41
42
#ifdef HAVE_CONFIG_H
43
#include "config.h"
44
#endif
45
46
#include "pcre2_internal.h"
47
48
#ifdef SUPPORT_JIT
49
50
/* All-in-one: Since we use the JIT compiler only from here,
51
we just include it. This way we don't need to touch the build
52
system files. */
53
54
#define SLJIT_CONFIG_AUTO 1
55
#define SLJIT_CONFIG_STATIC 1
56
#define SLJIT_VERBOSE 0
57
58
#ifdef PCRE2_DEBUG
59
#define SLJIT_DEBUG 1
60
#else
61
#define SLJIT_DEBUG 0
62
#endif
63
64
#define SLJIT_MALLOC(size, allocator_data) pcre2_jit_malloc(size, allocator_data)
65
#define SLJIT_FREE(ptr, allocator_data) pcre2_jit_free(ptr, allocator_data)
66
67
static void * pcre2_jit_malloc(size_t size, void *allocator_data) /* Allocation hook behind SLJIT_MALLOC: requests size bytes through the pcre2_memctl passed as allocator_data. */
68
{
69
pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data); /* allocator_data is used as a pcre2_memctl; it is not checked for NULL here. */
70
return allocator->malloc(size, allocator->memory_data); /* The result of the memctl's malloc callback is returned unchanged. */
71
}
72
73
static void pcre2_jit_free(void *ptr, void *allocator_data) /* Release hook behind SLJIT_FREE: hands ptr to the free callback of the pcre2_memctl passed as allocator_data. */
74
{
75
pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data); /* allocator_data is used as a pcre2_memctl; it is not checked for NULL here. */
76
allocator->free(ptr, allocator->memory_data); /* memory_data is forwarded as the callback's second argument, as in pcre2_jit_malloc. */
77
}
78
79
#include "sljit/sljitLir.c"
80
81
#if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
82
#error Unsupported architecture
83
#endif
84
85
/* Defines for debugging purposes. */
86
87
/* 1 - Use unoptimized capturing brackets.
88
   2 - Enable capture_last_ptr (includes option 1). */
89
/* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
90
91
/* 1 - Always have a control head. */
92
/* #define DEBUG_FORCE_CONTROL_HEAD 1 */
93
94
/* Allocate memory for the regex stack on the real machine stack.
95
Fast, but limited size. */
96
#define MACHINE_STACK_SIZE 32768
97
98
/* Growth rate for stack allocated by the OS. Should be a multiple
99
of page size. */
100
#define STACK_GROWTH_RATE 8192
101
102
/* Enable to check that the allocation could destroy temporaries. */
103
#if defined SLJIT_DEBUG && SLJIT_DEBUG
104
#define DESTROY_REGISTERS 1
105
#endif
106
107
/*
108
Short summary about the backtracking mechanism employed by the jit code generator:
109
110
The code generator follows the recursive nature of the PERL compatible regular
111
expressions. The basic blocks of regular expressions are condition checkers
112
which execute different commands depending on the result of the condition check.
113
The relationship between the operators can be horizontal (concatenation) and
114
vertical (sub-expression) (See struct backtrack_common for more details).
115
116
  'ab' - 'a' and 'b' regexps are concatenated
117
  'a+' - 'a' is the sub-expression of the '+' operator
118
119
The condition checkers are boolean (true/false) checkers. Machine code is generated
120
for the checker itself and for the actions depending on the result of the checker.
121
The 'true' case is called as the matching path (expected path), and the other is called as
122
the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
123
branches on the matching path.
124
125
 Greedy star operator (*) :
126
   Matching path: match happens.
127
   Backtrack path: match failed.
128
 Non-greedy star operator (*?) :
129
   Matching path: no need to perform a match.
130
   Backtrack path: match is required.
131
132
The following example shows how the code is generated for a capturing bracket
133
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
134
we have the following regular expression:
135
136
   A(B|C)D
137
138
The generated code will be the following:
139
140
 A matching path
141
 '(' matching path (pushing arguments to the stack)
142
 B matching path
143
 ')' matching path (pushing arguments to the stack)
144
 D matching path
145
 return with successful match
146
147
 D backtrack path
148
 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
149
 B backtrack path
150
 C expected path
151
 jump to D matching path
152
 C backtrack path
153
 A backtrack path
154
155
 Notice that the order of backtrack code paths is the opposite of the fast
155
 code paths. In this way the topmost value on the stack always belongs
157
 to the current backtrack code path. The backtrack path must check
158
 whether there is a next alternative. If so, it needs to jump back to
159
 the matching path eventually. Otherwise it needs to clear out its own stack
160
 frame and continue the execution on the backtrack code paths.
161
*/
162
163
/*
164
Saved stack frames:
165
166
Atomic blocks and asserts require reloading the values of private data
167
when the backtrack mechanism is performed. Because of OP_RECURSE, the data
167
are not necessarily known at compile time, thus we need a dynamic restore
169
mechanism.
170
171
The stack frames are stored in a chain list, and have the following format:
172
([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
173
174
Thus we can restore the private data to a particular point in the stack.
175
*/
176
177
typedef struct jit_arguments {
178
  /* Pointers first. */
179
  struct sljit_stack *stack;
180
  PCRE2_SPTR str;
181
  PCRE2_SPTR begin;
182
  PCRE2_SPTR end;
183
  pcre2_match_data *match_data;
184
  PCRE2_SPTR startchar_ptr;
185
  PCRE2_UCHAR *mark_ptr;
186
  int (*callout)(pcre2_callout_block *, void *);
187
  void *callout_data;
188
  /* Everything else after. */
189
  sljit_uw offset_limit;
190
  sljit_u32 limit_match;
191
  sljit_u32 oveccount;
192
  sljit_u32 options;
193
} jit_arguments;
194
195
#define JIT_NUMBER_OF_COMPILE_MODES 3
196
197
typedef struct executable_functions {
198
  void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
199
  void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
200
  sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
201
  sljit_u32 top_bracket;
202
  sljit_u32 limit_match;
203
} executable_functions;
204
205
typedef struct jump_list {
206
  struct sljit_jump *jump;
207
  struct jump_list *next;
208
} jump_list;
209
210
typedef struct stub_list {
211
  struct sljit_jump *start;
212
  struct sljit_label *quit;
213
  struct stub_list *next;
214
} stub_list;
215
216
enum frame_types {
217
  no_frame = -1,
218
  no_stack = -2
219
};
220
221
enum control_types {
222
  type_mark = 0,
223
  type_then_trap = 1
224
};
225
226
enum  early_fail_types {
227
  type_skip = 0,
228
  type_fail = 1,
229
  type_fail_range = 2
230
};
231
232
typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args);
233
234
/* The following structure is the key data type for the recursive
235
code generator. It is allocated by compile_matchingpath, and contains
236
the arguments for compile_backtrackingpath. Must be the first member
237
of its descendants. */
238
typedef struct backtrack_common {
239
  /* Concatenation stack. */
240
  struct backtrack_common *prev;
241
  jump_list *nextbacktracks;
242
  /* Internal stack (for component operators). */
243
  struct backtrack_common *top;
244
  jump_list *topbacktracks;
245
  /* Opcode pointer. */
246
  PCRE2_SPTR cc;
247
} backtrack_common;
248
249
typedef struct assert_backtrack {
250
  backtrack_common common;
251
  jump_list *condfailed;
252
  /* Less than 0 if a frame is not needed. */
253
  int framesize;
254
  /* Points to our private memory word on the stack. */
255
  int private_data_ptr;
256
  /* For iterators. */
257
  struct sljit_label *matchingpath;
258
} assert_backtrack;
259
260
typedef struct bracket_backtrack { /* Backtrack record for bracket opcodes (fields below cover alternatives, iterators, conditions and OP_ONCE). */
261
  backtrack_common common; /* Shared header; backtrack_common must be the first member of its descendants. */
262
  /* Where to continue if an alternative is successfully matched. */
263
  struct sljit_label *alternative_matchingpath;
264
  /* For rmin and rmax iterators. */
265
  struct sljit_label *recursive_matchingpath;
266
  /* For greedy ? operator. */
267
  struct sljit_label *zero_matchingpath;
268
  /* Contains the branches of a failed condition. */
269
  union { /* Only one member is meaningful at a time; which one depends on the opcode noted on each member. */
270
    /* Both for OP_COND, OP_SCOND. */
271
    jump_list *condfailed;
272
    assert_backtrack *assert;
273
    /* For OP_ONCE. Less than 0 if not needed. */
274
    int framesize;
275
    /* For brackets with >3 alternatives. */
276
    struct sljit_put_label *matching_put_label;
277
  } u;
278
  /* Points to our private memory word on the stack. */
279
  int private_data_ptr;
280
} bracket_backtrack;
281
282
typedef struct bracketpos_backtrack {
283
  backtrack_common common;
284
  /* Points to our private memory word on the stack. */
285
  int private_data_ptr;
286
  /* Reverting stack is needed. */
287
  int framesize;
288
  /* Allocated stack size. */
289
  int stacksize;
290
} bracketpos_backtrack;
291
292
typedef struct braminzero_backtrack {
293
  backtrack_common common;
294
  struct sljit_label *matchingpath;
295
} braminzero_backtrack;
296
297
typedef struct char_iterator_backtrack {
298
  backtrack_common common;
299
  /* Next iteration. */
300
  struct sljit_label *matchingpath;
301
  union {
302
    jump_list *backtracks;
303
    struct {
304
      unsigned int othercasebit;
305
      PCRE2_UCHAR chr;
306
      BOOL enabled;
307
    } charpos;
308
  } u;
309
} char_iterator_backtrack;
310
311
typedef struct ref_iterator_backtrack {
312
  backtrack_common common;
313
  /* Next iteration. */
314
  struct sljit_label *matchingpath;
315
} ref_iterator_backtrack;
316
317
typedef struct recurse_entry { /* One node per recursively callable sub-pattern; nodes are chained through next. */
318
  struct recurse_entry *next; /* Next node of the singly linked list, or presumably NULL at the end. */
319
  /* Contains the function entry label. */
320
  struct sljit_label *entry_label;
321
  /* Contains the function backtrack label. */
322
  struct sljit_label *backtrack_label;
323
  /* Collects the entry calls made while the function is not yet created. */
324
  jump_list *entry_calls;
325
  /* Collects the backtrack calls made while the function is not yet created. */
326
  jump_list *backtrack_calls;
327
  /* Points to the starting opcode. */
328
  sljit_sw start;
329
} recurse_entry;
330
331
typedef struct recurse_backtrack {
332
  backtrack_common common;
333
  /* Return to the matching path. */
334
  struct sljit_label *matchingpath;
335
  /* Recursive pattern. */
336
  recurse_entry *entry;
337
  /* Pattern is inlined. */
338
  BOOL inlined_pattern;
339
} recurse_backtrack;
340
341
#define OP_THEN_TRAP OP_TABLE_LENGTH
342
343
typedef struct then_trap_backtrack {
344
  backtrack_common common;
345
  /* If then_trap is not NULL, this structure contains the real
346
  then_trap for the backtracking path. */
347
  struct then_trap_backtrack *then_trap;
348
  /* Points to the starting opcode. */
349
  sljit_sw start;
350
  /* Exit point for the then opcodes of this alternative. */
351
  jump_list *quit;
352
  /* Frame size of the current alternative. */
353
  int framesize;
354
} then_trap_backtrack;
355
356
#define MAX_N_CHARS 12
357
#define MAX_DIFF_CHARS 5
358
359
typedef struct fast_forward_char_data {
360
  /* Number of characters in the chars array, 255 for any character. */
361
  sljit_u8 count;
362
  /* Number of last UTF-8 characters in the chars array. */
363
  sljit_u8 last_count;
364
  /* Available characters in the current position. */
365
  PCRE2_UCHAR chars[MAX_DIFF_CHARS];
366
} fast_forward_char_data;
367
368
#define MAX_CLASS_RANGE_SIZE 4
369
#define MAX_CLASS_CHARS_SIZE 3
370
371
typedef struct compiler_common {
372
  /* The sljit generic compiler. */
373
  struct sljit_compiler *compiler;
374
  /* Compiled regular expression. */
375
  pcre2_real_code *re;
376
  /* First byte code. */
377
  PCRE2_SPTR start;
378
  /* Maps private data offset to each opcode. */
379
  sljit_s32 *private_data_ptrs;
380
  /* Chain list of read-only data ptrs. */
381
  void *read_only_data_head;
382
  /* Tells whether the capturing bracket is optimized. */
383
  sljit_u8 *optimized_cbracket;
384
  /* Tells whether the starting offset is a target of then. */
385
  sljit_u8 *then_offsets;
386
  /* Current position where a THEN must jump. */
387
  then_trap_backtrack *then_trap;
388
  /* Starting offset of private data for capturing brackets. */
389
  sljit_s32 cbra_ptr;
390
  /* Output vector starting point. Must be divisible by 2. */
391
  sljit_s32 ovector_start;
392
  /* Points to the starting character of the current match. */
393
  sljit_s32 start_ptr;
394
  /* Last known position of the requested byte. */
395
  sljit_s32 req_char_ptr;
396
  /* Head of the last recursion. */
397
  sljit_s32 recursive_head_ptr;
398
  /* First inspected character for partial matching.
399
     (Needed for avoiding zero length partial matches.) */
400
  sljit_s32 start_used_ptr;
401
  /* Starting pointer for partial soft matches. */
402
  sljit_s32 hit_start;
403
  /* Pointer of the match end position. */
404
  sljit_s32 match_end_ptr;
405
  /* Points to the marked string. */
406
  sljit_s32 mark_ptr;
407
  /* Recursive control verb management chain. */
408
  sljit_s32 control_head_ptr;
409
  /* Points to the last matched capture block index. */
410
  sljit_s32 capture_last_ptr;
411
  /* Fast forward skipping byte code pointer. */
412
  PCRE2_SPTR fast_forward_bc_ptr;
413
  /* Locals used by fast fail optimization. */
414
  sljit_s32 early_fail_start_ptr;
415
  sljit_s32 early_fail_end_ptr;
416
417
  /* Flipped and lower case tables. */
418
  const sljit_u8 *fcc;
419
  sljit_sw lcc;
420
  /* Mode can be PCRE2_JIT_COMPLETE and others. */
421
  int mode;
422
  /* TRUE, when empty match is accepted for partial matching. */
423
  BOOL allow_empty_partial;
424
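  /* TRUE, when the match might be empty. NOTE(review): this comment used to read "minlength is greater than 0", which contradicts the field name - confirm against the code that sets it. */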
425
  BOOL might_be_empty;
426
  /* \K is found in the pattern. */
427
  BOOL has_set_som;
428
  /* (*SKIP:arg) is found in the pattern. */
429
  BOOL has_skip_arg;
430
  /* (*THEN) is found in the pattern. */
431
  BOOL has_then;
432
  /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
433
  BOOL has_skip_in_assert_back;
434
  /* Quit is redirected by recurse, negative assertion, or positive assertion in conditional block. */
435
  BOOL local_quit_available;
436
  /* Currently in a positive assertion. */
437
  BOOL in_positive_assertion;
438
  /* Newline control. */
439
  int nltype;
440
  sljit_u32 nlmax;
441
  sljit_u32 nlmin;
442
  int newline;
443
  int bsr_nltype;
444
  sljit_u32 bsr_nlmax;
445
  sljit_u32 bsr_nlmin;
446
  /* Dollar endonly. */
447
  int endonly;
448
  /* Tables. */
449
  sljit_sw ctypes;
450
  /* Named capturing brackets. */
451
  PCRE2_SPTR name_table;
452
  sljit_sw name_count;
453
  sljit_sw name_entry_size;
454
455
  /* Labels and jump lists. */
456
  struct sljit_label *partialmatchlabel;
457
  struct sljit_label *quit_label;
458
  struct sljit_label *abort_label;
459
  struct sljit_label *accept_label;
460
  struct sljit_label *ff_newline_shortcut;
461
  stub_list *stubs;
462
  recurse_entry *entries;
463
  recurse_entry *currententry;
464
  jump_list *partialmatch;
465
  jump_list *quit;
466
  jump_list *positive_assertion_quit;
467
  jump_list *abort;
468
  jump_list *failed_match;
469
  jump_list *accept;
470
  jump_list *calllimit;
471
  jump_list *stackalloc;
472
  jump_list *revertframes;
473
  jump_list *wordboundary;
474
  jump_list *anynewline;
475
  jump_list *hspace;
476
  jump_list *vspace;
477
  jump_list *casefulcmp;
478
  jump_list *caselesscmp;
479
  jump_list *reset_match;
480
  BOOL unset_backref;
481
  BOOL alt_circumflex;
482
#ifdef SUPPORT_UNICODE
483
  BOOL utf;
484
  BOOL invalid_utf;
485
  BOOL ucp;
486
  /* Points to saving area for iref. */
487
  sljit_s32 iref_ptr;
488
  jump_list *getucd;
489
  jump_list *getucdtype;
490
#if PCRE2_CODE_UNIT_WIDTH == 8
491
  jump_list *utfreadchar;
492
  jump_list *utfreadtype8;
493
  jump_list *utfpeakcharback;
494
#endif
495
#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
496
  jump_list *utfreadchar_invalid;
497
  jump_list *utfreadnewline_invalid;
498
  jump_list *utfmoveback_invalid;
499
  jump_list *utfpeakcharback_invalid;
500
#endif
501
#endif /* SUPPORT_UNICODE */
502
} compiler_common;
503
504
/* For byte_sequence_compare. */
505
506
typedef struct compare_context {
507
  int length;
508
  int sourcereg;
509
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
510
  int ucharptr;
511
  union {
512
    sljit_s32 asint;
513
    sljit_u16 asushort;
514
#if PCRE2_CODE_UNIT_WIDTH == 8
515
    sljit_u8 asbyte;
516
    sljit_u8 asuchars[4];
517
#elif PCRE2_CODE_UNIT_WIDTH == 16
518
    sljit_u16 asuchars[2];
519
#elif PCRE2_CODE_UNIT_WIDTH == 32
520
    sljit_u32 asuchars[1];
521
#endif
522
  } c;
523
  union {
524
    sljit_s32 asint;
525
    sljit_u16 asushort;
526
#if PCRE2_CODE_UNIT_WIDTH == 8
527
    sljit_u8 asbyte;
528
    sljit_u8 asuchars[4];
529
#elif PCRE2_CODE_UNIT_WIDTH == 16
530
    sljit_u16 asuchars[2];
531
#elif PCRE2_CODE_UNIT_WIDTH == 32
532
    sljit_u32 asuchars[1];
533
#endif
534
  } oc;
535
#endif
536
} compare_context;
537
538
/* Undefine sljit macros. */
539
#undef CMP
540
541
/* Used for accessing the elements of the stack. */
542
#define STACK(i)      ((i) * (int)sizeof(sljit_sw))
543
544
#ifdef SLJIT_PREF_SHIFT_REG
545
#if SLJIT_PREF_SHIFT_REG == SLJIT_R2
546
/* Nothing. */
547
#elif SLJIT_PREF_SHIFT_REG == SLJIT_R3
548
#define SHIFT_REG_IS_R3
549
#else
550
#error "Unsupported shift register"
551
#endif
552
#endif
553
554
#define TMP1          SLJIT_R0
555
#ifdef SHIFT_REG_IS_R3
556
#define TMP2          SLJIT_R3
557
#define TMP3          SLJIT_R2
558
#else
559
#define TMP2          SLJIT_R2
560
#define TMP3          SLJIT_R3
561
#endif
562
#define STR_PTR       SLJIT_R1
563
#define STR_END       SLJIT_S0
564
#define STACK_TOP     SLJIT_S1
565
#define STACK_LIMIT   SLJIT_S2
566
#define COUNT_MATCH   SLJIT_S3
567
#define ARGUMENTS     SLJIT_S4
568
#define RETURN_ADDR   SLJIT_R4
569
570
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
571
#define HAS_VIRTUAL_REGISTERS 1
572
#else
573
#define HAS_VIRTUAL_REGISTERS 0
574
#endif
575
576
/* Local space layout. */
577
/* These two locals can be used by the current opcode. */
578
#define LOCALS0          (0 * sizeof(sljit_sw))
579
#define LOCALS1          (1 * sizeof(sljit_sw))
580
/* Two local variables for possessive quantifiers (char1 cannot use them). */
581
#define POSSESSIVE0      (2 * sizeof(sljit_sw))
582
#define POSSESSIVE1      (3 * sizeof(sljit_sw))
583
/* Max limit of recursions. */
584
#define LIMIT_MATCH      (4 * sizeof(sljit_sw))
585
/* The output vector is stored on the stack, and contains pointers
586
to characters. The vector data is divided into two groups: the first
587
group contains the start / end character pointers, and the second is
588
the start pointers when the end of the capturing group has not yet reached. */
589
#define OVECTOR_START    (common->ovector_start)
590
#define OVECTOR(i)       (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
591
#define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
592
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
593
594
#if PCRE2_CODE_UNIT_WIDTH == 8
595
#define MOV_UCHAR  SLJIT_MOV_U8
596
#define IN_UCHARS(x) (x)
597
#elif PCRE2_CODE_UNIT_WIDTH == 16
598
#define MOV_UCHAR  SLJIT_MOV_U16
599
#define UCHAR_SHIFT (1)
600
#define IN_UCHARS(x) ((x) * 2)
601
#elif PCRE2_CODE_UNIT_WIDTH == 32
602
#define MOV_UCHAR  SLJIT_MOV_U32
603
#define UCHAR_SHIFT (2)
604
#define IN_UCHARS(x) ((x) * 4)
605
#else
606
#error Unsupported compiling mode
607
#endif
608
609
/* Shortcuts. */
610
#define DEFINE_COMPILER \
611
  struct sljit_compiler *compiler = common->compiler
612
#define OP1(op, dst, dstw, src, srcw) \
613
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
614
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
615
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
616
#define OP_SRC(op, src, srcw) \
617
  sljit_emit_op_src(compiler, (op), (src), (srcw))
618
#define LABEL() \
619
  sljit_emit_label(compiler)
620
#define JUMP(type) \
621
  sljit_emit_jump(compiler, (type))
622
#define JUMPTO(type, label) \
623
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
624
#define JUMPHERE(jump) \
625
  sljit_set_label((jump), sljit_emit_label(compiler))
626
#define SET_LABEL(jump, label) \
627
  sljit_set_label((jump), (label))
628
#define CMP(type, src1, src1w, src2, src2w) \
629
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
630
#define CMPTO(type, src1, src1w, src2, src2w, label) \
631
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
632
#define OP_FLAGS(op, dst, dstw, type) \
633
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))
634
#define CMOV(type, dst_reg, src, srcw) \
635
  sljit_emit_cmov(compiler, (type), (dst_reg), (src), (srcw))
636
#define GET_LOCAL_BASE(dst, dstw, offset) \
637
  sljit_get_local_base(compiler, (dst), (dstw), (offset))
638
639
#define READ_CHAR_MAX 0x7fffffff
640
641
#define INVALID_UTF_CHAR -1
642
#define UNASSIGNED_UTF_CHAR 888
643
644
#if defined SUPPORT_UNICODE
645
#if PCRE2_CODE_UNIT_WIDTH == 8
646
647
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
648
  { \
649
  if (ptr[0] <= 0x7f) \
650
    c = *ptr++; \
651
  else if (ptr + 1 < end && ptr[1] >= 0x80 && ptr[1] < 0xc0) \
652
    { \
653
    c = ptr[1] - 0x80; \
654
    \
655
    if (ptr[0] >= 0xc2 && ptr[0] <= 0xdf) \
656
      { \
657
      c |= (ptr[0] - 0xc0) << 6; \
658
      ptr += 2; \
659
      } \
660
    else if (ptr + 2 < end && ptr[2] >= 0x80 && ptr[2] < 0xc0) \
661
      { \
662
      c = c << 6 | (ptr[2] - 0x80); \
663
      \
664
      if (ptr[0] >= 0xe0 && ptr[0] <= 0xef) \
665
        { \
666
        c |= (ptr[0] - 0xe0) << 12; \
667
        ptr += 3; \
668
        \
669
        if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
670
          { \
671
          invalid_action; \
672
          } \
673
        } \
674
      else if (ptr + 3 < end && ptr[3] >= 0x80 && ptr[3] < 0xc0) \
675
        { \
676
        c = c << 6 | (ptr[3] - 0x80); \
677
        \
678
        if (ptr[0] >= 0xf0 && ptr[0] <= 0xf4) \
679
          { \
680
          c |= (ptr[0] - 0xf0) << 18; \
681
          ptr += 4; \
682
          \
683
          if (c >= 0x110000 || c < 0x10000) \
684
            { \
685
            invalid_action; \
686
            } \
687
          } \
688
        else \
689
          { \
690
          invalid_action; \
691
          } \
692
        } \
693
      else \
694
        { \
695
        invalid_action; \
696
        } \
697
      } \
698
    else \
699
      { \
700
      invalid_action; \
701
      } \
702
    } \
703
  else \
704
    { \
705
    invalid_action; \
706
    } \
707
  }
708
709
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
710
  { \
711
  c = ptr[-1]; \
712
  if (c <= 0x7f) \
713
    ptr--; \
714
  else if (ptr - 1 > start && ptr[-1] >= 0x80 && ptr[-1] < 0xc0) \
715
    { \
716
    c -= 0x80; \
717
    \
718
    if (ptr[-2] >= 0xc2 && ptr[-2] <= 0xdf) \
719
      { \
720
      c |= (ptr[-2] - 0xc0) << 6; \
721
      ptr -= 2; \
722
      } \
723
    else if (ptr - 2 > start && ptr[-2] >= 0x80 && ptr[-2] < 0xc0) \
724
      { \
725
      c = c << 6 | (ptr[-2] - 0x80); \
726
      \
727
      if (ptr[-3] >= 0xe0 && ptr[-3] <= 0xef) \
728
        { \
729
        c |= (ptr[-3] - 0xe0) << 12; \
730
        ptr -= 3; \
731
        \
732
        if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
733
          { \
734
          invalid_action; \
735
          } \
736
        } \
737
      else if (ptr - 3 > start && ptr[-3] >= 0x80 && ptr[-3] < 0xc0) \
738
        { \
739
        c = c << 6 | (ptr[-3] - 0x80); \
740
        \
741
        if (ptr[-4] >= 0xf0 && ptr[-4] <= 0xf4) \
742
          { \
743
          c |= (ptr[-4] - 0xf0) << 18; \
744
          ptr -= 4; \
745
          \
746
          if (c >= 0x110000 || c < 0x10000) \
747
            { \
748
            invalid_action; \
749
            } \
750
          } \
751
        else \
752
          { \
753
          invalid_action; \
754
          } \
755
        } \
756
      else \
757
        { \
758
        invalid_action; \
759
        } \
760
      } \
761
    else \
762
      { \
763
      invalid_action; \
764
      } \
765
    } \
766
  else \
767
    { \
768
    invalid_action; \
769
    } \
770
  }
771
772
#elif PCRE2_CODE_UNIT_WIDTH == 16
773
774
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
775
  { \
776
  if (ptr[0] < 0xd800 || ptr[0] >= 0xe000) \
777
    c = *ptr++; \
778
  else if (ptr[0] < 0xdc00 && ptr + 1 < end && ptr[1] >= 0xdc00 && ptr[1] < 0xe000) \
779
    { \
780
    c = (((ptr[0] - 0xd800) << 10) | (ptr[1] - 0xdc00)) + 0x10000; \
781
    ptr += 2; \
782
    } \
783
  else \
784
    { \
785
    invalid_action; \
786
    } \
787
  }
788
789
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
790
  { \
791
  c = ptr[-1]; \
792
  if (c < 0xd800 || c >= 0xe000) \
793
    ptr--; \
794
  else if (c >= 0xdc00 && ptr - 1 > start && ptr[-2] >= 0xd800 && ptr[-2] < 0xdc00) \
795
    { \
796
    c = (((ptr[-2] - 0xd800) << 10) | (c - 0xdc00)) + 0x10000; \
797
    ptr -= 2; \
798
    } \
799
  else \
800
    { \
801
    invalid_action; \
802
    } \
803
  }
804
805
806
#elif PCRE2_CODE_UNIT_WIDTH == 32
807
808
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
809
  { \
810
  if (ptr[0] < 0xd800 || (ptr[0] >= 0xe000 && ptr[0] < 0x110000)) \
811
    c = *ptr++; \
812
  else \
813
    { \
814
    invalid_action; \
815
    } \
816
  }
817
818
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
819
  { \
820
  c = ptr[-1]; \
821
  if (ptr[-1] < 0xd800 || (ptr[-1] >= 0xe000 && ptr[-1] < 0x110000)) \
822
    ptr--; \
823
  else \
824
    { \
825
    invalid_action; \
826
    } \
827
  }
828
829
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
830
#endif /* SUPPORT_UNICODE */
831
832
static PCRE2_SPTR bracketend(PCRE2_SPTR cc) /* Returns the position just past the closing KET opcode of the bracket that starts at cc. */
833
{
834
SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND)); /* cc must point at an assertion or bracket opening opcode. */
835
do cc += GET(cc, 1); while (*cc == OP_ALT); /* Follow the offset read by GET(cc, 1) from one alternative to the next until the opcode is no longer OP_ALT. */
836
SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS); /* The chain is expected to stop on one of the KET opcodes. */
837
cc += 1 + LINK_SIZE; /* Skip the KET opcode itself plus LINK_SIZE units (presumably its link field). */
838
return cc;
839
}
840
841
static int no_alternatives(PCRE2_SPTR cc) /* Returns the number of alternatives of the bracket that starts at cc; the result is always at least 1. */
842
{
843
int count = 0;
844
SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND)); /* cc must point at an assertion or bracket opening opcode. */
845
do
846
  {
847
  cc += GET(cc, 1); /* Jump to the next OP_ALT, or to the closing KET, using the offset read by GET(cc, 1). */
848
  count++; /* Counted once per branch, including the first one, because this is a do/while loop. */
849
  }
850
while (*cc == OP_ALT);
851
SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS); /* The chain is expected to stop on one of the KET opcodes. */
852
return count;
853
}
854
855
/* Functions which might need modification for all newly supported opcodes:
856
 next_opcode
857
 check_opcode_types
858
 set_private_data_ptrs
859
 get_framesize
860
 init_frame
861
 get_recurse_data_length
862
 copy_recurse_data
863
 compile_matchingpath
864
 compile_backtrackingpath
865
*/
866
867
static PCRE2_SPTR next_opcode(compiler_common *common, PCRE2_SPTR cc) /* Returns the address of the opcode that follows the one at cc, or NULL for OP_ANYBYTE when common->utf is set and for any opcode not listed in the switch. */
868
{
869
SLJIT_UNUSED_ARG(common); /* common is only read inside the SUPPORT_UNICODE sections below. */
870
switch(*cc)
871
  {
872
  case OP_SOD:
873
  case OP_SOM:
874
  case OP_SET_SOM:
875
  case OP_NOT_WORD_BOUNDARY:
876
  case OP_WORD_BOUNDARY:
877
  case OP_NOT_DIGIT:
878
  case OP_DIGIT:
879
  case OP_NOT_WHITESPACE:
880
  case OP_WHITESPACE:
881
  case OP_NOT_WORDCHAR:
882
  case OP_WORDCHAR:
883
  case OP_ANY:
884
  case OP_ALLANY:
885
  case OP_NOTPROP:
886
  case OP_PROP:
887
  case OP_ANYNL:
888
  case OP_NOT_HSPACE:
889
  case OP_HSPACE:
890
  case OP_NOT_VSPACE:
891
  case OP_VSPACE:
892
  case OP_EXTUNI:
893
  case OP_EODN:
894
  case OP_EOD:
895
  case OP_CIRC:
896
  case OP_CIRCM:
897
  case OP_DOLL:
898
  case OP_DOLLM:
899
  case OP_CRSTAR:
900
  case OP_CRMINSTAR:
901
  case OP_CRPLUS:
902
  case OP_CRMINPLUS:
903
  case OP_CRQUERY:
904
  case OP_CRMINQUERY:
905
  case OP_CRRANGE:
906
  case OP_CRMINRANGE:
907
  case OP_CRPOSSTAR:
908
  case OP_CRPOSPLUS:
909
  case OP_CRPOSQUERY:
910
  case OP_CRPOSRANGE:
911
  case OP_CLASS:
912
  case OP_NCLASS:
913
  case OP_REF:
914
  case OP_REFI:
915
  case OP_DNREF:
916
  case OP_DNREFI:
917
  case OP_RECURSE:
918
  case OP_CALLOUT:
919
  case OP_ALT:
920
  case OP_KET:
921
  case OP_KETRMAX:
922
  case OP_KETRMIN:
923
  case OP_KETRPOS:
924
  case OP_REVERSE:
925
  case OP_ASSERT:
926
  case OP_ASSERT_NOT:
927
  case OP_ASSERTBACK:
928
  case OP_ASSERTBACK_NOT:
929
  case OP_ASSERT_NA:
930
  case OP_ASSERTBACK_NA:
931
  case OP_ONCE:
932
  case OP_SCRIPT_RUN:
933
  case OP_BRA:
934
  case OP_BRAPOS:
935
  case OP_CBRA:
936
  case OP_CBRAPOS:
937
  case OP_COND:
938
  case OP_SBRA:
939
  case OP_SBRAPOS:
940
  case OP_SCBRA:
941
  case OP_SCBRAPOS:
942
  case OP_SCOND:
943
  case OP_CREF:
944
  case OP_DNCREF:
945
  case OP_RREF:
946
  case OP_DNRREF:
947
  case OP_FALSE:
948
  case OP_TRUE:
949
  case OP_BRAZERO:
950
  case OP_BRAMINZERO:
951
  case OP_BRAPOSZERO:
952
  case OP_PRUNE:
953
  case OP_SKIP:
954
  case OP_THEN:
955
  case OP_COMMIT:
956
  case OP_FAIL:
957
  case OP_ACCEPT:
958
  case OP_ASSERT_ACCEPT:
959
  case OP_CLOSE:
960
  case OP_SKIPZERO:
961
  return cc + PRIV(OP_lengths)[*cc]; /* For all opcodes above the step is exactly the OP_lengths table entry of the opcode. */
962
963
  case OP_CHAR:
964
  case OP_CHARI:
965
  case OP_NOT:
966
  case OP_NOTI:
967
  case OP_STAR:
968
  case OP_MINSTAR:
969
  case OP_PLUS:
970
  case OP_MINPLUS:
971
  case OP_QUERY:
972
  case OP_MINQUERY:
973
  case OP_UPTO:
974
  case OP_MINUPTO:
975
  case OP_EXACT:
976
  case OP_POSSTAR:
977
  case OP_POSPLUS:
978
  case OP_POSQUERY:
979
  case OP_POSUPTO:
980
  case OP_STARI:
981
  case OP_MINSTARI:
982
  case OP_PLUSI:
983
  case OP_MINPLUSI:
984
  case OP_QUERYI:
985
  case OP_MINQUERYI:
986
  case OP_UPTOI:
987
  case OP_MINUPTOI:
988
  case OP_EXACTI:
989
  case OP_POSSTARI:
990
  case OP_POSPLUSI:
991
  case OP_POSQUERYI:
992
  case OP_POSUPTOI:
993
  case OP_NOTSTAR:
994
  case OP_NOTMINSTAR:
995
  case OP_NOTPLUS:
996
  case OP_NOTMINPLUS:
997
  case OP_NOTQUERY:
998
  case OP_NOTMINQUERY:
999
  case OP_NOTUPTO:
1000
  case OP_NOTMINUPTO:
1001
  case OP_NOTEXACT:
1002
  case OP_NOTPOSSTAR:
1003
  case OP_NOTPOSPLUS:
1004
  case OP_NOTPOSQUERY:
1005
  case OP_NOTPOSUPTO:
1006
  case OP_NOTSTARI:
1007
  case OP_NOTMINSTARI:
1008
  case OP_NOTPLUSI:
1009
  case OP_NOTMINPLUSI:
1010
  case OP_NOTQUERYI:
1011
  case OP_NOTMINQUERYI:
1012
  case OP_NOTUPTOI:
1013
  case OP_NOTMINUPTOI:
1014
  case OP_NOTEXACTI:
1015
  case OP_NOTPOSSTARI:
1016
  case OP_NOTPOSPLUSI:
1017
  case OP_NOTPOSQUERYI:
1018
  case OP_NOTPOSUPTOI:
1019
  cc += PRIV(OP_lengths)[*cc]; /* Start with the table length; the UTF adjustment below may add to it. */
1020
#ifdef SUPPORT_UNICODE
1021
  if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); /* cc was already advanced, so cc[-1] is the last unit of this opcode; in UTF mode extra units (presumably the rest of a multi-unit character) are skipped too. */
1022
#endif
1023
  return cc;
1024
1025
  /* Special cases. */
1026
  case OP_TYPESTAR:
1027
  case OP_TYPEMINSTAR:
1028
  case OP_TYPEPLUS:
1029
  case OP_TYPEMINPLUS:
1030
  case OP_TYPEQUERY:
1031
  case OP_TYPEMINQUERY:
1032
  case OP_TYPEUPTO:
1033
  case OP_TYPEMINUPTO:
1034
  case OP_TYPEEXACT:
1035
  case OP_TYPEPOSSTAR:
1036
  case OP_TYPEPOSPLUS:
1037
  case OP_TYPEPOSQUERY:
1038
  case OP_TYPEPOSUPTO:
1039
  return cc + PRIV(OP_lengths)[*cc] - 1; /* One unit short of the table length. NOTE(review): presumably so that the result points at the trailing type opcode - confirm. */
1040
1041
  case OP_ANYBYTE:
1042
#ifdef SUPPORT_UNICODE
1043
  if (common->utf) return NULL; /* OP_ANYBYTE yields NULL whenever common->utf is set. */
1044
#endif
1045
  return cc + 1;
1046
1047
  case OP_CALLOUT_STR:
1048
  return cc + GET(cc, 1 + 2*LINK_SIZE); /* Variable length: the step is the value GET reads at offset 1 + 2*LINK_SIZE of this opcode. */
1049
1050
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
1051
  case OP_XCLASS:
1052
  return cc + GET(cc, 1); /* Variable length: the step is the value GET reads right after the opcode. */
1053
#endif
1054
1055
  case OP_MARK:
1056
  case OP_COMMIT_ARG:
1057
  case OP_PRUNE_ARG:
1058
  case OP_SKIP_ARG:
1059
  case OP_THEN_ARG:
1060
  return cc + 1 + 2 + cc[1]; /* Three fixed units plus cc[1]. NOTE(review): cc[1] presumably holds the length of the verb's argument - confirm. */
1061
1062
  default:
1063
  SLJIT_UNREACHABLE(); /* Every opcode that can reach this function is expected to be listed above. */
1064
  return NULL;
1065
  }
1066
}
1067
1068
/* Pre-scan of the compiled pattern in [cc, ccend).

   While walking the opcodes this records pattern-wide facts in `common`
   (has_set_som, has_then, has_skip_arg, has_skip_in_assert_back, ...),
   reserves sljit_sw sized slots by advancing common->ovector_start, and
   clears the optimized_cbracket entry of every capture group that is
   referenced by number or name, or that is an OP_CBRAPOS / OP_SCBRAPOS group.

   Returns FALSE when a construct is met that is rejected here (a callout
   right after OP_COND / OP_SCOND, certain verbs located before the end of an
   OP_ASSERT_NA / OP_ASSERTBACK_NA group, or an opcode for which
   next_opcode() returns NULL). Returns TRUE otherwise. */
static BOOL check_opcode_types(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend)
{
    /* Furthest group end seen so far for OP_ASSERTBACK and for
       OP_ASSERT_NA / OP_ASSERTBACK_NA. Both start just before the code, so
       the "cc < limit" tests below are false until such a group is met. */
    PCRE2_SPTR assert_back_limit = cc - 1;
    PCRE2_SPTR assert_na_limit = cc - 1;
    PCRE2_SPTR group_end;
    PCRE2_SPTR name_entry;
    int names_left;

    while (cc < ccend) {
        switch (*cc) {
        case OP_SET_SOM:
            common->has_set_som = TRUE;
            common->might_be_empty = TRUE;
            cc += 1;
            break;

        case OP_REFI:
#ifdef SUPPORT_UNICODE
            /* Reserved only once: three sljit_sw sized entries. */
            if (common->iref_ptr == 0) {
                common->iref_ptr = common->ovector_start;
                common->ovector_start += 3 * sizeof(sljit_sw);
            }
#endif /* SUPPORT_UNICODE */
            /* Fall through. */
        case OP_REF:
        case OP_CREF:
            /* The group is referenced by number. */
            common->optimized_cbracket[GET2(cc, 1)] = 0;
            cc += 1 + IMM2_SIZE;
            break;

        case OP_ASSERT_NA:
        case OP_ASSERTBACK_NA:
            group_end = bracketend(cc);
            if (group_end > assert_na_limit) {
                assert_na_limit = group_end;
            }
            cc += 1 + LINK_SIZE;
            break;

        case OP_ASSERTBACK:
            group_end = bracketend(cc);
            if (group_end > assert_back_limit) {
                assert_back_limit = group_end;
            }
            cc += 1 + LINK_SIZE;
            break;

        case OP_CBRAPOS:
        case OP_SCBRAPOS:
            common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
            cc += 1 + LINK_SIZE + IMM2_SIZE;
            break;

        case OP_COND:
        case OP_SCOND:
            /* Only AUTO_CALLOUT can insert this opcode. We do
               not intend to support this case. */
            if (cc[1 + LINK_SIZE] == OP_CALLOUT || cc[1 + LINK_SIZE] == OP_CALLOUT_STR) {
                return FALSE;
            }
            cc += 1 + LINK_SIZE;
            break;

        case OP_DNREF:
        case OP_DNREFI:
        case OP_DNCREF:
            /* Walk the consecutive name table entries of the referenced name
               and clear the flag of every group number found there. */
            names_left = GET2(cc, 1 + IMM2_SIZE);
            name_entry = common->name_table + GET2(cc, 1) * common->name_entry_size;
            for (; names_left > 0; names_left--) {
                common->optimized_cbracket[GET2(name_entry, 0)] = 0;
                name_entry += common->name_entry_size;
            }
            cc += 1 + 2 * IMM2_SIZE;
            break;

        case OP_RECURSE:
            /* Set its value only once. */
            if (common->recursive_head_ptr == 0) {
                common->recursive_head_ptr = common->ovector_start;
                common->ovector_start += sizeof(sljit_sw);
            }
            cc += 1 + LINK_SIZE;
            break;

        case OP_CALLOUT:
        case OP_CALLOUT_STR:
            if (common->capture_last_ptr == 0) {
                common->capture_last_ptr = common->ovector_start;
                common->ovector_start += sizeof(sljit_sw);
            }
            /* OP_CALLOUT has a fixed length, OP_CALLOUT_STR stores its own. */
            if (*cc == OP_CALLOUT) {
                cc += PRIV(OP_lengths)[OP_CALLOUT];
            } else {
                cc += GET(cc, 1 + 2*LINK_SIZE);
            }
            break;

        case OP_THEN_ARG:
            common->has_then = TRUE;
            common->control_head_ptr = 1;
            /* Fall through. */
        case OP_COMMIT_ARG:
        case OP_PRUNE_ARG:
            if (cc < assert_na_limit) {
                return FALSE;
            }
            /* Fall through. */
        case OP_MARK:
            if (common->mark_ptr == 0) {
                common->mark_ptr = common->ovector_start;
                common->ovector_start += sizeof(sljit_sw);
            }
            cc += 1 + 2 + cc[1];
            break;

        case OP_THEN:
            common->has_then = TRUE;
            common->control_head_ptr = 1;
            cc += 1;
            break;

        case OP_SKIP:
            if (cc < assert_back_limit) {
                common->has_skip_in_assert_back = TRUE;
            }
            if (cc < assert_na_limit) {
                return FALSE;
            }
            cc += 1;
            break;

        case OP_SKIP_ARG:
            common->control_head_ptr = 1;
            common->has_skip_arg = TRUE;
            if (cc < assert_back_limit) {
                common->has_skip_in_assert_back = TRUE;
            }
            if (cc < assert_na_limit) {
                return FALSE;
            }
            cc += 1 + 2 + cc[1];
            break;

        case OP_PRUNE:
        case OP_COMMIT:
        case OP_ASSERT_ACCEPT:
            if (cc < assert_na_limit) {
                return FALSE;
            }
            cc++;
            break;

        default:
            /* Everything else only needs to be stepped over. */
            cc = next_opcode(common, cc);
            if (cc == NULL) {
                return FALSE;
            }
            break;
        }
    }

    return TRUE;
}
1228
1229
#define EARLY_FAIL_ENHANCE_MAX (1 + 3)
1230
1231
/*
1232
start:
1233
  0 - skip / early fail allowed
1234
  1 - only early fail with range allowed
1235
  >1 - (start - 1) early fail is processed
1236
1237
return: current number of iterators enhanced with fast fail
1238
*/
1239
static int detect_early_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start,
1240
   sljit_s32 depth, int start, BOOL fast_forward_allowed)
1241
{
1242
PCRE2_SPTR begin = cc;
1243
PCRE2_SPTR next_alt;
1244
PCRE2_SPTR end;
1245
PCRE2_SPTR accelerated_start;
1246
BOOL prev_fast_forward_allowed;
1247
int result = 0;
1248
int count;
1249
1250
SLJIT_ASSERT(*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA);
1251
SLJIT_ASSERT(*cc != OP_CBRA || common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] != 0);
1252
SLJIT_ASSERT(start < EARLY_FAIL_ENHANCE_MAX);
1253
1254
next_alt = cc + GET(cc, 1);
1255
if (*next_alt == OP_ALT)
1256
  fast_forward_allowed = FALSE;
1257
1258
do
1259
  {
1260
  count = start;
1261
  cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);
1262
1263
  while (TRUE)
1264
    {
1265
    accelerated_start = NULL;
1266
1267
    switch(*cc)
1268
      {
1269
      case OP_SOD:
1270
      case OP_SOM:
1271
      case OP_SET_SOM:
1272
      case OP_NOT_WORD_BOUNDARY:
1273
      case OP_WORD_BOUNDARY:
1274
      case OP_EODN:
1275
      case OP_EOD:
1276
      case OP_CIRC:
1277
      case OP_CIRCM:
1278
      case OP_DOLL:
1279
      case OP_DOLLM:
1280
      /* Zero width assertions. */
1281
      cc++;
1282
      continue;
1283
1284
      case OP_NOT_DIGIT:
1285
      case OP_DIGIT:
1286
      case OP_NOT_WHITESPACE:
1287
      case OP_WHITESPACE:
1288
      case OP_NOT_WORDCHAR:
1289
      case OP_WORDCHAR:
1290
      case OP_ANY:
1291
      case OP_ALLANY:
1292
      case OP_ANYBYTE:
1293
      case OP_NOT_HSPACE:
1294
      case OP_HSPACE:
1295
      case OP_NOT_VSPACE:
1296
      case OP_VSPACE:
1297
      fast_forward_allowed = FALSE;
1298
      cc++;
1299
      continue;
1300
1301
      case OP_ANYNL:
1302
      case OP_EXTUNI:
1303
      fast_forward_allowed = FALSE;
1304
      if (count == 0)
1305
        count = 1;
1306
      cc++;
1307
      continue;
1308
1309
      case OP_NOTPROP:
1310
      case OP_PROP:
1311
      fast_forward_allowed = FALSE;
1312
      cc += 1 + 2;
1313
      continue;
1314
1315
      case OP_CHAR:
1316
      case OP_CHARI:
1317
      case OP_NOT:
1318
      case OP_NOTI:
1319
      fast_forward_allowed = FALSE;
1320
      cc += 2;
1321
#ifdef SUPPORT_UNICODE
1322
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1323
#endif
1324
      continue;
1325
1326
      case OP_TYPESTAR:
1327
      case OP_TYPEMINSTAR:
1328
      case OP_TYPEPLUS:
1329
      case OP_TYPEMINPLUS:
1330
      case OP_TYPEPOSSTAR:
1331
      case OP_TYPEPOSPLUS:
1332
      /* The type or prop opcode is skipped in the next iteration. */
1333
      cc += 1;
1334
1335
      if (cc[0] != OP_ANYNL && cc[0] != OP_EXTUNI)
1336
        {
1337
        accelerated_start = cc - 1;
1338
        break;
1339
        }
1340
1341
      if (count == 0)
1342
        count = 1;
1343
      fast_forward_allowed = FALSE;
1344
      continue;
1345
1346
      case OP_TYPEUPTO:
1347
      case OP_TYPEMINUPTO:
1348
      case OP_TYPEEXACT:
1349
      case OP_TYPEPOSUPTO:
1350
      cc += IMM2_SIZE;
1351
      /* Fall through */
1352
1353
      case OP_TYPEQUERY:
1354
      case OP_TYPEMINQUERY:
1355
      case OP_TYPEPOSQUERY:
1356
      /* The type or prop opcode is skipped in the next iteration. */
1357
      fast_forward_allowed = FALSE;
1358
      if (count == 0)
1359
        count = 1;
1360
      cc += 1;
1361
      continue;
1362
1363
      case OP_STAR:
1364
      case OP_MINSTAR:
1365
      case OP_PLUS:
1366
      case OP_MINPLUS:
1367
      case OP_POSSTAR:
1368
      case OP_POSPLUS:
1369
1370
      case OP_STARI:
1371
      case OP_MINSTARI:
1372
      case OP_PLUSI:
1373
      case OP_MINPLUSI:
1374
      case OP_POSSTARI:
1375
      case OP_POSPLUSI:
1376
1377
      case OP_NOTSTAR:
1378
      case OP_NOTMINSTAR:
1379
      case OP_NOTPLUS:
1380
      case OP_NOTMINPLUS:
1381
      case OP_NOTPOSSTAR:
1382
      case OP_NOTPOSPLUS:
1383
1384
      case OP_NOTSTARI:
1385
      case OP_NOTMINSTARI:
1386
      case OP_NOTPLUSI:
1387
      case OP_NOTMINPLUSI:
1388
      case OP_NOTPOSSTARI:
1389
      case OP_NOTPOSPLUSI:
1390
      accelerated_start = cc;
1391
      cc += 2;
1392
#ifdef SUPPORT_UNICODE
1393
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1394
#endif
1395
      break;
1396
1397
      case OP_UPTO:
1398
      case OP_MINUPTO:
1399
      case OP_EXACT:
1400
      case OP_POSUPTO:
1401
      case OP_UPTOI:
1402
      case OP_MINUPTOI:
1403
      case OP_EXACTI:
1404
      case OP_POSUPTOI:
1405
      case OP_NOTUPTO:
1406
      case OP_NOTMINUPTO:
1407
      case OP_NOTEXACT:
1408
      case OP_NOTPOSUPTO:
1409
      case OP_NOTUPTOI:
1410
      case OP_NOTMINUPTOI:
1411
      case OP_NOTEXACTI:
1412
      case OP_NOTPOSUPTOI:
1413
      cc += IMM2_SIZE;
1414
      /* Fall through */
1415
1416
      case OP_QUERY:
1417
      case OP_MINQUERY:
1418
      case OP_POSQUERY:
1419
      case OP_QUERYI:
1420
      case OP_MINQUERYI:
1421
      case OP_POSQUERYI:
1422
      case OP_NOTQUERY:
1423
      case OP_NOTMINQUERY:
1424
      case OP_NOTPOSQUERY:
1425
      case OP_NOTQUERYI:
1426
      case OP_NOTMINQUERYI:
1427
      case OP_NOTPOSQUERYI:
1428
      fast_forward_allowed = FALSE;
1429
      if (count == 0)
1430
        count = 1;
1431
      cc += 2;
1432
#ifdef SUPPORT_UNICODE
1433
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1434
#endif
1435
      continue;
1436
1437
      case OP_CLASS:
1438
      case OP_NCLASS:
1439
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
1440
      case OP_XCLASS:
1441
      accelerated_start = cc;
1442
      cc += ((*cc == OP_XCLASS) ? GET(cc, 1) : (unsigned int)(1 + (32 / sizeof(PCRE2_UCHAR))));
1443
#else
1444
      accelerated_start = cc;
1445
      cc += (1 + (32 / sizeof(PCRE2_UCHAR)));
1446
#endif
1447
1448
      switch (*cc)
1449
        {
1450
        case OP_CRSTAR:
1451
        case OP_CRMINSTAR:
1452
        case OP_CRPLUS:
1453
        case OP_CRMINPLUS:
1454
        case OP_CRPOSSTAR:
1455
        case OP_CRPOSPLUS:
1456
        cc++;
1457
        break;
1458
1459
        case OP_CRRANGE:
1460
        case OP_CRMINRANGE:
1461
        case OP_CRPOSRANGE:
1462
        cc += 2 * IMM2_SIZE;
1463
        /* Fall through */
1464
        case OP_CRQUERY:
1465
        case OP_CRMINQUERY:
1466
        case OP_CRPOSQUERY:
1467
        cc++;
1468
        if (count == 0)
1469
          count = 1;
1470
        /* Fall through */
1471
        default:
1472
        accelerated_start = NULL;
1473
        fast_forward_allowed = FALSE;
1474
        continue;
1475
        }
1476
      break;
1477
1478
      case OP_ONCE:
1479
      case OP_BRA:
1480
      case OP_CBRA:
1481
      end = cc + GET(cc, 1);
1482
1483
      prev_fast_forward_allowed = fast_forward_allowed;
1484
      fast_forward_allowed = FALSE;
1485
      if (depth >= 4)
1486
        break;
1487
1488
      end = bracketend(cc) - (1 + LINK_SIZE);
1489
      if (*end != OP_KET || (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0))
1490
        break;
1491
1492
      count = detect_early_fail(common, cc, private_data_start, depth + 1, count, prev_fast_forward_allowed);
1493
1494
      if (PRIVATE_DATA(cc) != 0)
1495
        common->private_data_ptrs[begin - common->start] = 1;
1496
1497
      if (count < EARLY_FAIL_ENHANCE_MAX)
1498
        {
1499
        cc = end + (1 + LINK_SIZE);
1500
        continue;
1501
        }
1502
      break;
1503
1504
      case OP_KET:
1505
      SLJIT_ASSERT(PRIVATE_DATA(cc) == 0);
1506
      if (cc >= next_alt)
1507
        break;
1508
      cc += 1 + LINK_SIZE;
1509
      continue;
1510
      }
1511
1512
    if (accelerated_start != NULL)
1513
      {
1514
      if (count == 0)
1515
        {
1516
        count++;
1517
1518
        if (fast_forward_allowed)
1519
          {
1520
          common->fast_forward_bc_ptr = accelerated_start;
1521
          common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_skip;
1522
          *private_data_start += sizeof(sljit_sw);
1523
          }
1524
        else
1525
          {
1526
          common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail;
1527
1528
          if (common->early_fail_start_ptr == 0)
1529
            common->early_fail_start_ptr = *private_data_start;
1530
1531
          *private_data_start += sizeof(sljit_sw);
1532
          common->early_fail_end_ptr = *private_data_start;
1533
1534
          if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
1535
            return EARLY_FAIL_ENHANCE_MAX;
1536
          }
1537
        }
1538
      else
1539
        {
1540
        common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail_range;
1541
1542
        if (common->early_fail_start_ptr == 0)
1543
          common->early_fail_start_ptr = *private_data_start;
1544
1545
        *private_data_start += 2 * sizeof(sljit_sw);
1546
        common->early_fail_end_ptr = *private_data_start;
1547
1548
        if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
1549
          return EARLY_FAIL_ENHANCE_MAX;
1550
        }
1551
1552
      /* Cannot be part of a repeat. */
1553
      common->private_data_ptrs[begin - common->start] = 1;
1554
      count++;
1555
1556
      if (count < EARLY_FAIL_ENHANCE_MAX)
1557
        continue;
1558
      }
1559
1560
    break;
1561
    }
1562
1563
  if (*cc != OP_ALT && *cc != OP_KET)
1564
    result = EARLY_FAIL_ENHANCE_MAX;
1565
  else if (result < count)
1566
    result = count;
1567
1568
  cc = next_alt;
1569
  next_alt = cc + GET(cc, 1);
1570
  }
1571
while (*cc == OP_ALT);
1572
1573
return result;
1574
}
1575
1576
/* Returns the value (0, 1 or 2) associated with the class repeat opcode at
   cc. set_private_data_ptrs() uses it as the number of sljit_sw sized
   private data entries to reserve for a character class iterator.

   OP_CRSTAR / OP_CRPLUS give 2; the minimizing and query forms give 1.
   For OP_CRRANGE / OP_CRMINRANGE a stored maximum of 0 gives 2 (OP_CRRANGE)
   or 1 (OP_CRMINRANGE); otherwise the result is max - min capped at 2.
   Every other opcode gives 0. */
static int get_class_iterator_size(PCRE2_SPTR cc)
{
    const PCRE2_UCHAR repeat_op = *cc;
    sljit_u32 lower;
    sljit_u32 upper;
    sljit_u32 spread;

    if (repeat_op == OP_CRSTAR || repeat_op == OP_CRPLUS)
        return 2;

    if (repeat_op == OP_CRMINSTAR || repeat_op == OP_CRMINPLUS
            || repeat_op == OP_CRQUERY || repeat_op == OP_CRMINQUERY)
        return 1;

    if (repeat_op != OP_CRRANGE && repeat_op != OP_CRMINRANGE)
        return 0;

    lower = GET2(cc, 1);
    upper = GET2(cc, 1 + IMM2_SIZE);
    if (upper == 0)
        return (repeat_op == OP_CRRANGE) ? 2 : 1;

    spread = upper - lower;
    if (spread > 2)
        spread = 2;
    return spread;
}
1607
1608
/* Checks whether the bracket at `begin` is followed by byte-identical copies
   of itself, optionally continued by OP_BRAZERO / OP_BRAMINZERO prefixed
   copies, and if so records the repeat in common->private_data_ptrs.

   Three consecutive entries are written, starting at the index of
   (closing position - LINK_SIZE): the distance to skip, a repeat opcode
   (OP_EXACT, OP_UPTO or OP_MINUPTO) and the repeat count.

   Returns TRUE when something was recorded, and also on the early exit
   taken when PRIVATE_DATA(end - LINK_SIZE) is zero. Returns FALSE
   otherwise. */
static BOOL detect_repeat(compiler_common *common, PCRE2_SPTR begin)
{
    PCRE2_SPTR group_end = bracketend(begin);
    PCRE2_SPTR scan;
    PCRE2_SPTR scan_end;
    PCRE2_SPTR exact_end;
    PCRE2_UCHAR zero_op;
    sljit_sw length = group_end - begin;
    sljit_sw slot;
    sljit_s32 exact_count, optional_count, i;

    /* Detect fixed iterations first. */
    if (group_end[-(1 + LINK_SIZE)] != OP_KET || PRIVATE_DATA(begin) != 0) {
        return FALSE;
    }

    /* /(?:AB){4,6}/ is currently converted to /(?:AB){3}(?AB){1,3}/
     * Skip the check of the second part. */
    if (PRIVATE_DATA(group_end - LINK_SIZE) == 0) {
        return TRUE;
    }

    /* Count the directly following identical copies of the bracket. */
    exact_count = 1;
    for (scan = group_end; *scan == *begin; scan = scan_end) {
        scan_end = bracketend(scan);
        if (scan_end - scan != length || memcmp(begin, scan, IN_UCHARS(length)) != 0) {
            break;
        }
        exact_count++;
    }

    if (exact_count == 2) {
        return FALSE;
    }

    optional_count = 0;
    exact_end = scan;
    if (*scan == OP_BRAZERO || *scan == OP_BRAMINZERO) {
        zero_op = *scan;

        /* Each level has the form: zero_op, OP_BRA, <copy of the bracket>. */
        while (scan[0] == zero_op && scan[1] == OP_BRA && scan[2 + LINK_SIZE] == *begin) {
            scan_end = bracketend(scan + 2 + LINK_SIZE);
            if (scan_end - scan != (length + 2 + LINK_SIZE)
                    || memcmp(begin, scan + 2 + LINK_SIZE, IN_UCHARS(length)) != 0) {
                break;
            }
            scan = scan_end;
            optional_count++;
        }

        /* The innermost level is the copy itself, without the extra OP_BRA. */
        if (scan[0] == zero_op && scan[1] == *begin && optional_count >= 1) {
            scan_end = bracketend(scan + 1);
            if (scan_end - scan == (length + 1) && memcmp(begin, scan + 1, IN_UCHARS(length)) == 0) {
                /* Every OP_BRA opened above must be closed by an OP_KET. */
                i = 0;
                while (i < optional_count && *scan_end == OP_KET) {
                    scan_end += 1 + LINK_SIZE;
                    i++;
                }

                if (i == optional_count) {
                    slot = exact_end - common->start - LINK_SIZE;
                    common->private_data_ptrs[slot] = scan_end - exact_end;
                    common->private_data_ptrs[slot + 1] = (zero_op == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
                    /* +2 the original and the last. */
                    common->private_data_ptrs[slot + 2] = optional_count + 2;
                    if (exact_count == 1) {
                        return TRUE;
                    }
                    /* The last copy now belongs to the recorded repeat. */
                    exact_count--;
                    exact_end -= (1 + LINK_SIZE) + GET(exact_end, -LINK_SIZE);
                }
            }
        }
    }

    if (exact_count < 3) {
        return FALSE;
    }

    slot = group_end - common->start - LINK_SIZE;
    common->private_data_ptrs[slot] = exact_end - group_end;
    common->private_data_ptrs[slot + 1] = OP_EXACT;
    common->private_data_ptrs[slot + 2] = exact_count;
    return TRUE;
}
1693
1694
/* Case label lists for switch statements that group iterator opcodes.
   Each macro expands to a sequence of `case OP_...:` labels and is followed
   by the statements of that arm at the point of use. */

/* Character iterators: minimizing star / plus and both query forms
   (plain, caseless, negated, negated caseless). */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR:     case OP_MINPLUS:     case OP_QUERY:     case OP_MINQUERY: \
    case OP_MINSTARI:    case OP_MINPLUSI:    case OP_QUERYI:    case OP_MINQUERYI: \
    case OP_NOTMINSTAR:  case OP_NOTMINPLUS:  case OP_NOTQUERY:  case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: case OP_NOTMINPLUSI: case OP_NOTQUERYI: case OP_NOTMINQUERYI:

/* Character iterators: star / plus. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR:     case OP_PLUS: \
    case OP_STARI:    case OP_PLUSI: \
    case OP_NOTSTAR:  case OP_NOTPLUS: \
    case OP_NOTSTARI: case OP_NOTPLUSI:

/* Character iterators: upto / minupto. */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO:     case OP_MINUPTO: \
    case OP_UPTOI:    case OP_MINUPTOI: \
    case OP_NOTUPTO:  case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: case OP_NOTMINUPTOI:

/* Type iterators: minimizing star / plus and both query forms. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY:   case OP_TYPEMINQUERY:

/* Type iterators: star / plus. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: case OP_TYPEPLUS:

/* Type iterators: upto / minupto. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: case OP_TYPEMINUPTO:
1745
1746
/* Walks the byte code from common->start up to ccend and hands out private
   data offsets, storing them in common->private_data_ptrs at the index of
   the opcode that owns them.

   private_data_start is in/out: allocation starts at *private_data_start and
   the next free offset is written back on return. The walk stops early once
   the offset grows above SLJIT_MAX_LOCAL_SIZE. */
static void set_private_data_ptrs(compiler_common *common, int *private_data_start, PCRE2_SPTR ccend)
{
    PCRE2_SPTR cc = common->start;
    PCRE2_SPTR first_alt;
    /* While cc is below this pointer, character iterators get no private
       slot. NULL means there is no such region. */
    PCRE2_SPTR no_slot_end = NULL;
    int next_free = *private_data_start;
    int slots, advance, header_len;
    BOOL repeat_check = TRUE;

    while (cc < ccend && next_free <= SLJIT_MAX_LOCAL_SIZE) {
        slots = 0;       /* private entries wanted by an iterator */
        advance = 0;     /* opcode length; negative: a character follows */
        header_len = 0;  /* length of a bracket header */

        /* When the bracket is prefixed by a zero iteration, skip the repeat check (at this point). */
        if (repeat_check
                && (*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)
                && detect_repeat(common, cc)
                && cc >= no_slot_end) {
            /* These brackets are converted to repeats, so no global
               based single character repeat is allowed. */
            no_slot_end = bracketend(cc);
        }
        repeat_check = TRUE;

        switch (*cc) {
        case OP_KET:
            /* A non-zero entry after the OP_KET was left by detect_repeat():
               give the repeat a slot and jump over the recorded distance. */
            if (common->private_data_ptrs[cc + 1 - common->start] != 0) {
                common->private_data_ptrs[cc - common->start] = next_free;
                next_free += sizeof(sljit_sw);
                cc += common->private_data_ptrs[cc + 1 - common->start];
            }
            cc += 1 + LINK_SIZE;
            break;

        case OP_ASSERT:
        case OP_ASSERT_NOT:
        case OP_ASSERTBACK:
        case OP_ASSERTBACK_NOT:
        case OP_ASSERT_NA:
        case OP_ASSERTBACK_NA:
        case OP_ONCE:
        case OP_SCRIPT_RUN:
        case OP_BRAPOS:
        case OP_SBRA:
        case OP_SBRAPOS:
        case OP_SCOND:
            common->private_data_ptrs[cc - common->start] = next_free;
            next_free += sizeof(sljit_sw);
            header_len = 1 + LINK_SIZE;
            break;

        case OP_CBRAPOS:
        case OP_SCBRAPOS:
            common->private_data_ptrs[cc - common->start] = next_free;
            next_free += sizeof(sljit_sw);
            header_len = 1 + LINK_SIZE + IMM2_SIZE;
            break;

        case OP_COND:
            /* Might be a hidden SCOND. */
            common->private_data_ptrs[cc - common->start] = 0;
            first_alt = cc + GET(cc, 1);
            if (*first_alt == OP_KETRMAX || *first_alt == OP_KETRMIN) {
                common->private_data_ptrs[cc - common->start] = next_free;
                next_free += sizeof(sljit_sw);
            }
            header_len = 1 + LINK_SIZE;
            break;

        case OP_BRA:
            header_len = 1 + LINK_SIZE;
            break;

        case OP_CBRA:
        case OP_SCBRA:
            header_len = 1 + LINK_SIZE + IMM2_SIZE;
            break;

        case OP_BRAZERO:
        case OP_BRAMINZERO:
        case OP_BRAPOSZERO:
            advance = 1;
            repeat_check = FALSE;
            break;

        CASE_ITERATOR_PRIVATE_DATA_1
            advance = -2;
            slots = 1;
            break;

        CASE_ITERATOR_PRIVATE_DATA_2A
            advance = -2;
            slots = 2;
            break;

        CASE_ITERATOR_PRIVATE_DATA_2B
            advance = -(2 + IMM2_SIZE);
            slots = 2;
            break;

        CASE_ITERATOR_TYPE_PRIVATE_DATA_1
            advance = 1;
            slots = 1;
            break;

        CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
            advance = 1;
            if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI) {
                slots = 2;
            }
            break;

        case OP_TYPEUPTO:
            advance = 1 + IMM2_SIZE;
            if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI) {
                slots = 2;
            }
            break;

        case OP_TYPEMINUPTO:
            advance = 1 + IMM2_SIZE;
            slots = 2;
            break;

        case OP_CLASS:
        case OP_NCLASS:
            advance = 1 + 32 / sizeof(PCRE2_UCHAR);
            slots = get_class_iterator_size(cc + advance);
            break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
        case OP_XCLASS:
            advance = GET(cc, 1);
            slots = get_class_iterator_size(cc + advance);
            break;
#endif

        default:
            cc = next_opcode(common, cc);
            SLJIT_ASSERT(cc != NULL);
            break;
        }

        /* Character iterators, which are not inside a repeated bracket,
           gets a private slot instead of allocating it on the stack. */
        if (slots > 0 && cc >= no_slot_end) {
            common->private_data_ptrs[cc - common->start] = next_free;
            next_free += sizeof(sljit_sw) * slots;
        }

        if (advance < 0) {
            /* -advance code units, plus the extra units of the character. */
            cc -= advance;
#ifdef SUPPORT_UNICODE
            if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
        } else if (advance > 0) {
            cc += advance;
        }

        if (header_len > 0) {
            if (cc >= no_slot_end) {
                /* The region is kept only when the bracket does not end
                   with a plain OP_KET. */
                no_slot_end = bracketend(cc);
                if (no_slot_end[-1 - LINK_SIZE] == OP_KET) {
                    no_slot_end = NULL;
                }
            }
            cc += header_len;
        }
    }

    *private_data_start = next_free;
}
1930
1931
/* Returns with a frame_types (always < 0) if no need for frame. */
1932
static int get_framesize(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, BOOL recursive, BOOL *needs_control_head)
1933
{
1934
int length = 0;
1935
int possessive = 0;
1936
BOOL stack_restore = FALSE;
1937
BOOL setsom_found = recursive;
1938
BOOL setmark_found = recursive;
1939
/* The last capture is a local variable even for recursions. */
1940
BOOL capture_last_found = FALSE;
1941
1942
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1943
SLJIT_ASSERT(common->control_head_ptr != 0);
1944
*needs_control_head = TRUE;
1945
#else
1946
*needs_control_head = FALSE;
1947
#endif
1948
1949
if (ccend == NULL)
1950
  {
1951
  ccend = bracketend(cc) - (1 + LINK_SIZE);
1952
  if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1953
    {
1954
    possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1955
    /* This is correct regardless of common->capture_last_ptr. */
1956
    capture_last_found = TRUE;
1957
    }
1958
  cc = next_opcode(common, cc);
1959
  }
1960
1961
SLJIT_ASSERT(cc != NULL);
1962
while (cc < ccend)
1963
  switch(*cc)
1964
    {
1965
    case OP_SET_SOM:
1966
    SLJIT_ASSERT(common->has_set_som);
1967
    stack_restore = TRUE;
1968
    if (!setsom_found)
1969
      {
1970
      length += 2;
1971
      setsom_found = TRUE;
1972
      }
1973
    cc += 1;
1974
    break;
1975
1976
    case OP_MARK:
1977
    case OP_COMMIT_ARG:
1978
    case OP_PRUNE_ARG:
1979
    case OP_THEN_ARG:
1980
    SLJIT_ASSERT(common->mark_ptr != 0);
1981
    stack_restore = TRUE;
1982
    if (!setmark_found)
1983
      {
1984
      length += 2;
1985
      setmark_found = TRUE;
1986
      }
1987
    if (common->control_head_ptr != 0)
1988
      *needs_control_head = TRUE;
1989
    cc += 1 + 2 + cc[1];
1990
    break;
1991
1992
    case OP_RECURSE:
1993
    stack_restore = TRUE;
1994
    if (common->has_set_som && !setsom_found)
1995
      {
1996
      length += 2;
1997
      setsom_found = TRUE;
1998
      }
1999
    if (common->mark_ptr != 0 && !setmark_found)
2000
      {
2001
      length += 2;
2002
      setmark_found = TRUE;
2003
      }
2004
    if (common->capture_last_ptr != 0 && !capture_last_found)
2005
      {
2006
      length += 2;
2007
      capture_last_found = TRUE;
2008
      }
2009
    cc += 1 + LINK_SIZE;
2010
    break;
2011
2012
    case OP_CBRA:
2013
    case OP_CBRAPOS:
2014
    case OP_SCBRA:
2015
    case OP_SCBRAPOS:
2016
    stack_restore = TRUE;
2017
    if (common->capture_last_ptr != 0 && !capture_last_found)
2018
      {
2019
      length += 2;
2020
      capture_last_found = TRUE;
2021
      }
2022
    length += 3;
2023
    cc += 1 + LINK_SIZE + IMM2_SIZE;
2024
    break;
2025
2026
    case OP_THEN:
2027
    stack_restore = TRUE;
2028
    if (common->control_head_ptr != 0)
2029
      *needs_control_head = TRUE;
2030
    cc ++;
2031
    break;
2032
2033
    default:
2034
    stack_restore = TRUE;
2035
    /* Fall through. */
2036
2037
    case OP_NOT_WORD_BOUNDARY:
2038
    case OP_WORD_BOUNDARY:
2039
    case OP_NOT_DIGIT:
2040
    case OP_DIGIT:
2041
    case OP_NOT_WHITESPACE:
2042
    case OP_WHITESPACE:
2043
    case OP_NOT_WORDCHAR:
2044
    case OP_WORDCHAR:
2045
    case OP_ANY:
2046
    case OP_ALLANY:
2047
    case OP_ANYBYTE:
2048
    case OP_NOTPROP:
2049
    case OP_PROP:
2050
    case OP_ANYNL:
2051
    case OP_NOT_HSPACE:
2052
    case OP_HSPACE:
2053
    case OP_NOT_VSPACE:
2054
    case OP_VSPACE:
2055
    case OP_EXTUNI:
2056
    case OP_EODN:
2057
    case OP_EOD:
2058
    case OP_CIRC:
2059
    case OP_CIRCM:
2060
    case OP_DOLL:
2061
    case OP_DOLLM:
2062
    case OP_CHAR:
2063
    case OP_CHARI:
2064
    case OP_NOT:
2065
    case OP_NOTI:
2066
2067
    case OP_EXACT:
2068
    case OP_POSSTAR:
2069
    case OP_POSPLUS:
2070
    case OP_POSQUERY:
2071
    case OP_POSUPTO:
2072
2073
    case OP_EXACTI:
2074
    case OP_POSSTARI:
2075
    case OP_POSPLUSI:
2076
    case OP_POSQUERYI:
2077
    case OP_POSUPTOI:
2078
2079
    case OP_NOTEXACT:
2080
    case OP_NOTPOSSTAR:
2081
    case OP_NOTPOSPLUS:
2082
    case OP_NOTPOSQUERY:
2083
    case OP_NOTPOSUPTO:
2084
2085
    case OP_NOTEXACTI:
2086
    case OP_NOTPOSSTARI:
2087
    case OP_NOTPOSPLUSI:
2088
    case OP_NOTPOSQUERYI:
2089
    case OP_NOTPOSUPTOI:
2090
2091
    case OP_TYPEEXACT:
2092
    case OP_TYPEPOSSTAR:
2093
    case OP_TYPEPOSPLUS:
2094
    case OP_TYPEPOSQUERY:
2095
    case OP_TYPEPOSUPTO:
2096
2097
    case OP_CLASS:
2098
    case OP_NCLASS:
2099
    case OP_XCLASS:
2100
2101
    case OP_CALLOUT:
2102
    case OP_CALLOUT_STR:
2103
2104
    cc = next_opcode(common, cc);
2105
    SLJIT_ASSERT(cc != NULL);
2106
    break;
2107
    }
2108
2109
/* Possessive quantifiers can use a special case. */
2110
if (SLJIT_UNLIKELY(possessive == length))
2111
  return stack_restore ? no_frame : no_stack;
2112
2113
if (length > 0)
2114
  return length + 1;
2115
return stack_restore ? no_frame : no_stack;
2116
}
2117
2118
/* Emits code that writes a frame onto the stack addressed by STACK_TOP.

The opcodes in [cc, ccend) are scanned and, for each item that needs saving,
a tag word followed by the current value(s) is stored:
  - OP_SET_SOM:                tag -OVECTOR(0), then the value of OVECTOR(0)
  - mark / *_ARG verbs:        tag -common->mark_ptr, then the mark value
  - capture_last_ptr:          tag -common->capture_last_ptr, then its value
  - capturing brackets:        tag OVECTOR(offset), then both ovector words
Each of the first three is saved at most once (tracked by the *_found flags).
The frame is terminated by a zero word.

Arguments:
  common     compiler state
  cc         first opcode to scan
  ccend      end of the scanned range; when NULL the range is derived from
             the bracket that starts at cc
  stackpos   stack slot index where writing starts; converted with STACK()
             and decremented by one machine word after every store
  stacktop   slot index the terminator must land on (only checked by
             assertion)
*/
static void init_frame(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, int stackpos, int stacktop)
2119
{
2120
DEFINE_COMPILER;
2121
BOOL setsom_found = FALSE;
2122
BOOL setmark_found = FALSE;
2123
/* The last capture is a local variable even for recursions. */
2124
BOOL capture_last_found = FALSE;
2125
int offset;
2126
2127
/* >= 1 + shortest item size (2) */
2128
SLJIT_UNUSED_ARG(stacktop);
2129
SLJIT_ASSERT(stackpos >= stacktop + 2);
2130
2131
/* From here on stackpos is the STACK() converted offset, not a slot index. */
stackpos = STACK(stackpos);
2132
if (ccend == NULL)
2133
  {
2134
  /* No explicit end: scan the bracket starting at cc, stopping before its
  closing opcode. The opening opcode is skipped unless it is a
  (S)CBRAPOS, which is handled by the switch below. */
  ccend = bracketend(cc) - (1 + LINK_SIZE);
2135
  if (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS)
2136
    cc = next_opcode(common, cc);
2137
  }
2138
2139
SLJIT_ASSERT(cc != NULL);
2140
while (cc < ccend)
2141
  switch(*cc)
2142
    {
2143
    case OP_SET_SOM:
2144
    SLJIT_ASSERT(common->has_set_som);
2145
    if (!setsom_found)
2146
      {
2147
      /* Tag word first, then the saved value one slot below it. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
2148
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
2149
      stackpos -= (int)sizeof(sljit_sw);
2150
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2151
      stackpos -= (int)sizeof(sljit_sw);
2152
      setsom_found = TRUE;
2153
      }
2154
    cc += 1;
2155
    break;
2156
2157
    case OP_MARK:
2158
    case OP_COMMIT_ARG:
2159
    case OP_PRUNE_ARG:
2160
    case OP_THEN_ARG:
2161
    SLJIT_ASSERT(common->mark_ptr != 0);
2162
    if (!setmark_found)
2163
      {
2164
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2165
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
2166
      stackpos -= (int)sizeof(sljit_sw);
2167
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2168
      stackpos -= (int)sizeof(sljit_sw);
2169
      setmark_found = TRUE;
2170
      }
2171
    /* Skip the opcode, its two extra code units and the name of length cc[1]. */
    cc += 1 + 2 + cc[1];
2172
    break;
2173
2174
    case OP_RECURSE:
2175
    /* A recursion saves every single-instance item that the pattern uses
    and that has not been saved yet. */
    if (common->has_set_som && !setsom_found)
2176
      {
2177
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
2178
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
2179
      stackpos -= (int)sizeof(sljit_sw);
2180
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2181
      stackpos -= (int)sizeof(sljit_sw);
2182
      setsom_found = TRUE;
2183
      }
2184
    if (common->mark_ptr != 0 && !setmark_found)
2185
      {
2186
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2187
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
2188
      stackpos -= (int)sizeof(sljit_sw);
2189
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2190
      stackpos -= (int)sizeof(sljit_sw);
2191
      setmark_found = TRUE;
2192
      }
2193
    if (common->capture_last_ptr != 0 && !capture_last_found)
2194
      {
2195
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
2196
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
2197
      stackpos -= (int)sizeof(sljit_sw);
2198
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2199
      stackpos -= (int)sizeof(sljit_sw);
2200
      capture_last_found = TRUE;
2201
      }
2202
    cc += 1 + LINK_SIZE;
2203
    break;
2204
2205
    case OP_CBRA:
2206
    case OP_CBRAPOS:
2207
    case OP_SCBRA:
2208
    case OP_SCBRAPOS:
2209
    if (common->capture_last_ptr != 0 && !capture_last_found)
2210
      {
2211
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
2212
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
2213
      stackpos -= (int)sizeof(sljit_sw);
2214
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2215
      stackpos -= (int)sizeof(sljit_sw);
2216
      capture_last_found = TRUE;
2217
      }
2218
    /* Capture number * 2 is the index of the first ovector word of the pair.
    A capture occupies three slots: a positive tag and both ovector words. */
    offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
2219
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
2220
    stackpos -= (int)sizeof(sljit_sw);
2221
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
2222
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
2223
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2224
    stackpos -= (int)sizeof(sljit_sw);
2225
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
2226
    stackpos -= (int)sizeof(sljit_sw);
2227
2228
    cc += 1 + LINK_SIZE + IMM2_SIZE;
2229
    break;
2230
2231
    default:
2232
    cc = next_opcode(common, cc);
2233
    SLJIT_ASSERT(cc != NULL);
2234
    break;
2235
    }
2236
2237
/* A zero word terminates the frame; it must land exactly on stacktop. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
2238
SLJIT_ASSERT(stackpos == STACK(stacktop));
2239
}
2240
2241
#define RECURSE_TMP_REG_COUNT 3
2242
2243
/* State of a small ring of pending memory-to-memory copies. Each of the
RECURSE_TMP_REG_COUNT slots owns one temporary register; a copy is loaded
into the register immediately and its store is emitted later, when the slot
is reused or when the queue is flushed (see the delayed_mem_copy_* functions). */
typedef struct delayed_mem_copy_status {
2244
  /* Compiler used to emit the moves. */
  struct sljit_compiler *compiler;
2245
  /* Base register of the pending store of each slot, or -1 when the slot
  has no pending store. */
  int store_bases[RECURSE_TMP_REG_COUNT];
2246
  /* Offset of the pending store of each slot. */
  int store_offsets[RECURSE_TMP_REG_COUNT];
2247
  /* Register that carries the value of each slot. */
  int tmp_regs[RECURSE_TMP_REG_COUNT];
2248
  /* Place where the original content of tmp_regs[i] is kept while the slot
  is in use; only used when sljit_get_register_index() reports it as
  negative (the existing comments call this case "virtual registers"). */
  int saved_tmp_regs[RECURSE_TMP_REG_COUNT];
2249
  /* Index of the slot used by the next copy. */
  int next_tmp_reg;
2250
} delayed_mem_copy_status;
2251
2252
/* Resets a delayed copy queue: no slot has a pending store, the first slot
is used next, and moves are emitted through the compiler of "common".
The caller must have filled tmp_regs[] and saved_tmp_regs[] beforehand;
this function only validates them by assertion. */
static void delayed_mem_copy_init(delayed_mem_copy_status *status, compiler_common *common)
{
int slot = 0;

status->compiler = common->compiler;
status->next_tmp_reg = 0;

while (slot < RECURSE_TMP_REG_COUNT)
  {
  SLJIT_ASSERT(status->tmp_regs[slot] >= 0);
  /* The saved register either needs a real save (negative register index)
  or must be the temporary register itself. */
  SLJIT_ASSERT(sljit_get_register_index(status->saved_tmp_regs[slot]) < 0 || status->tmp_regs[slot] == status->saved_tmp_regs[slot]);

  /* -1 marks a slot without a pending store. */
  status->store_bases[slot] = -1;
  slot++;
  }
}
2266
2267
/* Queues a one word copy from [load_base + load_offset] to
[store_base + store_offset].

The load is emitted immediately into the temporary register of the current
slot; the matching store is postponed until the slot is reused or the queue
is finished. Before the register is overwritten, either the store that was
still pending in this slot is emitted, or (on first use of the slot) the
register content is preserved in saved_tmp_regs[] when that is required. */
static void delayed_mem_copy_move(delayed_mem_copy_status *status, int load_base, sljit_sw load_offset,
  int store_base, sljit_sw store_offset)
{
struct sljit_compiler *compiler = status->compiler;
int slot = status->next_tmp_reg;
int reg = status->tmp_regs[slot];
int pending_base = status->store_bases[slot];

SLJIT_ASSERT(load_base > 0 && store_base > 0);

if (pending_base != -1)
  {
  /* Complete the copy that previously occupied this slot. */
  OP1(SLJIT_MOV, SLJIT_MEM1(pending_base), status->store_offsets[slot], reg, 0);
  }
else if (sljit_get_register_index(status->saved_tmp_regs[slot]) < 0)
  {
  /* Preserve virtual registers. */
  OP1(SLJIT_MOV, status->saved_tmp_regs[slot], 0, reg, 0);
  }

OP1(SLJIT_MOV, reg, 0, SLJIT_MEM1(load_base), load_offset);
status->store_bases[slot] = store_base;
status->store_offsets[slot] = store_offset;

/* Advance to the next slot of the ring. */
status->next_tmp_reg = (slot + 1) % RECURSE_TMP_REG_COUNT;
}
2291
2292
/* Flushes a delayed copy queue. Starting with the slot that would be used
next, every slot with a pending store gets its store emitted, and the
temporary register is reloaded from saved_tmp_regs[] when its original
content was preserved there. */
static void delayed_mem_copy_finish(delayed_mem_copy_status *status)
{
struct sljit_compiler *compiler = status->compiler;
int slot = status->next_tmp_reg;
int remaining, reg, backup;

for (remaining = RECURSE_TMP_REG_COUNT; remaining > 0; remaining--)
  {
  if (status->store_bases[slot] != -1)
    {
    reg = status->tmp_regs[slot];
    backup = status->saved_tmp_regs[slot];

    OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[slot]), status->store_offsets[slot], reg, 0);

    /* Restore virtual registers. */
    if (sljit_get_register_index(backup) < 0)
      OP1(SLJIT_MOV, reg, 0, backup, 0);
    }

  slot = (slot + 1) % RECURSE_TMP_REG_COUNT;
  }
}
2315
2316
#undef RECURSE_TMP_REG_COUNT
2317
2318
/* Computes how many machine words are needed to save the data of the
pattern part [cc, ccend) around a recursion, and collects three properties
of that part.

The opcode walk mirrors the one in copy_recurse_data(): every item counted
here corresponds to a slot copied there.

Arguments:
  common              compiler state
  cc, ccend           scanned opcode range; the walk must end exactly on ccend
  needs_control_head  out: set when a slot for the control head is counted
  has_quit            out: set when a (*PRUNE), (*SKIP), (*COMMIT) or
                      (*THEN) style verb was seen
  has_accept          out: set when OP_ACCEPT or OP_ASSERT_ACCEPT was seen

Returns the number of machine words.
*/
static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
2319
  BOOL *needs_control_head, BOOL *has_quit, BOOL *has_accept)
2320
{
2321
/* One word is always needed; copy_recurse_data() uses the first slot for
common->recursive_head_ptr. */
int length = 1;
2322
int size;
2323
PCRE2_SPTR alternative;
2324
BOOL quit_found = FALSE;
2325
BOOL accept_found = FALSE;
2326
BOOL setsom_found = FALSE;
2327
BOOL setmark_found = FALSE;
2328
BOOL capture_last_found = FALSE;
2329
BOOL control_head_found = FALSE;
2330
2331
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2332
SLJIT_ASSERT(common->control_head_ptr != 0);
2333
control_head_found = TRUE;
2334
#endif
2335
2336
/* Calculate the sum of the private machine words. */
2337
while (cc < ccend)
2338
  {
2339
  size = 0;
2340
  switch(*cc)
2341
    {
2342
    case OP_SET_SOM:
2343
    SLJIT_ASSERT(common->has_set_som);
2344
    setsom_found = TRUE;
2345
    cc += 1;
2346
    break;
2347
2348
    case OP_RECURSE:
2349
    /* A nested recursion may touch every single-instance item that the
    pattern uses. */
    if (common->has_set_som)
2350
      setsom_found = TRUE;
2351
    if (common->mark_ptr != 0)
2352
      setmark_found = TRUE;
2353
    if (common->capture_last_ptr != 0)
2354
      capture_last_found = TRUE;
2355
    cc += 1 + LINK_SIZE;
2356
    break;
2357
2358
    case OP_KET:
2359
    if (PRIVATE_DATA(cc) != 0)
2360
      {
2361
      length++;
2362
      SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
2363
      /* The private data of cc + 1 is used as an additional skip distance. */
      cc += PRIVATE_DATA(cc + 1);
2364
      }
2365
    cc += 1 + LINK_SIZE;
2366
    break;
2367
2368
    /* Brackets that always own exactly one private word. */
    case OP_ASSERT:
2369
    case OP_ASSERT_NOT:
2370
    case OP_ASSERTBACK:
2371
    case OP_ASSERTBACK_NOT:
2372
    case OP_ASSERT_NA:
2373
    case OP_ASSERTBACK_NA:
2374
    case OP_ONCE:
2375
    case OP_SCRIPT_RUN:
2376
    case OP_BRAPOS:
2377
    case OP_SBRA:
2378
    case OP_SBRAPOS:
2379
    case OP_SCOND:
2380
    length++;
2381
    SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
2382
    cc += 1 + LINK_SIZE;
2383
    break;
2384
2385
    case OP_CBRA:
2386
    case OP_SCBRA:
2387
    /* Two ovector words, plus one private word when the bracket is not
    marked in optimized_cbracket[]. */
    length += 2;
2388
    if (common->capture_last_ptr != 0)
2389
      capture_last_found = TRUE;
2390
    if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
2391
      length++;
2392
    cc += 1 + LINK_SIZE + IMM2_SIZE;
2393
    break;
2394
2395
    case OP_CBRAPOS:
2396
    case OP_SCBRAPOS:
2397
    /* Two ovector words and two private words. */
    length += 2 + 2;
2398
    if (common->capture_last_ptr != 0)
2399
      capture_last_found = TRUE;
2400
    cc += 1 + LINK_SIZE + IMM2_SIZE;
2401
    break;
2402
2403
    case OP_COND:
2404
    /* Might be a hidden SCOND. */
2405
    alternative = cc + GET(cc, 1);
2406
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
2407
      length++;
2408
    cc += 1 + LINK_SIZE;
2409
    break;
2410
2411
    /* The CASE_ITERATOR_* macros are defined outside this function;
    presumably each expands to the case labels of one iterator group. The
    number in the macro name matches the word count added below. */
    CASE_ITERATOR_PRIVATE_DATA_1
2412
    if (PRIVATE_DATA(cc) != 0)
2413
      length++;
2414
    cc += 2;
2415
#ifdef SUPPORT_UNICODE
2416
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2417
#endif
2418
    break;
2419
2420
    CASE_ITERATOR_PRIVATE_DATA_2A
2421
    if (PRIVATE_DATA(cc) != 0)
2422
      length += 2;
2423
    cc += 2;
2424
#ifdef SUPPORT_UNICODE
2425
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2426
#endif
2427
    break;
2428
2429
    CASE_ITERATOR_PRIVATE_DATA_2B
2430
    if (PRIVATE_DATA(cc) != 0)
2431
      length += 2;
2432
    cc += 2 + IMM2_SIZE;
2433
#ifdef SUPPORT_UNICODE
2434
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2435
#endif
2436
    break;
2437
2438
    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2439
    if (PRIVATE_DATA(cc) != 0)
2440
      length++;
2441
    cc += 1;
2442
    break;
2443
2444
    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2445
    if (PRIVATE_DATA(cc) != 0)
2446
      length += 2;
2447
    cc += 1;
2448
    break;
2449
2450
    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2451
    if (PRIVATE_DATA(cc) != 0)
2452
      length += 2;
2453
    cc += 1 + IMM2_SIZE;
2454
    break;
2455
2456
    case OP_CLASS:
2457
    case OP_NCLASS:
2458
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
2459
    case OP_XCLASS:
2460
    /* An XCLASS stores its own length; the other classes have a fixed
    size of one opcode plus a 32 byte bitmap. */
    size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
2461
#else
2462
    size = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
2463
#endif
2464
    /* The word count depends on the repeat opcode that follows the class. */
    if (PRIVATE_DATA(cc) != 0)
2465
      length += get_class_iterator_size(cc + size);
2466
    cc += size;
2467
    break;
2468
2469
    case OP_MARK:
2470
    case OP_COMMIT_ARG:
2471
    case OP_PRUNE_ARG:
2472
    case OP_THEN_ARG:
2473
    SLJIT_ASSERT(common->mark_ptr != 0);
2474
    if (!setmark_found)
2475
      setmark_found = TRUE;
2476
    if (common->control_head_ptr != 0)
2477
      control_head_found = TRUE;
2478
    /* Every verb of this group except a plain mark counts as a quit. */
    if (*cc != OP_MARK)
2479
      quit_found = TRUE;
2480
2481
    cc += 1 + 2 + cc[1];
2482
    break;
2483
2484
    case OP_PRUNE:
2485
    case OP_SKIP:
2486
    case OP_COMMIT:
2487
    quit_found = TRUE;
2488
    cc++;
2489
    break;
2490
2491
    case OP_SKIP_ARG:
2492
    quit_found = TRUE;
2493
    cc += 1 + 2 + cc[1];
2494
    break;
2495
2496
    case OP_THEN:
2497
    SLJIT_ASSERT(common->control_head_ptr != 0);
2498
    quit_found = TRUE;
2499
    if (!control_head_found)
2500
      control_head_found = TRUE;
2501
    cc++;
2502
    break;
2503
2504
    case OP_ACCEPT:
2505
    case OP_ASSERT_ACCEPT:
2506
    accept_found = TRUE;
2507
    cc++;
2508
    break;
2509
2510
    default:
2511
    cc = next_opcode(common, cc);
2512
    SLJIT_ASSERT(cc != NULL);
2513
    break;
2514
    }
2515
  }
2516
SLJIT_ASSERT(cc == ccend);
2517
2518
/* Items that exist only once are added after the walk. The start-of-match
and mark slots are only counted when a quit verb was found. */
if (control_head_found)
2519
  length++;
2520
if (capture_last_found)
2521
  length++;
2522
if (quit_found)
2523
  {
2524
  if (setsom_found)
2525
    length++;
2526
  if (setmark_found)
2527
    length++;
2528
  }
2529
2530
*needs_control_head = control_head_found;
2531
*has_quit = quit_found;
2532
*has_accept = accept_found;
2533
return length;
2534
}
2535
2536
/* Copy modes accepted by the "type" argument of copy_recurse_data(). The
descriptions below summarize which item groups that function moves, and in
which direction, for each mode. */
enum copy_recurse_data_types {
2537
  /* Local slots (SLJIT_SP based) -> stack area: private, shared and kept
  shared items. */
  recurse_copy_from_global,
2538
  /* Stack area -> local slots: the recursive head and private items only. */
  recurse_copy_private_to_global,
2539
  /* Stack area -> local slots: shared and kept shared items. */
  recurse_copy_shared_to_global,
2540
  /* Stack area -> local slots: kept shared items only. */
  recurse_copy_kept_shared_to_global,
2541
  /* Exchange of local slots and the area addressed by TMP2: the recursive
  head, private and shared items (kept shared items are skipped). */
  recurse_swap_global
2542
};
2543
2544
/* Emits the moves that save, restore or swap the data of the pattern part
[cc, ccend) around a recursion.

The opcodes are walked in the same way as in get_recurse_data_length(). For
every opcode up to two "private", three "shared" and two "kept shared"
local slot offsets are collected, and each collected slot is paired with the
next word of the stack area. Whether a group is really copied, and in which
direction, depends on "type" (see enum copy_recurse_data_types); groups that
are not copied still advance the stack position so the layout stays fixed.

All moves go through the delayed copy queue, so a load is always emitted
before the store of an earlier move. In recurse_swap_global mode two moves
are queued per slot (stack -> local, then local -> stack); both loads happen
before either store, which exchanges the two values.

Arguments:
  common      compiler state
  cc, ccend   scanned opcode range
  type        one of enum copy_recurse_data_types
  stackptr    index of the first stack slot (converted with STACK())
  stacktop    index one past the last stack slot; only checked by assertion
  has_quit    when FALSE, the start-of-match and mark values are not
              treated as kept shared items
*/
static void copy_recurse_data(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
2545
  int type, int stackptr, int stacktop, BOOL has_quit)
2546
{
2547
delayed_mem_copy_status status;
2548
PCRE2_SPTR alternative;
2549
/* Local slot offsets collected for the current opcode, one array per group. */
sljit_sw private_srcw[2];
2550
sljit_sw shared_srcw[3];
2551
sljit_sw kept_shared_srcw[2];
2552
int private_count, shared_count, kept_shared_count;
2553
int from_sp, base_reg, offset, i;
2554
BOOL setsom_found = FALSE;
2555
BOOL setmark_found = FALSE;
2556
BOOL capture_last_found = FALSE;
2557
BOOL control_head_found = FALSE;
2558
2559
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2560
SLJIT_ASSERT(common->control_head_ptr != 0);
2561
control_head_found = TRUE;
2562
#endif
2563
2564
/* from_sp selects the direction (TRUE: local slots -> stack area);
base_reg is the register that addresses the stack area. */
switch (type)
2565
  {
2566
  case recurse_copy_from_global:
2567
  from_sp = TRUE;
2568
  base_reg = STACK_TOP;
2569
  break;
2570
2571
  case recurse_copy_private_to_global:
2572
  case recurse_copy_shared_to_global:
2573
  case recurse_copy_kept_shared_to_global:
2574
  from_sp = FALSE;
2575
  base_reg = STACK_TOP;
2576
  break;
2577
2578
  default:
2579
  SLJIT_ASSERT(type == recurse_swap_global);
2580
  from_sp = FALSE;
2581
  base_reg = TMP2;
2582
  break;
2583
  }
2584
2585
stackptr = STACK(stackptr);
2586
stacktop = STACK(stacktop);
2587
2588
/* Select the three temporary registers of the copy queue. TMP2 cannot be
used as a temporary when it is the base register. */
status.tmp_regs[0] = TMP1;
2589
status.saved_tmp_regs[0] = TMP1;
2590
2591
if (base_reg != TMP2)
2592
  {
2593
  status.tmp_regs[1] = TMP2;
2594
  status.saved_tmp_regs[1] = TMP2;
2595
  }
2596
else
2597
  {
2598
  status.saved_tmp_regs[1] = RETURN_ADDR;
2599
  if (HAS_VIRTUAL_REGISTERS)
2600
    status.tmp_regs[1] = STR_PTR;
2601
  else
2602
    status.tmp_regs[1] = RETURN_ADDR;
2603
  }
2604
2605
status.saved_tmp_regs[2] = TMP3;
2606
if (HAS_VIRTUAL_REGISTERS)
2607
  status.tmp_regs[2] = STR_END;
2608
else
2609
  status.tmp_regs[2] = TMP3;
2610
2611
delayed_mem_copy_init(&status, common);
2612
2613
/* The first stack slot always belongs to the recursive head; it is handled
like a private item. */
if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
2614
  {
2615
  SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
2616
2617
  if (!from_sp)
2618
    delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->recursive_head_ptr);
2619
2620
  if (from_sp || type == recurse_swap_global)
2621
    delayed_mem_copy_move(&status, SLJIT_SP, common->recursive_head_ptr, base_reg, stackptr);
2622
  }
2623
2624
stackptr += sizeof(sljit_sw);
2625
2626
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2627
if (type != recurse_copy_shared_to_global)
2628
  {
2629
  if (!from_sp)
2630
    delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->control_head_ptr);
2631
2632
  if (from_sp || type == recurse_swap_global)
2633
    delayed_mem_copy_move(&status, SLJIT_SP, common->control_head_ptr, base_reg, stackptr);
2634
  }
2635
2636
stackptr += sizeof(sljit_sw);
2637
#endif
2638
2639
while (cc < ccend)
2640
  {
2641
  private_count = 0;
2642
  shared_count = 0;
2643
  kept_shared_count = 0;
2644
2645
  /* Step 1: collect the slots that belong to the current opcode. */
  switch(*cc)
2646
    {
2647
    case OP_SET_SOM:
2648
    SLJIT_ASSERT(common->has_set_som);
2649
    if (has_quit && !setsom_found)
2650
      {
2651
      kept_shared_srcw[0] = OVECTOR(0);
2652
      kept_shared_count = 1;
2653
      setsom_found = TRUE;
2654
      }
2655
    cc += 1;
2656
    break;
2657
2658
    case OP_RECURSE:
2659
    if (has_quit)
2660
      {
2661
      if (common->has_set_som && !setsom_found)
2662
        {
2663
        kept_shared_srcw[0] = OVECTOR(0);
2664
        kept_shared_count = 1;
2665
        setsom_found = TRUE;
2666
        }
2667
      if (common->mark_ptr != 0 && !setmark_found)
2668
        {
2669
        /* Appended after the start-of-match slot when that was added above. */
        kept_shared_srcw[kept_shared_count] = common->mark_ptr;
2670
        kept_shared_count++;
2671
        setmark_found = TRUE;
2672
        }
2673
      }
2674
    if (common->capture_last_ptr != 0 && !capture_last_found)
2675
      {
2676
      shared_srcw[0] = common->capture_last_ptr;
2677
      shared_count = 1;
2678
      capture_last_found = TRUE;
2679
      }
2680
    cc += 1 + LINK_SIZE;
2681
    break;
2682
2683
    case OP_KET:
2684
    if (PRIVATE_DATA(cc) != 0)
2685
      {
2686
      private_count = 1;
2687
      private_srcw[0] = PRIVATE_DATA(cc);
2688
      SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
2689
      cc += PRIVATE_DATA(cc + 1);
2690
      }
2691
    cc += 1 + LINK_SIZE;
2692
    break;
2693
2694
    case OP_ASSERT:
2695
    case OP_ASSERT_NOT:
2696
    case OP_ASSERTBACK:
2697
    case OP_ASSERTBACK_NOT:
2698
    case OP_ASSERT_NA:
2699
    case OP_ASSERTBACK_NA:
2700
    case OP_ONCE:
2701
    case OP_SCRIPT_RUN:
2702
    case OP_BRAPOS:
2703
    case OP_SBRA:
2704
    case OP_SBRAPOS:
2705
    case OP_SCOND:
2706
    private_count = 1;
2707
    private_srcw[0] = PRIVATE_DATA(cc);
2708
    cc += 1 + LINK_SIZE;
2709
    break;
2710
2711
    case OP_CBRA:
2712
    case OP_SCBRA:
2713
    /* Both ovector words of the capture are shared items. */
    offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
2714
    shared_srcw[0] = OVECTOR(offset);
2715
    shared_srcw[1] = OVECTOR(offset + 1);
2716
    shared_count = 2;
2717
2718
    if (common->capture_last_ptr != 0 && !capture_last_found)
2719
      {
2720
      shared_srcw[2] = common->capture_last_ptr;
2721
      shared_count = 3;
2722
      capture_last_found = TRUE;
2723
      }
2724
2725
    if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
2726
      {
2727
      private_count = 1;
2728
      private_srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
2729
      }
2730
    cc += 1 + LINK_SIZE + IMM2_SIZE;
2731
    break;
2732
2733
    case OP_CBRAPOS:
2734
    case OP_SCBRAPOS:
2735
    offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
2736
    shared_srcw[0] = OVECTOR(offset);
2737
    shared_srcw[1] = OVECTOR(offset + 1);
2738
    shared_count = 2;
2739
2740
    if (common->capture_last_ptr != 0 && !capture_last_found)
2741
      {
2742
      shared_srcw[2] = common->capture_last_ptr;
2743
      shared_count = 3;
2744
      capture_last_found = TRUE;
2745
      }
2746
2747
    private_count = 2;
2748
    private_srcw[0] = PRIVATE_DATA(cc);
2749
    private_srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
2750
    cc += 1 + LINK_SIZE + IMM2_SIZE;
2751
    break;
2752
2753
    case OP_COND:
2754
    /* Might be a hidden SCOND. */
2755
    alternative = cc + GET(cc, 1);
2756
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
2757
      {
2758
      private_count = 1;
2759
      private_srcw[0] = PRIVATE_DATA(cc);
2760
      }
2761
    cc += 1 + LINK_SIZE;
2762
    break;
2763
2764
    /* The CASE_ITERATOR_* macros are defined outside this function;
    presumably each expands to the case labels of one iterator group. Groups
    with two words use two consecutive local slots. */
    CASE_ITERATOR_PRIVATE_DATA_1
2765
    if (PRIVATE_DATA(cc))
2766
      {
2767
      private_count = 1;
2768
      private_srcw[0] = PRIVATE_DATA(cc);
2769
      }
2770
    cc += 2;
2771
#ifdef SUPPORT_UNICODE
2772
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2773
#endif
2774
    break;
2775
2776
    CASE_ITERATOR_PRIVATE_DATA_2A
2777
    if (PRIVATE_DATA(cc))
2778
      {
2779
      private_count = 2;
2780
      private_srcw[0] = PRIVATE_DATA(cc);
2781
      private_srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
2782
      }
2783
    cc += 2;
2784
#ifdef SUPPORT_UNICODE
2785
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2786
#endif
2787
    break;
2788
2789
    CASE_ITERATOR_PRIVATE_DATA_2B
2790
    if (PRIVATE_DATA(cc))
2791
      {
2792
      private_count = 2;
2793
      private_srcw[0] = PRIVATE_DATA(cc);
2794
      private_srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
2795
      }
2796
    cc += 2 + IMM2_SIZE;
2797
#ifdef SUPPORT_UNICODE
2798
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2799
#endif
2800
    break;
2801
2802
    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2803
    if (PRIVATE_DATA(cc))
2804
      {
2805
      private_count = 1;
2806
      private_srcw[0] = PRIVATE_DATA(cc);
2807
      }
2808
    cc += 1;
2809
    break;
2810
2811
    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2812
    if (PRIVATE_DATA(cc))
2813
      {
2814
      private_count = 2;
2815
      private_srcw[0] = PRIVATE_DATA(cc);
2816
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2817
      }
2818
    cc += 1;
2819
    break;
2820
2821
    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2822
    if (PRIVATE_DATA(cc))
2823
      {
2824
      private_count = 2;
2825
      private_srcw[0] = PRIVATE_DATA(cc);
2826
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2827
      }
2828
    cc += 1 + IMM2_SIZE;
2829
    break;
2830
2831
    case OP_CLASS:
2832
    case OP_NCLASS:
2833
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
2834
    case OP_XCLASS:
2835
    /* Here "i" temporarily holds the length of the class item. */
    i = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
2836
#else
2837
    i = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
2838
#endif
2839
    if (PRIVATE_DATA(cc) != 0)
2840
      switch(get_class_iterator_size(cc + i))
2841
        {
2842
        case 1:
2843
        private_count = 1;
2844
        private_srcw[0] = PRIVATE_DATA(cc);
2845
        break;
2846
2847
        case 2:
2848
        private_count = 2;
2849
        private_srcw[0] = PRIVATE_DATA(cc);
2850
        private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2851
        break;
2852
2853
        default:
2854
        SLJIT_UNREACHABLE();
2855
        break;
2856
        }
2857
    cc += i;
2858
    break;
2859
2860
    case OP_MARK:
2861
    case OP_COMMIT_ARG:
2862
    case OP_PRUNE_ARG:
2863
    case OP_THEN_ARG:
2864
    SLJIT_ASSERT(common->mark_ptr != 0);
2865
    if (has_quit && !setmark_found)
2866
      {
2867
      kept_shared_srcw[0] = common->mark_ptr;
2868
      kept_shared_count = 1;
2869
      setmark_found = TRUE;
2870
      }
2871
    if (common->control_head_ptr != 0 && !control_head_found)
2872
      {
2873
      private_srcw[0] = common->control_head_ptr;
2874
      private_count = 1;
2875
      control_head_found = TRUE;
2876
      }
2877
    cc += 1 + 2 + cc[1];
2878
    break;
2879
2880
    case OP_THEN:
2881
    SLJIT_ASSERT(common->control_head_ptr != 0);
2882
    if (!control_head_found)
2883
      {
2884
      private_srcw[0] = common->control_head_ptr;
2885
      private_count = 1;
2886
      control_head_found = TRUE;
2887
      }
2888
    cc++;
2889
    break;
2890
2891
    default:
2892
    cc = next_opcode(common, cc);
2893
    SLJIT_ASSERT(cc != NULL);
2894
    break;
2895
    }
2896
2897
  /* Step 2: private items. Skipped groups still consume their stack slots. */
  if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
2898
    {
2899
    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
2900
2901
    for (i = 0; i < private_count; i++)
2902
      {
2903
      SLJIT_ASSERT(private_srcw[i] != 0);
2904
2905
      if (!from_sp)
2906
        delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, private_srcw[i]);
2907
2908
      if (from_sp || type == recurse_swap_global)
2909
        delayed_mem_copy_move(&status, SLJIT_SP, private_srcw[i], base_reg, stackptr);
2910
2911
      stackptr += sizeof(sljit_sw);
2912
      }
2913
    }
2914
  else
2915
    stackptr += sizeof(sljit_sw) * private_count;
2916
2917
  /* Step 3: shared items. */
  if (type != recurse_copy_private_to_global && type != recurse_copy_kept_shared_to_global)
2918
    {
2919
    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_swap_global);
2920
2921
    for (i = 0; i < shared_count; i++)
2922
      {
2923
      SLJIT_ASSERT(shared_srcw[i] != 0);
2924
2925
      if (!from_sp)
2926
        delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, shared_srcw[i]);
2927
2928
      if (from_sp || type == recurse_swap_global)
2929
        delayed_mem_copy_move(&status, SLJIT_SP, shared_srcw[i], base_reg, stackptr);
2930
2931
      stackptr += sizeof(sljit_sw);
2932
      }
2933
    }
2934
  else
2935
    stackptr += sizeof(sljit_sw) * shared_count;
2936
2937
  /* Step 4: kept shared items (never part of a swap). */
  if (type != recurse_copy_private_to_global && type != recurse_swap_global)
2938
    {
2939
    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_copy_kept_shared_to_global);
2940
2941
    for (i = 0; i < kept_shared_count; i++)
2942
      {
2943
      SLJIT_ASSERT(kept_shared_srcw[i] != 0);
2944
2945
      if (!from_sp)
2946
        delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, kept_shared_srcw[i]);
2947
2948
      if (from_sp || type == recurse_swap_global)
2949
        delayed_mem_copy_move(&status, SLJIT_SP, kept_shared_srcw[i], base_reg, stackptr);
2950
2951
      stackptr += sizeof(sljit_sw);
2952
      }
2953
    }
2954
  else
2955
    stackptr += sizeof(sljit_sw) * kept_shared_count;
2956
  }
2957
2958
/* The walk must consume exactly the opcode range and the stack area. */
SLJIT_ASSERT(cc == ccend && stackptr == stacktop);
2959
2960
/* Emit the stores that are still pending in the copy queue. */
delayed_mem_copy_finish(&status);
2961
}
2962
2963
/* Walks the bracket that starts at cc and sets a flag in
common->then_offsets for every alternative that contains a (*THEN) verb.

current_offset points to the flag of the innermost enclosing alternative
(or is NULL when a (*THEN) must not be recorded). Nested assertions and
brackets in the OP_ONCE .. OP_SCOND opcode range are processed recursively.

Returns the end of the processed bracket. */
static SLJIT_INLINE PCRE2_SPTR set_then_offsets(compiler_common *common, PCRE2_SPTR cc, sljit_u8 *current_offset)
{
PCRE2_SPTR block_end = bracketend(cc);
PCRE2_UCHAR opcode = *cc;
BOOL track_alternatives;

/* Conditional blocks never track their alternatives. */
track_alternatives = (cc[GET(cc, 1)] == OP_ALT) && opcode != OP_COND && opcode != OP_SCOND;

/* Assert captures then. */
if (opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NA)
  current_offset = NULL;

cc = next_opcode(common, cc);
if (track_alternatives)
  current_offset = common->then_offsets + (cc - common->start);

while (cc < block_end)
  {
  opcode = *cc;

  if ((opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NA) || (opcode >= OP_ONCE && opcode <= OP_SCOND))
    {
    cc = set_then_offsets(common, cc, current_offset);
    continue;
    }

  /* A new alternative starts right after the OP_ALT opcode. */
  if (track_alternatives && opcode == OP_ALT)
    current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
  if (current_offset != NULL && opcode >= OP_THEN && opcode <= OP_THEN_ARG)
    *current_offset = 1;
  cc = next_opcode(common, cc);
  }

return block_end;
}
2995
2996
#undef CASE_ITERATOR_PRIVATE_DATA_1
2997
#undef CASE_ITERATOR_PRIVATE_DATA_2A
2998
#undef CASE_ITERATOR_PRIVATE_DATA_2B
2999
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
3000
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
3001
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
3002
3003
/* Returns TRUE when at most one bit of value is set. Note that this
includes zero, which has no bit set. */
static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
{
/* Clearing the lowest set bit leaves nothing behind for such values. */
unsigned int without_lowest_bit = value & (value - 1);

return without_lowest_bit == 0;
}
3007
3008
/* Binds every jump of the list to the given label. */
static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
{
jump_list *item;

/* sljit_set_label is clever enough to do nothing
if either the jump or the label is NULL. */
for (item = list; item; item = item->next)
  SET_LABEL(item->jump, label);
}
3018
3019
/* Prepends a jump to a jump list. The list item is allocated with
sljit_alloc_memory(); when that allocation fails the list is left
unchanged and the jump is not recorded. */
static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
{
jump_list *node = sljit_alloc_memory(compiler, sizeof(jump_list));

if (!node)
  return;

node->jump = jump;
node->next = *list;
*list = node;
}
3029
3030
/* Records a stub on common->stubs: "start" is the jump that enters the
stub, and a label emitted at the current position is stored as the point
where the stub continues afterwards (see flush_stubs). Nothing is recorded
and no label is emitted when the list item cannot be allocated. */
static void add_stub(compiler_common *common, struct sljit_jump *start)
{
DEFINE_COMPILER;
stub_list *stub = sljit_alloc_memory(compiler, sizeof(stub_list));

if (!stub)
  return;

stub->start = start;
stub->quit = LABEL();
stub->next = common->stubs;
common->stubs = stub;
}
3043
3044
/* Emits the code of every recorded stub and empties the stub list. Each
stub starts where its "start" jump lands, performs a fast call that is
collected on common->stackalloc, and then jumps to its "quit" label. */
static void flush_stubs(compiler_common *common)
{
DEFINE_COMPILER;
stub_list *stub;

for (stub = common->stubs; stub; stub = stub->next)
  {
  JUMPHERE(stub->start);
  add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
  JUMPTO(SLJIT_JUMP, stub->quit);
  }

common->stubs = NULL;
}
3058
3059
/* Emits code that decrements COUNT_MATCH by one and, when it reaches zero,
takes a jump that is collected on common->calllimit. */
static SLJIT_INLINE void count_match(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *limit_reached;

OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
limit_reached = JUMP(SLJIT_ZERO);
add_jump(compiler, &common->calllimit, limit_reached);
}
3066
3067
/* Emits code that reserves "size" machine words by lowering STACK_TOP.
When STACK_TOP drops below STACK_LIMIT the emitted code enters a stub
(recorded with add_stub) and continues after it. */
static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
{
/* May destroy all locals and registers except TMP2. */
DEFINE_COMPILER;
struct sljit_jump *limit_exceeded;

SLJIT_ASSERT(size > 0);
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
#ifdef DESTROY_REGISTERS
/* Debug aid: overwrite everything that is allowed to be destroyed. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
#endif
limit_exceeded = CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0);
add_stub(common, limit_exceeded);
}
3083
3084
/* Emits code that releases "size" machine words by raising STACK_TOP. */
static SLJIT_INLINE void free_stack(compiler_common *common, int size)
{
DEFINE_COMPILER;
sljit_sw released_bytes;

SLJIT_ASSERT(size > 0);
released_bytes = (sljit_sw)(size * sizeof(sljit_sw));
OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, released_bytes);
}
3091
3092
/* Allocates "size" bytes of read only data that is linked to the compiler
state.

One extra machine word is allocated in front of the returned area; it holds
the previous head of the common->read_only_data_head chain, and the new
block becomes the head.

Returns a pointer to the usable area, or NULL when the compiler is already
in an error state or the allocation fails (in the latter case the compiler
memory error is set). */
static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
{
DEFINE_COMPILER;
sljit_uw *block = NULL;

if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  return block;

block = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
if (SLJIT_UNLIKELY(block == NULL))
  sljit_set_compiler_memory_error(compiler);
else
  {
  /* The first word links the blocks; the payload starts right after it. */
  *(void**)block = common->read_only_data_head;
  common->read_only_data_head = (void *)block;
  block++;
  }

return block;
}
3111
3112
/* Emits code that fills OVECTOR slots 1 .. length - 1 with the "subject
begin minus one character" value. The value is computed into SLJIT_R0 and
is still there afterwards; the loop variant also uses SLJIT_R1 (address)
and SLJIT_R2 (counter).

Fewer than 8 slots are written by unrolled stores. Longer vectors use a
loop, which stores with pre-update addressing when sljit reports that the
form is supported, and with a store plus explicit address increment
otherwise. */
static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
{
DEFINE_COMPILER;
struct sljit_label *fill_loop;
sljit_sw first_address;
sljit_s32 slot;
BOOL has_pre_update;

/* At this point we can freely use all temporary registers. */
SLJIT_ASSERT(length > 1);
/* SLJIT_R0 returns with begin - 1. */
OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));

if (length < 8)
  {
  slot = 1;
  while (slot < length)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(slot), SLJIT_R0, 0);
    slot++;
    }
  return;
  }

/* With SLJIT_MEM_SUPP this call only queries whether the form is supported. */
has_pre_update = sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS;

/* Pre-update stores advance the address before writing, so they start one
word earlier than the plain stores. */
first_address = OVECTOR_START;
if (!has_pre_update)
  first_address += sizeof(sljit_sw);

GET_LOCAL_BASE(SLJIT_R1, 0, first_address);
OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
fill_loop = LABEL();
if (has_pre_update)
  sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
else
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0);
  OP2(SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw));
  }
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, fill_loop);
}
3150
3151
/* Emits code that writes zero to every machine word of the local frame range
[early_fail_start_ptr, early_fail_end_ptr), addressed relative to SLJIT_SP.

  common  compiler state; early_fail_start_ptr must be below
          early_fail_end_ptr (asserted)

Three code shapes are produced depending on the byte size of the range:
one word -> a single store; up to six words -> unrolled stores; larger ->
a loop clearing three words per iteration plus up to two trailing stores.
The loop form overwrites TMP1 and TMP2; TMP3 may be overwritten as a zero
source. */
static SLJIT_INLINE void reset_early_fail(compiler_common *common)
3152
{
3153
DEFINE_COMPILER;
3154
sljit_u32 size = (sljit_u32)(common->early_fail_end_ptr - common->early_fail_start_ptr);
3155
sljit_u32 uncleared_size;
3156
/* Source operand of the stores: (SLJIT_IMM, 0) unless a zeroed register is
chosen below. */
sljit_s32 src = SLJIT_IMM;
3157
sljit_s32 i;
3158
struct sljit_label *loop;
3159
3160
SLJIT_ASSERT(common->early_fail_start_ptr < common->early_fail_end_ptr);
3161
3162
if (size == sizeof(sljit_sw))
3163
  {
3164
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->early_fail_start_ptr, SLJIT_IMM, 0);
3165
  return;
3166
  }
3167
3168
/* Prefer storing from a register holding zero when TMP3 has a non-negative
register index (presumably: it is a real machine register - confirm against
sljit) and the CPU has no dedicated zero register. */
if (sljit_get_register_index(TMP3) >= 0 && !sljit_has_cpu_feature(SLJIT_HAS_ZERO_REGISTER))
3169
  {
3170
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
3171
  src = TMP3;
3172
  }
3173
3174
if (size <= 6 * sizeof(sljit_sw))
3175
  {
3176
  for (i = common->early_fail_start_ptr; i < common->early_fail_end_ptr; i += sizeof(sljit_sw))
3177
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, src, 0);
3178
  return;
3179
  }
3180
3181
GET_LOCAL_BASE(TMP1, 0, common->early_fail_start_ptr);
3182
3183
/* Bytes left over after clearing whole groups of three words (0, 1 or 2 words). */
uncleared_size = ((size / sizeof(sljit_sw)) % 3) * sizeof(sljit_sw);
3184
3185
/* TMP2 = address where the three-word loop has to stop. */
OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, size - uncleared_size);
3186
3187
loop = LABEL();
3188
OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
3189
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3190
/* TMP1 was already advanced, so the 2nd and 3rd word use negative offsets. */
OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -2 * (sljit_sw)sizeof(sljit_sw), src, 0);
3191
OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -1 * (sljit_sw)sizeof(sljit_sw), src, 0);
3192
CMPTO(SLJIT_LESS, TMP1, 0, TMP2, 0, loop);
3193
3194
/* Trailing one or two words that did not fit into a group of three. */
if (uncleared_size >= sizeof(sljit_sw))
3195
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
3196
3197
if (uncleared_size >= 2 * sizeof(sljit_sw))
3198
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), sizeof(sljit_sw), src, 0);
3199
}
3200
3201
/* Emits code that resets the per-match state kept in the local frame.

  common  compiler state
  length  number of ovector slots to consider; must be > 1 (asserted)

The generated code copies the value stored in OVECTOR(1) into OVECTOR(2) up
to OVECTOR(length - 1), clears the mark_ptr and control_head_ptr slots when
those offsets are non-zero, and finishes with TMP1 loaded from the start_ptr
slot and STACK_TOP loaded from the 'end' field of the sljit_stack reached
through jit_arguments. TMP2 is overwritten in the loop forms. */
static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
3202
{
3203
DEFINE_COMPILER;
3204
struct sljit_label *loop;
3205
int i;
3206
3207
SLJIT_ASSERT(length > 1);
3208
/* OVECTOR(1) contains the "string begin - 1" constant. */
3209
/* With length == 2 there is nothing to fill, so the load is skipped. */
if (length > 2)
3210
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
3211
if (length < 8)
3212
  {
3213
  for (i = 2; i < length; i++)
3214
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
3215
  }
3216
else
3217
  {
3218
  /* Called with SLJIT_MEM_SUPP: used here only to test whether the
  pre-update store form is available (see the sljit API). */
  if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS)
3219
    {
3220
    GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
3221
    /* STACK_TOP serves as the loop counter; it is reloaded after the loop. */
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
3222
    loop = LABEL();
3223
    sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
3224
    OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
3225
    JUMPTO(SLJIT_NOT_ZERO, loop);
3226
    }
3227
  else
3228
    {
3229
    GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + 2 * sizeof(sljit_sw));
3230
    /* STACK_TOP serves as the loop counter; it is reloaded after the loop. */
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
3231
    loop = LABEL();
3232
    OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
3233
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(sljit_sw));
3234
    OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
3235
    JUMPTO(SLJIT_NOT_ZERO, loop);
3236
    }
3237
  }
3238
3239
/* Reload STACK_TOP. With virtual registers the load of jit_arguments->stack
is split in two steps: first the ARGUMENTS value, then (below) the field. */
if (!HAS_VIRTUAL_REGISTERS)
3240
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, stack));
3241
else
3242
  OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
3243
3244
if (common->mark_ptr != 0)
3245
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
3246
if (common->control_head_ptr != 0)
3247
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
3248
if (HAS_VIRTUAL_REGISTERS)
3249
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
3250
3251
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
3252
/* STACK_TOP currently points to the sljit_stack; replace it by its 'end' field. */
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, end));
3253
}
3254
3255
/* Walks a chain of records looking for a mark whose name equals skip_arg.

Each record is an array of sljit_sw values: element [0] is the address of the
next record (0 terminates the chain), element [1] is the record type. For
type_mark records element [2] is the address of the mark name and element [3]
is the value returned on a name match. type_then_trap records are skipped;
any other type is treated as unreachable.

  current   first record of the chain, may be NULL
  skip_arg  name to look for

Returns element [3] of the first matching mark record, or 0 when the chain
ends without a match. */
static sljit_sw SLJIT_FUNC do_search_mark(sljit_sw *current, PCRE2_SPTR skip_arg)
{
sljit_sw *entry;
sljit_sw *next;

for (entry = current; entry != NULL; entry = next)
  {
  if (entry[1] == type_mark)
    {
    if (PRIV(strcmp)(skip_arg, (PCRE2_SPTR)entry[2]) == 0)
      return entry[3];
    }
  else if (entry[1] != type_then_trap)
    {
    SLJIT_UNREACHABLE();
    }

  /* The chain is expected to be ordered by increasing address. */
  SLJIT_ASSERT(entry[0] == 0 || entry < (sljit_sw*)entry[0]);
  next = (sljit_sw*)entry[0];
  }

return 0;
}
3278
3279
/* Emits the code that publishes a successful match to the caller.

  common      compiler state
  topbracket  highest capture bracket number considered when computing the
              return value

The generated code:
  - keeps the old OVECTOR(1) value in SLJIT_S2 and stores STR_PTR there;
  - copies the start_ptr slot to jit_arguments->startchar_ptr and, when
    mark_ptr is non-zero, the mark slot to jit_arguments->mark_ptr;
  - runs a loop of oveccount iterations (value read from jit_arguments) that
    turns each local ovector slot into an offset from jit_arguments->begin,
    in code units, and stores it as a PCRE2_SIZE into match_data->ovector;
  - leaves the return value in SLJIT_RETURN_REG: 1 when topbracket <= 1,
    otherwise the counter obtained by scanning the local ovector backwards,
    two words at a time, while the slot still equals the value saved in
    SLJIT_S2. */
static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
3280
{
3281
DEFINE_COMPILER;
3282
struct sljit_label *loop;
3283
BOOL has_pre;
3284
3285
/* At this point we can freely use all registers. */
3286
OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
3287
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
3288
3289
/* Both variants end with: SLJIT_S0 = start_ptr slot, SLJIT_R1 = oveccount,
SLJIT_R2 = address one PCRE2_SIZE before match_data->ovector. With virtual
registers the ARGUMENTS value is first copied into SLJIT_R0. */
if (HAS_VIRTUAL_REGISTERS)
3290
  {
3291
  OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
3292
  OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
3293
  if (common->mark_ptr != 0)
3294
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
3295
  OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, oveccount));
3296
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
3297
  if (common->mark_ptr != 0)
3298
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
3299
  OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, match_data),
3300
    SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
3301
  }
3302
else
3303
  {
3304
  OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
3305
  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, match_data));
3306
  if (common->mark_ptr != 0)
3307
    OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
3308
  OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, oveccount));
3309
  OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
3310
  if (common->mark_ptr != 0)
3311
    OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R0, 0);
3312
  OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
3313
  }
3314
3315
/* Called with SLJIT_MEM_SUPP: used here only to test whether a pre-update
load form is available (see the sljit API). */
has_pre = sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;
3316
3317
/* The pre-update form starts one word before the first slot. */
GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0));
3318
/* SLJIT_R0 = subject begin, subtracted from every slot in the loop. */
OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? SLJIT_R0 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
3319
3320
loop = LABEL();
3321
3322
if (has_pre)
3323
  sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));
3324
else
3325
  {
3326
  OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0);
3327
  OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
3328
  }
3329
3330
/* Advance the output pointer first; it was set up one element early. */
OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, sizeof(PCRE2_SIZE));
3331
OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_R0, 0);
3332
/* Copy the integer value to the output buffer */
3333
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3334
/* Byte distance -> code unit count. */
OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
3335
#endif
3336
3337
SLJIT_ASSERT(sizeof(PCRE2_SIZE) == 4 || sizeof(PCRE2_SIZE) == 8);
3338
OP1(((sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV), SLJIT_MEM1(SLJIT_R2), 0, SLJIT_S1, 0);
3339
3340
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
3341
JUMPTO(SLJIT_NOT_ZERO, loop);
3342
3343
/* Calculate the return value, which is the maximum ovector value. */
3344
if (topbracket > 1)
3345
  {
3346
  /* Capability test for a pre-update load with a negative two-word step. */
  if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw))) == SLJIT_SUCCESS)
3347
    {
3348
    /* Start one pair beyond the first slot read, the load steps back first. */
    GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
3349
    OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
3350
3351
    /* OVECTOR(0) is never equal to SLJIT_S2. */
3352
    loop = LABEL();
3353
    sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
3354
    OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
3355
    CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
3356
    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
3357
    }
3358
  else
3359
    {
3360
    GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + (topbracket - 1) * 2 * sizeof(sljit_sw));
3361
    OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
3362
3363
    /* OVECTOR(0) is never equal to SLJIT_S2. */
3364
    loop = LABEL();
3365
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0);
3366
    OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * (sljit_sw)sizeof(sljit_sw));
3367
    OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
3368
    CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
3369
    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
3370
    }
3371
  }
3372
else
3373
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
3374
}
3375
3376
/* Emits the exit sequence for a partial match.

  common  compiler state; start_used_ptr and start_ptr must be non-zero, and
          hit_start must be non-zero exactly in PCRE2_JIT_PARTIAL_SOFT mode
          (asserted)
  quit    label the generated code jumps to at the end

The generated code sets SLJIT_RETURN_REG to PCRE2_ERROR_PARTIAL, stores the
start position (hit_start slot in soft mode, start_ptr slot otherwise) into
jit_arguments->startchar_ptr, and writes two PCRE2_SIZE values into
match_data->ovector: the start position and STR_END, both as offsets from
jit_arguments->begin in code units. STR_END, SLJIT_S1, SLJIT_R1 and SLJIT_R2
are overwritten. */
static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
3377
{
3378
DEFINE_COMPILER;
3379
sljit_s32 mov_opcode;
3380
/* With virtual registers the ARGUMENTS value is copied into SLJIT_R1 first. */
sljit_s32 arguments_reg = !HAS_VIRTUAL_REGISTERS ? ARGUMENTS : SLJIT_R1;
3381
3382
SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S0, str_end_must_be_saved_reg0);
3383
SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
3384
  && (common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start != 0 : common->hit_start == 0));
3385
3386
if (arguments_reg != ARGUMENTS)
3387
  OP1(SLJIT_MOV, arguments_reg, 0, ARGUMENTS, 0);
3388
OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP),
3389
  common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start : common->start_ptr);
3390
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_PARTIAL);
3391
3392
/* Store match begin and end. */
3393
OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, begin));
3394
OP1(SLJIT_MOV, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_R2, 0);
3395
/* Last use of arguments_reg: when it is SLJIT_R1 this load replaces it with
the match_data pointer. */
OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, match_data));
3396
3397
mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
3398
3399
/* ovector[0] = start position relative to begin. */
OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S1, 0);
3400
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3401
OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
3402
#endif
3403
OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector), SLJIT_R2, 0);
3404
3405
/* ovector[1] = subject end relative to begin. */
OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_S1, 0);
3406
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3407
OP2(SLJIT_ASHR, STR_END, 0, STR_END, 0, SLJIT_IMM, UCHAR_SHIFT);
3408
#endif
3409
OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector) + sizeof(PCRE2_SIZE), STR_END, 0);
3410
3411
JUMPTO(SLJIT_JUMP, quit);
3412
}
3413
3414
/* Emits code that lowers the start_used_ptr slot to STR_PTR when STR_PTR is
below the recorded value. Nothing is emitted unless the compile mode is one
of the two partial matching modes. The generated code may destroy TMP1. */
static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *keep_value;

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  /* In soft mode the slot may hold -1 and that value has to survive, so the
  comparison is done on slot + 1. */
  OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
  /* The store is skipped if start_used_ptr < STR_PTR or start_used_ptr == -1.
  When start_used_ptr == STR_PTR the store is redundant but harmless. */
  keep_value = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  JUMPHERE(keep_value);
  return;
  }

if (common->mode != PCRE2_JIT_PARTIAL_HARD)
  return;

/* Hard mode: compare the slot directly. */
keep_value = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
JUMPHERE(keep_value);
}
3437
3438
/* Tells whether the character starting at cc has an other case that differs
from the character itself.

  common  compiler state; utf, ucp and the fcc case-flipping table are read
  cc      pointer to the first code unit of the character

With Unicode support and utf or ucp set, characters above 127 are looked up
through UCD_OTHERCASE and the rest through fcc. Otherwise only code units
accepted by MAX_255 are looked up in fcc; anything else yields FALSE. */
static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, PCRE2_SPTR cc)
{
unsigned int chr;

#ifdef SUPPORT_UNICODE
if (common->utf || common->ucp)
  {
  if (!common->utf)
    chr = *cc;
  else
    {
    GETCHAR(chr, cc);
    }

  return (chr > 127) ? (chr != UCD_OTHERCASE(chr)) : (common->fcc[chr] != chr);
  }
#endif

chr = *cc;
if (!MAX_255(chr))
  return FALSE;
return common->fcc[chr] != chr;
}
3463
3464
/* Returns the other case of character c.

  common  compiler state; utf, ucp and the fcc case-flipping table are read
  c       character value

With Unicode support, values above 127 are mapped through UCD_OTHERCASE when
utf or ucp is set. Every other value goes through TABLE_GET on fcc, with c
itself passed as the fallback argument. */
static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
{
#ifdef SUPPORT_UNICODE
if (c > 127 && (common->utf || common->ucp))
  return UCD_OTHERCASE(c);
#endif

return TABLE_GET(c, common->fcc, c);
}
3473
3474
/* Checks whether the character at cc and its other case differ in exactly one
bit, so that both cases can be matched with a single masked comparison.

  common  compiler state; utf, ucp and the fcc table are read
  cc      pointer to the first code unit of the character; the character
          must have a different other case (asserted)

Returns 0 when more than one bit differs. Otherwise the low 8 bits of the
result hold the differing bit and the bits from bit 8 upwards hold a position
value: always 0 for the ASCII 0x20 case; in the 8 bit library the index of the
UTF-8 code unit that carries the bit; in the 16/32 bit libraries 0 or 1
depending on whether the bit is in the low or high 8 bits, and 2 or 3 for the
second unit of a character above 65535 in UTF mode. */
static unsigned int char_get_othercase_bit(compiler_common *common, PCRE2_SPTR cc)
{
unsigned int chr, other, diff;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
int unit_index;
#endif

#ifdef SUPPORT_UNICODE
if (!common->utf && !common->ucp)
  {
  chr = *cc;
  other = TABLE_GET(chr, common->fcc, chr);
  }
else
  {
  if (common->utf)
    {
    GETCHAR(chr, cc);
    }
  else
    chr = *cc;

  other = (chr <= 127) ? common->fcc[chr] : UCD_OTHERCASE(chr);
  }
#else
chr = *cc;
other = TABLE_GET(chr, common->fcc, chr);
#endif

SLJIT_ASSERT(chr != other);

diff = chr ^ other;

/* Fast path for the English alphabet. */
if (chr <= 127 && diff == 0x20)
  return (0 << 8) | 0x20;

/* chr != other, so at least one bit is set; reject more than one. */
if (!is_powerof2(diff))
  return 0;

#if PCRE2_CODE_UNIT_WIDTH == 8

#ifdef SUPPORT_UNICODE
if (common->utf && chr > 127)
  {
  /* Start at the last code unit and step back one unit for every group of
  six payload bits that does not contain the differing bit. */
  for (unit_index = GET_EXTRALEN(*cc); (diff & 0x3f) == 0; diff >>= 6)
    unit_index--;
  return (unit_index << 8) | diff;
  }
#endif /* SUPPORT_UNICODE */
return (0 << 8) | diff;

#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32

#ifdef SUPPORT_UNICODE
if (common->utf && chr > 65535)
  {
  /* A bit below bit 10 is reported with position 2 or 3. */
  if (diff < (1u << 10))
    return (diff < 256) ? ((2 << 8) | diff) : ((3 << 8) | (diff >> 8));
  diff >>= 10;
  }
#endif /* SUPPORT_UNICODE */
return (diff < 256) ? ((0u << 8) | diff) : ((1u << 8) | (diff >> 8));

#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
}
3549
3550
/* Emits the code that reports a partial match at the current position. The
emitted instructions do not modify registers.

  common  compiler state
  force   when FALSE and allow_empty_partial is not set, the report is skipped
          at run time if the start_used_ptr slot is >= STR_PTR; must not be
          TRUE in PCRE2_JIT_COMPLETE mode (asserted)

Nothing is emitted in PCRE2_JIT_COMPLETE mode. In soft partial mode the
report consists of storing 0 into the hit_start slot (skipped at run time when
start_used_ptr is -1 on the forced / allow_empty_partial path); in hard
partial mode it is a jump to the partial match handler. */
static void check_partial(compiler_common *common, BOOL force)
{
DEFINE_COMPILER;
struct sljit_jump *skip_report = NULL;

SLJIT_ASSERT(!force || common->mode != PCRE2_JIT_COMPLETE);

if (common->mode == PCRE2_JIT_COMPLETE)
  return;

if (!(force || common->allow_empty_partial))
  skip_report = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
else if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  skip_report = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
else if (common->partialmatchlabel == NULL)
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
else
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);

if (skip_report != NULL)
  JUMPHERE(skip_report);
}
3579
3580
/* Emits an end-of-subject test. The emitted instructions do not affect
registers; the helper is usually used in a tight spot.

  common       compiler state
  end_reached  jump list that receives the jumps taken when STR_PTR has
               reached STR_END

In PCRE2_JIT_COMPLETE mode a single compare-and-jump is emitted. In the
partial modes the generated code first skips everything while STR_PTR is
below STR_END; at the end of the subject it jumps to end_reached if the
start_used_ptr slot is >= STR_PTR, otherwise it records the hit (soft mode:
hit_start slot = 0, then jump to end_reached) or jumps to the partial match
handler (hard mode). */
static void check_str_end(compiler_common *common, jump_list **end_reached)
{
DEFINE_COMPILER;
struct sljit_jump *not_at_end;

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

/* Both partial modes start with the same test. */
add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));

JUMPHERE(not_at_end);
}
3609
3610
/* Emits an end-of-subject test that backtracks when no character is left and
records a partial match where the mode asks for it.

  common      compiler state
  backtracks  jump list that receives the jumps taken when matching cannot
              continue at the current position

In PCRE2_JIT_COMPLETE mode a single compare-and-jump to backtracks is
emitted. In the partial modes the generated code does nothing while STR_PTR is
below STR_END. At the end of the subject it backtracks without recording
anything if start_used_ptr >= STR_PTR (allow_empty_partial not set) or if
start_used_ptr is -1 (allow_empty_partial set, soft mode); otherwise soft mode
stores 0 into the hit_start slot and backtracks, and hard mode jumps to the
partial match handler. */
static void detect_partial_match(compiler_common *common, jump_list **backtracks)
{
DEFINE_COMPILER;
struct sljit_jump *has_input;

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

/* Partial matching mode. */
has_input = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

if (common->allow_empty_partial)
  {
  if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1));
  }
else
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

if (common->mode != PCRE2_JIT_PARTIAL_SOFT)
  {
  if (common->partialmatchlabel == NULL)
    add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
  else
    JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
  }
else
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  }

JUMPHERE(has_input);
}
3642
3643
/* Emits the partial match bookkeeping for a position where the end of the
subject has already been detected by the caller.

  common  compiler state

Soft partial mode: stores 0 into the hit_start slot unless the start_used_ptr
slot is >= STR_PTR. Hard partial mode: jumps to the partial match handler
when the start_used_ptr slot is below STR_PTR. Nothing is emitted in any
other mode. */
static void process_partial_match(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *skip_hit;

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  skip_hit = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  JUMPHERE(skip_hit);
  return;
  }

if (common->mode != PCRE2_JIT_PARTIAL_HARD)
  return;

if (common->partialmatchlabel == NULL)
  add_jump(compiler, &common->partialmatch, CMP(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
else
  CMPTO(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0, common->partialmatchlabel);
}
3663
3664
/* Emits "continue at label while input is left, otherwise handle the end of
the subject".

  common  compiler state
  label   target of the jump taken when STR_PTR is below STR_END

When the jump is not taken, the code emitted by process_partial_match()
follows. */
static void detect_partial_match_to(compiler_common *common, struct sljit_label *label)
{
DEFINE_COMPILER;

/* More input available: go to the caller supplied label. */
CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, label);

/* End of subject reached: do the mode dependent bookkeeping. */
process_partial_match(common);
}
3671
3672
/* Emits code that loads the character at STR_PTR into TMP1 without consuming
it.

  common      compiler state
  max         largest character value the caller cares about; when it is
              below 128 (8 bit UTF), or below 0xd800 (16 bit UTF, 32 bit with
              invalid_utf), only the plain code unit load is emitted
  dst, dstw   operand used by the 8 and 16 bit helper-call paths to save
              STR_PTR while the helper runs
  backtracks  optional jump list; where it is used, a jump is added for the
              case that the decoded value is INVALID_UTF_CHAR or (32 bit) out
              of range

Without Unicode support, or when the relevant utf / invalid_utf flag is not
set, the generated code is a single code unit load. */
static void peek_char(compiler_common *common, sljit_u32 max, sljit_s32 dst, sljit_sw dstw, jump_list **backtracks)
3673
{
3674
/* Reads the character into TMP1, keeps STR_PTR.
3675
Does not check STR_END. TMP2, dst, RETURN_ADDR Destroyed. */
3676
DEFINE_COMPILER;
3677
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
3678
struct sljit_jump *jump;
3679
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
3680
3681
SLJIT_UNUSED_ARG(max);
3682
SLJIT_UNUSED_ARG(dst);
3683
SLJIT_UNUSED_ARG(dstw);
3684
SLJIT_UNUSED_ARG(backtracks);
3685
3686
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3687
3688
#ifdef SUPPORT_UNICODE
3689
#if PCRE2_CODE_UNIT_WIDTH == 8
3690
if (common->utf)
3691
  {
3692
  if (max < 128) return;
3693
3694
  /* Code units below 0x80 are complete characters. Otherwise save STR_PTR in
  dst, step past the first unit, call the shared decoding helper through a
  fast call and restore STR_PTR afterwards. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
3695
  OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
3696
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3697
  add_jump(compiler, common->invalid_utf ? &common->utfreadchar_invalid : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
3698
  OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
3699
  if (backtracks && common->invalid_utf)
3700
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
3701
  JUMPHERE(jump);
3702
  }
3703
#elif PCRE2_CODE_UNIT_WIDTH == 16
3704
if (common->utf)
3705
  {
3706
  if (max < 0xd800) return;
3707
3708
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3709
3710
  if (common->invalid_utf)
3711
    {
3712
    /* Units outside 0xd800..0xdfff need no decoding; the rest is handed to
    the validating helper with STR_PTR saved in dst around the call. */
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
3713
    OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
3714
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3715
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
3716
    OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
3717
    /* NOTE(review): common->invalid_utf is already known to be set in this
    branch, so the second half of the condition looks redundant. */
    if (backtracks && common->invalid_utf)
3718
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
3719
    }
3720
  else
3721
    {
3722
    /* TMP2 contains the high surrogate. */
3723
    /* Only 0xd800..0xdbff is combined with the following unit:
    TMP1 = next unit + 0x10000 - 0xdc00 + (TMP2 << 10). */
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
3724
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3725
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
3726
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
3727
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
3728
    }
3729
3730
  JUMPHERE(jump);
3731
  }
3732
#elif PCRE2_CODE_UNIT_WIDTH == 32
3733
if (common->invalid_utf)
3734
  {
3735
  if (max < 0xd800) return;
3736
3737
  /* A value is rejected when it is >= 0x110000 or inside 0xd800..0xdfff.
  With a backtrack list the generated code jumps there; without one TMP1 is
  replaced by INVALID_UTF_CHAR using conditional moves. */
  if (backtracks != NULL)
3738
    {
3739
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3740
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
3741
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
3742
    }
3743
  else
3744
    {
3745
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3746
    OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);
3747
    CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
3748
    OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
3749
    CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
3750
    }
3751
  }
3752
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
3753
#endif /* SUPPORT_UNICODE */
3754
}
3755
3756
/* Emits code that loads the character ending just before STR_PTR into TMP1
without moving STR_PTR.

  common      compiler state
  max         largest character value the caller cares about; when it is
              below 128 (8 bit UTF) or below 0xd800 (16 bit UTF) only the
              plain load of the preceding code unit is emitted
  backtracks  jump list that receives a jump for invalid input when
              invalid_utf is set; the 8 and 16 bit paths accept NULL

Without Unicode support, or when the relevant utf / invalid_utf flag is not
set, the generated code is a single load of the code unit at offset -1. */
static void peek_char_back(compiler_common *common, sljit_u32 max, jump_list **backtracks)
3757
{
3758
/* Reads one character back without moving STR_PTR. TMP2 must
3759
contain the start of the subject buffer. Affects TMP1, TMP2, and RETURN_ADDR. */
3760
DEFINE_COMPILER;
3761
3762
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
3763
struct sljit_jump *jump;
3764
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
3765
3766
SLJIT_UNUSED_ARG(max);
3767
SLJIT_UNUSED_ARG(backtracks);
3768
3769
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3770
3771
#ifdef SUPPORT_UNICODE
3772
#if PCRE2_CODE_UNIT_WIDTH == 8
3773
if (common->utf)
3774
  {
3775
  if (max < 128) return;
3776
3777
  /* Code units below 0x80 are complete characters; everything else is
  decoded by a shared helper reached through a fast call. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
3778
  if (common->invalid_utf)
3779
    {
3780
    add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
3781
    if (backtracks != NULL)
3782
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
3783
    }
3784
  else
3785
    add_jump(compiler, &common->utfpeakcharback, JUMP(SLJIT_FAST_CALL));
3786
  JUMPHERE(jump);
3787
  }
3788
#elif PCRE2_CODE_UNIT_WIDTH == 16
3789
if (common->utf)
3790
  {
3791
  if (max < 0xd800) return;
3792
3793
  if (common->invalid_utf)
3794
    {
3795
    /* Units below 0xd800 need no decoding; the rest goes to the validating
    helper. */
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3796
    add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
3797
    if (backtracks != NULL)
3798
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
3799
    }
3800
  else
3801
    {
3802
    /* Only 0xdc00..0xdfff is combined with the unit at offset -2:
    TMP1 = ((unit[-2] - 0xd800) << 10) + (unit[-1] - 0xdc00) + 0x10000. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
3803
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xdc00);
3804
    /* TMP2 contains the low surrogate. */
3805
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3806
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
3807
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3808
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
3809
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
3810
    }
3811
    JUMPHERE(jump);
3812
  }
3813
#elif PCRE2_CODE_UNIT_WIDTH == 32
3814
/* NOTE(review): unlike the 8 and 16 bit paths, and unlike peek_char(), this
branch passes backtracks to add_jump() without a NULL check and does not use
max for an early return. Confirm that every caller supplies a jump list when
invalid_utf is set. */
if (common->invalid_utf)
3815
  {
3816
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3817
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
3818
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
3819
  }
3820
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
3821
#endif /* SUPPORT_UNICODE */
3822
}
3823
3824
/* Option bits accepted by the 'options' argument of read_char(). */

/* When set, read_char() does not take its early return for small 'max'
values, so STR_PTR is still moved past the remaining code units of the
character. */
#define READ_CHAR_UPDATE_STR_PTR 0x1
3825
/* In the invalid UTF path, call the utfreadnewline_invalid helper instead of
utfreadchar_invalid. */
#define READ_CHAR_UTF8_NEWLINE 0x2
3826
/* Combination of the two bits above. */
#define READ_CHAR_NEWLINE (READ_CHAR_UPDATE_STR_PTR | READ_CHAR_UTF8_NEWLINE)
3827
/* Bypass the invalid UTF helper path even when common->invalid_utf is set
(presumably the caller already knows the input is valid here - confirm). */
#define READ_CHAR_VALID_UTF 0x4
3828
3829
static void read_char(compiler_common *common, sljit_u32 min, sljit_u32 max,
3830
  jump_list **backtracks, sljit_u32 options)
3831
{
3832
/* Reads the precise value of a character into TMP1, if the character is
3833
between min and max (c >= min && c <= max). Otherwise it returns with a value
3834
outside the range. Does not check STR_END. */
3835
DEFINE_COMPILER;
3836
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
3837
struct sljit_jump *jump;
3838
#endif
3839
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
3840
struct sljit_jump *jump2;
3841
#endif
3842
3843
SLJIT_UNUSED_ARG(min);
3844
SLJIT_UNUSED_ARG(max);
3845
SLJIT_UNUSED_ARG(backtracks);
3846
SLJIT_UNUSED_ARG(options);
3847
SLJIT_ASSERT(min <= max);
3848
3849
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3850
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3851
3852
#ifdef SUPPORT_UNICODE
3853
#if PCRE2_CODE_UNIT_WIDTH == 8
3854
if (common->utf)
3855
  {
3856
  if (max < 128 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;
3857
3858
  if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
3859
    {
3860
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
3861
3862
    if (options & READ_CHAR_UTF8_NEWLINE)
3863
      add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
3864
    else
3865
      add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
3866
3867
    if (backtracks != NULL)
3868
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
3869
    JUMPHERE(jump);
3870
    return;
3871
    }
3872
3873
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3874
  if (min >= 0x10000)
3875
    {
3876
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
3877
    if (options & READ_CHAR_UPDATE_STR_PTR)
3878
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3879
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3880
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
3881
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3882
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3883
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3884
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3885
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3886
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3887
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3888
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
3889
    if (!(options & READ_CHAR_UPDATE_STR_PTR))
3890
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
3891
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3892
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3893
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3894
    JUMPHERE(jump2);
3895
    if (options & READ_CHAR_UPDATE_STR_PTR)
3896
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
3897
    }
3898
  else if (min >= 0x800 && max <= 0xffff)
3899
    {
3900
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
3901
    if (options & READ_CHAR_UPDATE_STR_PTR)
3902
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3903
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3904
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
3905
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3906
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3907
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3908
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3909
    if (!(options & READ_CHAR_UPDATE_STR_PTR))
3910
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3911
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3912
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3913
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3914
    JUMPHERE(jump2);
3915
    if (options & READ_CHAR_UPDATE_STR_PTR)
3916
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
3917
    }
3918
  else if (max >= 0x800)
3919
    {
3920
    add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
3921
    }
3922
  else if (max < 128)
3923
    {
3924
    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3925
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3926
    }
3927
  else
3928
    {
3929
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3930
    if (!(options & READ_CHAR_UPDATE_STR_PTR))
3931
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3932
    else
3933
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3934
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3935
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3936
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3937
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3938
    if (options & READ_CHAR_UPDATE_STR_PTR)
3939
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
3940
    }
3941
  JUMPHERE(jump);
3942
  }
3943
#elif PCRE2_CODE_UNIT_WIDTH == 16
3944
if (common->utf)
3945
  {
3946
  if (max < 0xd800 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;
3947
3948
  if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
3949
    {
3950
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3951
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
3952
3953
    if (options & READ_CHAR_UTF8_NEWLINE)
3954
      add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
3955
    else
3956
      add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
3957
3958
    if (backtracks != NULL)
3959
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
3960
    JUMPHERE(jump);
3961
    return;
3962
    }
3963
3964
  if (max >= 0x10000)
3965
    {
3966
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3967
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
3968
    /* TMP2 contains the high surrogate. */
3969
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3970
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
3971
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3972
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
3973
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
3974
    JUMPHERE(jump);
3975
    return;
3976
    }
3977
3978
  /* Skip low surrogate if necessary. */
3979
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3980
3981
  if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
3982
    {
3983
    if (options & READ_CHAR_UPDATE_STR_PTR)
3984
      OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3985
    OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400);
3986
    if (options & READ_CHAR_UPDATE_STR_PTR)
3987
      CMOV(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0);
3988
    if (max >= 0xd800)
3989
      CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, 0x10000);
3990
    }
3991
  else
3992
    {
3993
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
3994
    if (options & READ_CHAR_UPDATE_STR_PTR)
3995
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3996
    if (max >= 0xd800)
3997
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
3998
    JUMPHERE(jump);
3999
    }
4000
  }
4001
#elif PCRE2_CODE_UNIT_WIDTH == 32
4002
if (common->invalid_utf)
4003
  {
4004
  if (backtracks != NULL)
4005
    {
4006
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4007
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
4008
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
4009
    }
4010
  else
4011
    {
4012
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4013
    OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);
4014
    CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
4015
    OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
4016
    CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
4017
    }
4018
  }
4019
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
4020
#endif /* SUPPORT_UNICODE */
4021
}
4022
4023
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
4024
4025
static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
{
/* Tells whether the character codes below 128 are enough to determine a
match. This is the case when bytes 16..31 of the bitset all hold the same
filler value: 0xff when nclass is set, zero otherwise. */
const sljit_u8 filler = nclass ? 0xff : 0;
int index;

for (index = 16; index < 32; index++)
  {
  if (bitset[index] != filler)
    return FALSE;
  }

return TRUE;
}
4041
4042
static void read_char7_type(compiler_common *common, jump_list **backtracks, BOOL negated)
{
/* Reads the precise character type of a character into TMP1, if the character
is less than 128. Otherwise it returns with zero. Does not check STR_END.
STR_PTR is always advanced past the first code unit. When negated is set, the
rest of a multi-byte character is consumed as well, and in invalid UTF mode a
malformed sequence jumps to backtracks. TMP2 is destroyed. */
DEFINE_COMPILER;
struct sljit_jump *jump;

SLJIT_ASSERT(common->utf);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* All values > 127 are zero in ctypes. */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);

if (negated)
  {
  /* A single code unit character needs no further processing. */
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);

  if (common->invalid_utf)
    {
    /* The utfreadchar_invalid helper expects the first byte of the character
    in TMP1. At this point TMP1 holds the ctypes value and the byte itself is
    in TMP2, so it must be copied over before the call. */
    OP1(SLJIT_MOV, TMP1, 0, TMP2, 0);
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    /* The helper returned a character value in TMP1; the type of a
    character above 127 is reported as zero. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    }
  else
    {
    /* Skip the remaining code units using the utf8_table4 length table. */
    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
    }
  JUMPHERE(jump);
  }
}
4076
4077
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
4078
4079
static void read_char8_type(compiler_common *common, jump_list **backtracks, BOOL negated)
{
/* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END
before reading the first code unit. Only character values up to 255 have a
ctypes entry; the type of every other character is reported as zero. In
invalid UTF mode malformed input jumps to backtracks. TMP2 is destroyed. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
struct sljit_jump *jump;
#endif
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_jump *jump2;
#endif

SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(negated);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  /* The result of this read may be unused, but saves an "else" part. */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);

  if (!negated)
    {
    /* Only a two byte sequence is decoded here: TMP2 becomes
    ((first byte - 0xc2) << 6) + second byte. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe0 - 0xc2));

    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
    /* The continuation byte check must use TMP1 (second byte - 0x80). TMP2
    already holds the combined value, which is at least 0x80 for every well
    formed two byte sequence. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40));

    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else if (common->invalid_utf)
    {
    /* The utfreadchar_invalid helper expects the first byte of the character
    in TMP1. At this point TMP1 holds the ctypes value and the byte itself is
    in TMP2, so it must be copied over before the call. */
    OP1(SLJIT_MOV, TMP1, 0, TMP2, 0);
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
    OP1(SLJIT_MOV, TMP2, 0, TMP1, 0);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));

    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else
    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));

  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf && negated)
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x110000));
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32 */

#if PCRE2_CODE_UNIT_WIDTH != 8
/* The ctypes array contains only 256 values. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf && negated)
  {
  /* Skip low surrogate if necessary. */
  if (!common->invalid_utf)
    {
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);

    if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
      {
      OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400);
      CMOV(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0);
      }
    else
      {
      jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      JUMPHERE(jump);
      }
    return;
    }

  /* Invalid UTF mode: a code unit in the 0xd800..0xdfff range must be a high
  surrogate that is followed, inside the subject, by a low surrogate. */
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));

  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16 */
}
4197
4198
static void move_back(compiler_common *common, jump_list **backtracks, BOOL must_be_valid)
{
/* Moves STR_PTR one character backwards; TMP1 is used as scratch. When
must_be_valid is TRUE, TMP2 is left alone. Otherwise TMP2 has to hold the
start of the subject buffer on entry and its value is lost. STR_PTR is not
modified for invalid character sequences. */
DEFINE_COMPILER;

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *single_unit;
#endif

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_label *step_again;

if (common->utf)
  {
  if (common->invalid_utf && !must_be_valid)
    {
    /* Code units below 0x80 are complete characters; anything else is
    handled by the utfmoveback_invalid helper, which leaves zero in TMP1
    when the sequence is not valid. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
    add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
    JUMPHERE(single_unit);
    return;
    }

  /* Keep stepping back while the code unit has the 10xxxxxx form. */
  step_again = LABEL();
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, step_again);
  return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  if (common->invalid_utf && !must_be_valid)
    {
    /* Only code units in the 0xd800..0xdfff range need the helper. */
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    single_unit = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0xd800);
    add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
    JUMPHERE(single_unit);
    return;
    }

  /* Step over one more code unit when the one just passed was a low
  surrogate: TMP1 becomes 0 or 1, scaled to a byte offset. */
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf && !must_be_valid)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  if (backtracks == NULL)
    {
    /* No backtrack list: step back only when the value is below 0x110000,
    by subtracting a flag (0 or 1) scaled to a byte offset. */
    OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
    OP2(SLJIT_SHL,  TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
    }
  else
    {
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }
  return;
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */

SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(must_be_valid);

/* Plain case: every character is exactly one code unit. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
4284
4285
static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
{
/* The character to test is in TMP1. A jump is added to backtracks when the
character is a newline (jumpifmatch is TRUE) or when it is not a newline
(jumpifmatch is FALSE). TMP2 may be destroyed. */
DEFINE_COMPILER;
struct sljit_jump *is_cr;

if (nltype == NLTYPE_ANY)
  {
  /* The anynewline helper does the test; its answer is read from the
  zero flag. */
  add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return;
  }

if (nltype != NLTYPE_ANYCRLF)
  {
  /* A fixed, single code unit newline. */
  SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
  add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
  return;
  }

/* NLTYPE_ANYCRLF: only CR and NL count as newlines. */
if (!jumpifmatch)
  {
  is_cr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
  JUMPHERE(is_cr);
  return;
  }

add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
}
4317
4318
#ifdef SUPPORT_UNICODE
4319
4320
#if PCRE2_CODE_UNIT_WIDTH == 8
4321
static void do_utfreadchar(compiler_common *common)
{
/* Emits the fast UTF-8 decoder helper. On entry TMP1 holds the first byte of
the character (>= 0xc0) and STR_PTR points to the byte after it. On return
TMP1 holds the character value and STR_PTR has been moved past the
character. TMP2 is destroyed. No validity checks are made. */
DEFINE_COMPILER;
struct sljit_jump *longer;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Merge the second byte: TMP1 = (TMP1 << 6) | (byte & 0x3f). */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Searching for the first zero. Bit 0x800 is bit 0x20 of the first byte
after the shift. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
longer = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence: remove the shifted 0xc0 marker bits. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(longer);
/* Merge the third byte in the same way. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
longer = JUMP(SLJIT_NOT_ZERO);
/* Three byte sequence: remove the shifted 0xe0 marker bits. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Four byte sequence: the 0xf0 marker bits are removed before the last
shift. */
JUMPHERE(longer);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4365
4366
static void do_utfreadtype8(compiler_common *common)
{
/* Emits the fast UTF-8 character type reader helper. On entry TMP2 holds the
first byte of the character (>= 0xc0) and STR_PTR points to the byte after
it. On return TMP1 holds the ctypes entry of the character, or zero when the
character has no entry, and STR_PTR has been moved past the character. */
DEFINE_COMPILER;
struct sljit_jump *three_or_more;
struct sljit_jump *above_255;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
three_or_more = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
/* The upper 5 bits are known at this point. A value above 3 would give a
character of 256 or more. */
above_255 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Two byte character without a ctypes entry. */
JUMPHERE(above_255);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* We only have types for characters less than 256, so a longer sequence is
just skipped using the utf8_table4 length table. */
JUMPHERE(three_or_more);
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4401
4402
static void do_utfreadchar_invalid(compiler_common *common)
{
/* Emits the checking UTF-8 decoder helper. On entry TMP1 holds the first
byte of the character (>= 0xc0) and STR_PTR points to the byte after it. On
return TMP1 holds the character value, or INVALID_UTF_CHAR for a malformed
sequence. STR_PTR is undefined for invalid characters. TMP2 is destroyed. */
DEFINE_COMPILER;
sljit_s32 idx;
sljit_s32 use_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
struct sljit_jump *longer;
struct sljit_jump *near_buffer_end;
struct sljit_label *three_byte_tail;
struct sljit_label *invalid_exit;
/* Jumps to the common failure exit. An entry is NULL when the check was
emitted as a conditional move instead of a branch. */
struct sljit_jump *bad[11];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc2);

/* Usually more than 3 characters remained in the subject buffer, so
STR_PTR is moved forward first and the bytes are read at negative offsets. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));

/* Not a valid start of a multi-byte sequence, no more bytes read. */
bad[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xf5 - 0xc2);

near_buffer_end = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
bad[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
longer = JUMP(SLJIT_NOT_ZERO);

/* Two-byte sequence: undo two of the three steps taken above. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(longer);

/* Three-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (!use_cmov)
  bad[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
else
  {
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0x20000);
  bad[2] = NULL;
  }

OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
longer = JUMP(SLJIT_NOT_ZERO);

/* The three byte path taken near the end of the buffer joins here. */
three_byte_tail = LABEL();

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2d800);
if (!use_cmov)
  bad[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
else
  {
  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0xd800);
  bad[3] = NULL;
  }
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (!use_cmov)
  bad[4] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
else
  {
  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  bad[4] = NULL;
  }
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(longer);

/* Four-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (!use_cmov)
  bad[5] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
else
  {
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0);
  bad[5] = NULL;
  }

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc10000);
if (!use_cmov)
  bad[6] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
else
  {
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x100000);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000);
  bad[6] = NULL;
  }

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Fewer than three code units remained after the first byte. */
JUMPHERE(near_buffer_end);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
bad[7] = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
bad[8] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
longer = JUMP(SLJIT_NOT_ZERO);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Three-byte sequence. */
JUMPHERE(longer);
bad[9] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (!use_cmov)
  bad[10] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
else
  {
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  bad[10] = NULL;
  }

/* One will be subtracted from STR_PTR later. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

/* Four byte sequences are not possible. */
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x30000, three_byte_tail);

/* Common failure exit for every branching check above. */
invalid_exit = LABEL();
for (idx = 0; idx < 11; idx++)
  sljit_set_label(bad[idx], invalid_exit);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4560
4561
static void do_utfreadnewline_invalid(compiler_common *common)
{
/* Emits the checking UTF-8 reader helper that is specialized for newlines.
On entry TMP1 holds the first byte of the character (>= 0xc0). On return
TMP1 holds 0x85, a value in the 0x2000..0x203f range, or INVALID_UTF_CHAR
for everything else. TMP2 is destroyed. */
DEFINE_COMPILER;
struct sljit_label *skip_loop;
struct sljit_label *resume_skip;
struct sljit_label *not_newline_exit;
struct sljit_jump *at_end;
struct sljit_jump *lead_is_c2;
struct sljit_jump *lead_is_e2;
struct sljit_jump *not_continuation;
struct sljit_jump *ran_out;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

if (common->nltype != NLTYPE_ANY)
  {
  SLJIT_ASSERT(common->nltype != NLTYPE_FIXED || common->newline < 128);

  /* All newlines are ascii, just skip intermediate octets. */
  at_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  skip_loop = LABEL();
  /* Use a post-incrementing load when the target supports it. */
  if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)) == SLJIT_SUCCESS)
    sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  else
    {
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }

  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, skip_loop);
  /* The last octet read was not an intermediate one: step back to it. */
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  JUMPHERE(at_end);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
  OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  return;
  }

at_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Only these two first bytes can start a multi-byte newline. */
lead_is_c2 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xc2);
lead_is_e2 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xe2);

/* The newline paths below come back here, with the most recently read octet
in TMP2, once they know the character is not a newline. */
resume_skip = LABEL();
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
not_continuation = CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80);

/* Skip intermediate octets. */
skip_loop = LABEL();
ran_out = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, skip_loop);

JUMPHERE(not_continuation);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

not_newline_exit = LABEL();
JUMPHERE(at_end);
JUMPHERE(ran_out);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Two byte long newline: 0x85. */
JUMPHERE(lead_is_c2);
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x85, resume_skip);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x85);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Three byte long newlines: 0x2028 and 0x2029. */
JUMPHERE(lead_is_e2);
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, resume_skip);
CMPTO(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0, not_newline_exit);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

OP2(SLJIT_SUB, TMP1, 0, TMP2, 0, SLJIT_IMM, 0x80);
CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40, resume_skip);

/* The result is 0x2000 plus the low six bits of the third octet. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0x2000);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4651
4652
static void do_utfmoveback_invalid(compiler_common *common)
{
/* Emits the checking helper that goes one UTF-8 character back. On entry
STR_PTR points to the last byte of the character, whose value is in TMP1,
and TMP2 is compared against STR_PTR as the lower limit for reads. On
return TMP1 is 1 and STR_PTR points to the first byte of the character when
a valid start byte was found; otherwise TMP1 is 0 and STR_PTR is one byte
after its entry value. */
DEFINE_COMPILER;
sljit_s32 idx;
struct sljit_jump *longer;
struct sljit_jump *near_buffer_start;
struct sljit_label *success_exit;
struct sljit_label *failure_exit;
struct sljit_jump *bad[7];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Step back three more bytes at once; candidate start bytes are then read
at non-negative offsets. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
bad[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);

/* Two-byte sequence. */
near_buffer_start = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
longer = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x20);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Three-byte sequence. */
JUMPHERE(longer);
bad[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
longer = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x10);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Four-byte sequence. */
JUMPHERE(longer);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
bad[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0);
bad[3] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x05);

success_exit = LABEL();
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Two-byte sequence, with fewer than three bytes available before the
last byte. */
JUMPHERE(near_buffer_start);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

bad[4] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20, success_exit);

/* Three-byte sequence. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
bad[5] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);
bad[6] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10, success_exit);

/* Four-byte sequences are not possible. */

/* Failure exit for the near-start three byte checks: STR_PTR is three
bytes before the restore target here. */
failure_exit = LABEL();
sljit_set_label(bad[5], failure_exit);
sljit_set_label(bad[6], failure_exit);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(bad[4]);
/* -2 + 4 = 2 */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

/* Failure exit for the main path: STR_PTR is four bytes before the restore
target here. */
failure_exit = LABEL();
for (idx = 0; idx < 4; idx++)
  sljit_set_label(bad[idx], failure_exit);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(4));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4747
4748
static void do_utfpeakcharback(compiler_common *common)
{
/* Emits the helper that decodes the multi-byte UTF-8 character ending just
before STR_PTR. The character value is returned in TMP1. Does not modify
STR_PTR. TMP2 is destroyed. No validity checks are made. */
DEFINE_COMPILER;
struct sljit_jump *starts_two_back;
struct sljit_jump *starts_three_back;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Is the byte two positions back the start of a two byte sequence? */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
starts_two_back = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20);

/* Is the byte three positions back the start of a three byte sequence? */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
starts_three_back = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10);

/* Four byte sequence: TMP1 is turned back into (byte - 0x80) and merged
with the bits of the first byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Merge the byte two positions back. */
JUMPHERE(starts_three_back);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Merge the last byte of the character. */
JUMPHERE(starts_two_back);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4784
4785
static void do_utfpeakcharback_invalid(compiler_common *common)
{
/* Emits the helper that peeks at the UTF-8 character ending just before
STR_PTR when the subject may contain invalid UTF. STR_PTR is never
written. The result is left in TMP1; every rejected sequence yields
INVALID_UTF_CHAR.
NOTE(review): on entry TMP1 presumably holds the byte at STR_PTR - 1 and
TMP2 the lowest readable address; both are set by the caller, which is
not visible here - confirm. */
DEFINE_COMPILER;
sljit_s32 k;
sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
struct sljit_jump *short_input;
struct sljit_jump *not_lead;
struct sljit_label *finish_two;
struct sljit_label *finish_three;
struct sljit_label *bad_label;
struct sljit_jump *bad[8];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
bad[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);
short_input = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence: byte at -2 in the 0xc2-0xdf range. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
not_lead = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x1e);

finish_two = LABEL();
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
/* When TMP1 is in the 0x80-0xbf range, this addition also raises it by
(0x2 << 6). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* The byte at -2 was not a two-byte lead: it must be a continuation
byte (0x80-0xbf), whose payload is merged into TMP1. */
JUMPHERE(not_lead);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
bad[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Three-byte sequence: byte at -3 in the 0xe0-0xef range. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
not_lead = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x10);

finish_three = LABEL();
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Values in the 0xd800-0xdfff range are rejected. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
if (has_cmov)
  {
  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, -0xd800);
  bad[2] = NULL;
  }
else
  {
  bad[2] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
  }

/* Values below 0x800 are rejected as well. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
if (has_cmov)
  {
  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  bad[3] = NULL;
  }
else
  {
  bad[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
  }

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* The byte at -3 was not a three-byte lead: it must be a continuation
byte, whose payload is merged into TMP1. */
JUMPHERE(not_lead);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0 - 0x80);
bad[4] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Four-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 18);
/* An ADD (not an OR) is required since 0x10000 was subtracted above. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

if (has_cmov)
  {
  OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x100000);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000);
  bad[5] = NULL;
  }
else
  {
  bad[5] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
  }

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* The first limit check failed: retry with the limit lowered by one
code unit, where only two- and three-byte sequences are tried. */
JUMPHERE(short_input);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
short_input = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, finish_two);

OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
bad[6] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Three-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x10, finish_three);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* The second limit check failed too: only a two-byte sequence is tried,
and only when the limit is not above STR_PTR. */
JUMPHERE(short_input);
bad[7] = CMP(SLJIT_GREATER, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, finish_two);

/* Common exit for every rejected sequence. Entries that were replaced by
a conditional move are NULL. */
bad_label = LABEL();
for (k = 0; k < 8; k++)
  {
  sljit_set_label(bad[k], bad_label);
  }

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4916
4917
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
4918
4919
#if PCRE2_CODE_UNIT_WIDTH == 16
4920
4921
static void do_utfreadchar_invalid(compiler_common *common)
{
/* Emits the slow path that decodes a UTF-16 character when the subject
may be invalid. On entry TMP1 holds the first code unit of the character
(>= 0xd800). The decoded value, or INVALID_UTF_CHAR, is returned in TMP1.
STR_PTR is undefined after an invalid character. */
DEFINE_COMPILER;
struct sljit_jump *not_high_surrogate;
struct sljit_jump *no_more_input;
struct sljit_jump *not_low_surrogate;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* TMP2 contains the high surrogate. */
not_high_surrogate = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);
no_more_input = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

/* Fetch the second code unit and step over it. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* The second unit must be in the 0xdc00-0xdfff range. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
not_low_surrogate = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Common exit for all three failure cases. */
JUMPHERE(not_high_surrogate);
JUMPHERE(no_more_input);
JUMPHERE(not_low_surrogate);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4952
4953
static void do_utfreadnewline_invalid(compiler_common *common)
{
/* Emits the slow path that decodes a UTF-16 character when only newline
detection matters. On entry TMP1 holds the first code unit of the
character (>= 0xd800). On the non-error path TMP1 is set to 0x10000 and
STR_PTR is advanced by one code unit only when the next unit is in the
0xdc00-0xdfff range. Otherwise TMP1 is set to INVALID_UTF_CHAR. TMP2 is
used as scratch. */

DEFINE_COMPILER;
struct sljit_jump *no_more_input;
struct sljit_jump *not_high_surrogate;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

no_more_input = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
not_high_surrogate = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);

/* TMP2 becomes 1 when the next unit is in the 0xdc00-0xdfff range and 0
otherwise; it is then scaled to a byte offset. */
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Common exit for both failure cases. */
JUMPHERE(no_more_input);
JUMPHERE(not_high_surrogate);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4984
4985
static void do_utfmoveback_invalid(compiler_common *common)
{
/* Emits the helper that steps one UTF-16 character back when the subject
may be invalid. On success STR_PTR is moved back by one further code unit
and TMP1 is set to 1. On failure STR_PTR is moved forward by one code
unit and TMP1 is set to 0.
NOTE(review): the meaning of TMP1 and TMP2 on entry is established by the
caller, which is not visible here; TMP2 is only compared against STR_PTR
as a limit - confirm. */
DEFINE_COMPILER;
struct sljit_jump *bad_trailing_unit;
struct sljit_jump *limit_reached;
struct sljit_jump *not_high_surrogate;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

bad_trailing_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400);
limit_reached = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* The preceding code unit must be in the 0xd800-0xdbff range. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
not_high_surrogate = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);

OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Common exit for all three failure cases. */
JUMPHERE(bad_trailing_unit);
JUMPHERE(limit_reached);
JUMPHERE(not_high_surrogate);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5012
5013
static void do_utfpeakcharback_invalid(compiler_common *common)
{
/* Emits the helper that peeks at the UTF-16 character ending just before
STR_PTR when the subject may be invalid. STR_PTR is never written. The
result is left in TMP1; rejected input yields INVALID_UTF_CHAR.
NOTE(review): on entry TMP1 presumably holds the code unit at
STR_PTR - 1 and TMP2 the lowest readable address; both are set by the
caller, which is not visible here - confirm. */
DEFINE_COMPILER;
struct sljit_jump *single_unit;
struct sljit_jump *lone_unit;
struct sljit_jump *limit_reached;
struct sljit_jump *not_high_surrogate;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* A value of 0xe000 or above is returned unchanged. */
single_unit = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
lone_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
limit_reached = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* Combine with the code unit at -2, which must be in the 0xd800-0xdbff
range. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
not_high_surrogate = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

JUMPHERE(single_unit);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Common exit for all three failure cases. */
JUMPHERE(lone_unit);
JUMPHERE(limit_reached);
JUMPHERE(not_high_surrogate);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5044
5045
#endif /* PCRE2_CODE_UNIT_WIDTH == 16 */
5046
5047
/* UCD_BLOCK_SIZE must be 128 (see the assert below). */
5048
#define UCD_BLOCK_MASK 127
5049
#define UCD_BLOCK_SHIFT 7
5050
5051
static void do_getucd(compiler_common *common)
{
/* Emits the helper that finds the UCD record of the character passed in
TMP1. On return TMP2 holds the 16-bit value loaded from ucd_stage2 for
that character and TMP1 holds the stage-2 index that was used for the
load. No chartype is loaded here. */
DEFINE_COMPILER;
#if PCRE2_CODE_UNIT_WIDTH == 32
struct sljit_jump *in_range;
#endif

#if defined SLJIT_DEBUG && SLJIT_DEBUG
/* The record of UNASSIGNED_UTF_CHAR must be the dummy_ucd_record. */
const ucd_record *unassigned = GET_UCD(UNASSIGNED_UTF_CHAR);
SLJIT_ASSERT(unassigned->script == ucp_Unknown && unassigned->chartype == ucp_Cn && unassigned->gbprop == ucp_gbOther);
SLJIT_ASSERT(unassigned->caseset == 0 && unassigned->other_case == 0);
#endif

/* The shifts below rely on these sizes. */
SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
if (!common->utf)
  {
  /* Values above MAX_UTF_CODE_POINT are replaced by UNASSIGNED_UTF_CHAR. */
  in_range = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
  JUMPHERE(in_range);
  }
#endif

/* Stage 1: 16-bit entry selected by the block number of the character. */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* Stage 2: 16-bit entry selected by the stage-1 value and the offset of
the character inside its block. */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5090
5091
static void do_getucdtype(compiler_common *common)
{
/* Emits the helper that finds the UCD record of the character passed in
TMP1 and loads its chartype. On return TMP1 holds the chartype byte and
TMP2 holds the record index multiplied by four. */
DEFINE_COMPILER;
#if PCRE2_CODE_UNIT_WIDTH == 32
struct sljit_jump *in_range;
#endif

#if defined SLJIT_DEBUG && SLJIT_DEBUG
/* The record of UNASSIGNED_UTF_CHAR must be the dummy_ucd_record. */
const ucd_record *unassigned = GET_UCD(UNASSIGNED_UTF_CHAR);
SLJIT_ASSERT(unassigned->script == ucp_Unknown && unassigned->chartype == ucp_Cn && unassigned->gbprop == ucp_gbOther);
SLJIT_ASSERT(unassigned->caseset == 0 && unassigned->other_case == 0);
#endif

/* The shifts below rely on these sizes. */
SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
if (!common->utf)
  {
  /* Values above MAX_UTF_CODE_POINT are replaced by UNASSIGNED_UTF_CHAR. */
  in_range = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
  JUMPHERE(in_range);
  }
#endif

/* Stage 1: 16-bit entry selected by the block number of the character. */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* Stage 2: 16-bit entry selected by the stage-1 value and the offset of
the character inside its block. */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);

/* The record index in TMP2 is scaled by 12, the record size, computed
as (TMP2 << 2) + ((TMP2 << 2) << 1). */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 1);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5137
5138
#endif /* SUPPORT_UNICODE */
5139
5140
static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common)
{
/* Emits the code that runs before the first match attempt and the code
that advances STR_PTR by one character between attempts. Depending on
the options it first records the end of the first line
(PCRE2_FIRSTLINE) or the effective offset limit
(PCRE2_USE_OFFSET_LIMIT) in the match_end_ptr stack slot. Returns the
label of the "advance STR_PTR" code; the initial entry jumps over it. */
DEFINE_COMPILER;
struct sljit_label *advance_label;
struct sljit_label *nl_second_label = NULL;
struct sljit_jump *skip_advance;
struct sljit_jump *stop = NULL;
struct sljit_jump *stop2 = NULL;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_label *skip_loop;
struct sljit_jump *branch;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
jump_list *nl_found = NULL;
sljit_u32 options = common->re->overall_options;
BOOL has_cr_or_lf = (common->re->flags & PCRE2_HASCRORLF) != 0;
BOOL firstline = (options & PCRE2_FIRSTLINE) != 0;
/* A two code unit newline is skipped as a whole only when the pattern
does not refer to CR or LF and PCRE2_FIRSTLINE is not set. */
BOOL skip_newline_pair = !has_cr_or_lf && !firstline
  && (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255);
BOOL load_unit;

SLJIT_ASSERT(common->abort_label == NULL);

if (firstline)
  {
  /* Search for the end of the first line. STR_PTR is kept in TMP3. */
  SLJIT_ASSERT(common->match_end_ptr != 0);
  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);

  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Fixed two code unit newline: compare adjacent unit pairs. */
    advance_label = LABEL();
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    stop = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, advance_label);
    CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, advance_label);
    JUMPHERE(stop);
    OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }
  else
    {
    stop = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    advance_label = LABEL();
    /* Repeated stores to the same slot do not create a data dependency. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
    read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
    check_newlinechar(common, common->nltype, &nl_found, TRUE);
    CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, advance_label);
    JUMPHERE(stop);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
    set_jumps(nl_found, LABEL());
    }

  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
  }
else if ((options & PCRE2_USE_OFFSET_LIMIT) != 0)
  {
  /* Check whether the offset limit is set and valid. */
  SLJIT_ASSERT(common->match_end_ptr != 0);

  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, offset_limit));
    }
  else
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, offset_limit));
    }

  /* An unset limit means that STR_END is stored. */
  OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
  stop = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw) PCRE2_UNSET);
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
    }
  else
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
    }

#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
    }

  /* TMP2 = begin + limit, clamped to STR_END. A limit that lies before
  STR_PTR aborts with PCRE2_ERROR_NOMATCH. */
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
  stop2 = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
  OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
  JUMPHERE(stop2);
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
  add_jump(compiler, &common->abort, CMP(SLJIT_LESS, TMP2, 0, STR_PTR, 0));
  JUMPHERE(stop);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, TMP2, 0);
  }

/* The first match attempt must not advance STR_PTR. */
skip_advance = JUMP(SLJIT_JUMP);

if (skip_newline_pair)
  {
  /* Reached when the current unit equals the first newline unit: step
  over it, then over the next unit as well if it equals the second one. */
  nl_second_label = LABEL();
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  stop = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  stop2 = JUMP(SLJIT_JUMP);
  }

advance_label = LABEL();

/* Advancing STR_PTR here saves one jump in the most common case. The
current unit is loaded only when a later check needs it. */
load_unit = skip_newline_pair;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && !common->invalid_utf)
  load_unit = TRUE;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */

if (load_unit)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  }

if (skip_newline_pair)
  {
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, nl_second_label);
  }

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->invalid_utf)
  {
  /* Skip continuation code units (0x80-0xbf), stopping at STR_END. */
  skip_loop = LABEL();
  branch = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x40, skip_loop);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPHERE(branch);
  }
else if (common->utf)
  {
  /* Lead bytes of 0xc0 and above add the extra length from utf8_table4. */
  branch = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  JUMPHERE(branch);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->invalid_utf)
  {
  /* Skip continuation code units (0xdc00-0xdfff), stopping at STR_END. */
  skip_loop = LABEL();
  branch = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400, skip_loop);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPHERE(branch);
  }
else if (common->utf)
  {
  /* A unit in the 0xd800-0xdbff range is followed by a second unit. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);

  if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
    {
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
    CMOV(SLJIT_LESS, STR_PTR, TMP2, 0);
    }
  else
    {
    OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
    }
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
JUMPHERE(skip_advance);

if (skip_newline_pair)
  {
  JUMPHERE(stop);
  JUMPHERE(stop2);
  }

return advance_label;
}
5330
5331
5332
static SLJIT_INLINE void add_prefix_char(PCRE2_UCHAR chr, fast_forward_char_data *chars, BOOL last)
{
/* Adds chr to the set of code units stored in chars. A count of 255
means that the set is saturated; nothing is recorded in that case. The
set also becomes saturated when a new value is added to a set which
already holds MAX_DIFF_CHARS values. When last is set, last_count is
updated for each newly stored value. */
sljit_u32 used = chars->count;
sljit_u32 pos = 0;

if (used == 255)
  return;

if (used == 0)
  {
  /* First value: last_count is assigned, not incremented. */
  chars->count = 1;
  chars->chars[0] = chr;

  if (last)
    {
    chars->last_count = 1;
    }
  return;
  }

/* Nothing to do when the value is already stored. */
while (pos < used)
  {
  if (chars->chars[pos] == chr)
    return;
  pos++;
  }

if (used >= MAX_DIFF_CHARS)
  {
  chars->count = 255;
  return;
  }

chars->chars[used] = chr;
chars->count = used + 1;

if (last)
  {
  chars->last_count++;
  }
}
5365
5366
static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, fast_forward_char_data *chars, int max_chars, sljit_u32 *rec_count)
{
/* Recursive scanner of the literal prefix of a pattern. Starting at cc,
it records in chars[0], chars[1], ... the code units which may occur at
each subject position, for at most max_chars positions. Returns the
number of positions filled by this call. *rec_count is decremented once
per processed opcode, and 0 is returned as soon as it reaches zero. */
BOOL is_last, is_any, is_class, caseless;
int len, repeat, saved_len, consumed = 0;
sljit_u32 chr; /* Any unicode character. */
sljit_u8 *bitmap, *bitmap_end, bits;
PCRE2_SPTR alt, saved_cc, other;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
PCRE2_UCHAR othercase[4];
#elif defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
PCRE2_UCHAR othercase[2];
#else
PCRE2_UCHAR othercase[1];
#endif

repeat = 1;
for (;;)
  {
  if (*rec_count == 0)
    return 0;
  (*rec_count)--;

  is_last = TRUE;
  is_any = FALSE;
  is_class = FALSE;
  caseless = FALSE;

  switch (*cc)
    {
    case OP_CHARI:
    caseless = TRUE;
    /* Fall through */
    case OP_CHAR:
    is_last = FALSE;
    cc++;
    break;

    /* Zero width assertions are simply stepped over. */
    case OP_SOD:
    case OP_SOM:
    case OP_SET_SOM:
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_EODN:
    case OP_EOD:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    cc++;
    continue;

    /* Assertion groups are skipped as a whole. */
    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    cc = bracketend(cc);
    continue;

    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_POSPLUSI:
    caseless = TRUE;
    /* Fall through */
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_POSPLUS:
    cc++;
    break;

    case OP_EXACTI:
    caseless = TRUE;
    /* Fall through */
    case OP_EXACT:
    repeat = GET2(cc, 1);
    is_last = FALSE;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_POSQUERYI:
    caseless = TRUE;
    /* Fall through */
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_POSQUERY:
    /* The character is optional: first scan what follows it. */
    len = 1;
    cc++;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(*cc))
      len += GET_EXTRALEN(*cc);
#endif
    max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count);
    if (max_chars == 0)
      return consumed;
    is_last = FALSE;
    break;

    case OP_KET:
    cc += 1 + LINK_SIZE;
    continue;

    case OP_ALT:
    cc += GET(cc, 1);
    continue;

    case OP_ONCE:
    case OP_BRA:
    case OP_BRAPOS:
    case OP_CBRA:
    case OP_CBRAPOS:
    /* Scan every further alternative, then continue with the first. */
    alt = cc + GET(cc, 1);
    while (*alt == OP_ALT)
      {
      max_chars = scan_prefix(common, alt + 1 + LINK_SIZE, chars, max_chars, rec_count);
      if (max_chars == 0)
        return consumed;
      alt += GET(alt, 1);
      }

    if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
      cc += IMM2_SIZE;
    cc += 1 + LINK_SIZE;
    continue;

    case OP_CLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
      return consumed;
#endif
    is_class = TRUE;
    break;

    case OP_NCLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf)
      return consumed;
#endif
    is_class = TRUE;
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf)
      return consumed;
#endif
    is_any = TRUE;
    cc += GET(cc, 1);
    break;
#endif

    case OP_DIGIT:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
      return consumed;
#endif
    is_any = TRUE;
    cc++;
    break;

    case OP_WHITESPACE:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
      return consumed;
#endif
    is_any = TRUE;
    cc++;
    break;

    case OP_WORDCHAR:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
      return consumed;
#endif
    is_any = TRUE;
    cc++;
    break;

    case OP_NOT:
    case OP_NOTI:
    cc++;
    /* Fall through. */
    case OP_NOT_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf)
      return consumed;
#endif
    is_any = TRUE;
    cc++;
    break;

#ifdef SUPPORT_UNICODE
    case OP_NOTPROP:
    case OP_PROP:
#if PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf)
      return consumed;
#endif
    is_any = TRUE;
    cc += 1 + 2;
    break;
#endif

    case OP_TYPEEXACT:
    /* The repeat count applies to the type opcode which follows. */
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE;
    continue;

    case OP_NOTEXACT:
    case OP_NOTEXACTI:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf)
      return consumed;
#endif
    is_any = TRUE;
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE + 1;
    break;

    default:
    return consumed;
    }

  if (is_any)
    {
    /* Any code unit may occur at the next 'repeat' positions. */
    do
      {
      chars->count = 255;

      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars++;
      }
    while (--repeat > 0);

    repeat = 1;
    continue;
    }

  if (is_class)
    {
    /* The 32 byte bitmap follows the opcode; cc is moved to the opcode
    after the bitmap, which may be a repeat specifier. */
    bitmap = (sljit_u8*) (cc + 1);
    cc += 1 + 32 / sizeof(PCRE2_UCHAR);

    switch (*cc)
      {
      case OP_CRSTAR:
      case OP_CRMINSTAR:
      case OP_CRPOSSTAR:
      case OP_CRQUERY:
      case OP_CRMINQUERY:
      case OP_CRPOSQUERY:
      /* The class may match nothing: scan what follows it as well. */
      max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
      if (max_chars == 0)
        return consumed;
      break;

      default:
      case OP_CRPLUS:
      case OP_CRMINPLUS:
      case OP_CRPOSPLUS:
      break;

      case OP_CRRANGE:
      case OP_CRMINRANGE:
      case OP_CRPOSRANGE:
      repeat = GET2(cc, 1);
      if (repeat <= 0)
        return consumed;
      break;
      }

    do
      {
      if (bitmap[31] & 0x80)
        {
        /* Character 255 is in the class: treat the position as "any". */
        chars->count = 255;
        }
      else if (chars->count != 255)
        {
        /* Add every character whose bit is set in the bitmap. */
        bitmap_end = bitmap + 32;
        chr = 0;
        do
          {
          bits = *bitmap++;
          SLJIT_ASSERT((chr & 0x7) == 0);
          if (bits == 0)
            {
            chr += 8;
            }
          else
            {
            do
              {
              if ((bits & 0x1) != 0)
                add_prefix_char(chr, chars, TRUE);
              bits >>= 1;
              chr++;
              }
            while (bits != 0);
            /* Round up to the first character of the next byte. */
            chr = (chr + 7) & ~7;
            }
          }
        while (chars->count != 255 && bitmap < bitmap_end);
        /* Rewind so that a repeated class starts from the first byte. */
        bitmap = bitmap_end - 32;
        }

      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars++;
      }
    while (--repeat > 0);

    switch (*cc)
      {
      case OP_CRSTAR:
      case OP_CRMINSTAR:
      case OP_CRPOSSTAR:
      return consumed;

      case OP_CRQUERY:
      case OP_CRMINQUERY:
      case OP_CRPOSQUERY:
      cc++;
      break;

      case OP_CRRANGE:
      case OP_CRMINRANGE:
      case OP_CRPOSRANGE:
      /* Only fixed-length ranges can be followed further. */
      if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
        return consumed;
      cc += 1 + 2 * IMM2_SIZE;
      break;
      }

    repeat = 1;
    continue;
    }

  /* A literal character: cc points to its first code unit. */
  len = 1;
#ifdef SUPPORT_UNICODE
  if (common->utf && HAS_EXTRALEN(*cc))
    len += GET_EXTRALEN(*cc);
#endif

  if (caseless && char_has_othercase(common, cc))
    {
#ifdef SUPPORT_UNICODE
    if (common->utf)
      {
      /* The other case must be encoded with the same number of units. */
      GETCHAR(chr, cc);
      if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
        return consumed;
      }
    else
#endif
      {
      chr = *cc;
#ifdef SUPPORT_UNICODE
      if (common->ucp && chr > 127)
        othercase[0] = UCD_OTHERCASE(chr);
      else
#endif
        othercase[0] = TABLE_GET(chr, common->fcc, chr);
      }
    }
  else
    {
    caseless = FALSE;
    othercase[0] = 0; /* Stops compiler warning - PH */
    }

  /* Record the code units of the character 'repeat' times. */
  saved_len = len;
  saved_cc = cc;
  for (;;)
    {
    other = othercase;
    do
      {
      len--;
      consumed++;

      chr = *cc;
      add_prefix_char(*cc, chars, len == 0);

      if (caseless)
        add_prefix_char(*other, chars, len == 0);

      if (--max_chars == 0)
        return consumed;
      chars++;
      cc++;
      other++;
      }
    while (len > 0);

    if (--repeat == 0)
      break;

    len = saved_len;
    cc = saved_cc;
    }

  repeat = 1;
  if (is_last)
    return consumed;
  }
}
5774
5775
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5776
/* Emits a test on the code unit held in "reg" and a jump to "label" when
that unit is a trailing unit of a multi-unit UTF character instead of the
first one: a 10xxxxxx byte in 8-bit mode, a unit in the 0xdc00-0xdfff range
in 16-bit mode. The register is overwritten by its masked value. */
static void jumpto_if_not_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg, struct sljit_label *label)
{
/* unit_kind_mask keeps the bits that identify the kind of the code unit,
trailing_kind is the value of those bits for a trailing unit. */
#if PCRE2_CODE_UNIT_WIDTH == 8
const sljit_sw unit_kind_mask = 0xc0;
const sljit_sw trailing_kind = 0x80;
#elif PCRE2_CODE_UNIT_WIDTH == 16
const sljit_sw unit_kind_mask = 0xfc00;
const sljit_sw trailing_kind = 0xdc00;
#else
#error "Unknown code width"
#endif

OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, unit_kind_mask);
CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, trailing_kind, label);
}
5788
#endif
5789
5790
#include "pcre2_jit_simd_inc.h"
5791
5792
#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
5793
5794
/* Looks for the best pair of prefix positions for the SIMD character pair
search and, when one exists, emits that search.

A position is a candidate only if its priority (stored in last_count by the
caller) is above 2. The two positions must be at most
max_fast_forward_char_pair_offset() apart and their character pairs must not
share any value. The pair with the highest priority sum is chosen; among
equal sums the pair examined last is kept.

Arguments:
  common      compiler state, passed on to fast_forward_char_pair_simd
  chars       per-position prefix data
  max         number of valid entries in chars

Returns:      TRUE if the SIMD search was emitted, FALSE if no pair qualifies
*/
static BOOL check_fast_forward_char_pair_simd(compiler_common *common, fast_forward_char_data *chars, int max)
{
  sljit_s32 hi, lo, best_hi = 0, best_lo = 0;
  sljit_u32 best_pri = 0;
  PCRE2_UCHAR hi_c0, hi_c1, hi_pri, lo_c0, lo_c1, lo_pri;

  for (hi = max - 1; hi >= 1; hi--)
    {
    if (chars[hi].last_count <= 2)
      continue;

    hi_c0 = chars[hi].chars[0];
    hi_c1 = chars[hi].chars[1];
    hi_pri = chars[hi].last_count;

    /* Only partners within the supported distance are examined. */
    lo = hi - max_fast_forward_char_pair_offset();
    if (lo < 0)
      lo = 0;

    for (; lo < hi; lo++)
      {
      lo_pri = chars[lo].last_count;
      if (lo_pri <= 2 || hi_pri + lo_pri < best_pri)
        continue;

      lo_c0 = chars[lo].chars[0];
      lo_c1 = chars[lo].chars[1];

      /* The two positions must not have a character in common. */
      if (hi_c0 == lo_c0 || hi_c0 == lo_c1 || hi_c1 == lo_c0 || hi_c1 == lo_c1)
        continue;

      best_pri = hi_pri + lo_pri;
      best_hi = hi;
      best_lo = lo;
      }
    }

  if (best_pri == 0)
    return FALSE;

  fast_forward_char_pair_simd(common, best_hi, chars[best_hi].chars[0], chars[best_hi].chars[1], best_lo, chars[best_lo].chars[0], chars[best_lo].chars[1]);
  return TRUE;
}
5838
5839
#endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
5840
5841
/* Emits a loop which advances STR_PTR until the code unit located "offset"
units after it is char1 or char2 (the two may be equal).

When common->match_end_ptr is set, STR_END is temporarily lowered to the
smaller of STR_END and (saved match end + offset + 1); the original value is
kept in TMP3 and restored before the emitted code continues. If the SIMD
variant is available it is used instead of the scalar loop.

Arguments:
  common      compiler state
  char1       first accepted code unit
  char2       second accepted code unit
  offset      distance of the tested code unit from the match start;
                must be 0 unless the mode is PCRE2_JIT_COMPLETE
*/
static void fast_forward_first_char2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
DEFINE_COMPILER;
struct sljit_label *retry;
struct sljit_jump *is_char1;
struct sljit_jump *out_of_input;
PCRE2_UCHAR diff = char1 ^ char2;
BOOL clamp_end = (common->match_end_ptr != 0);
BOOL complete = (common->mode == PCRE2_JIT_COMPLETE);

SLJIT_ASSERT(complete || offset == 0);

if (clamp_end)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);

/* From here STR_PTR points to the tested code unit, not to the match start. */
if (offset > 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

if (clamp_end)
  {
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);

  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offset + 1));
  OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
  CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
  }

#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD

if (JIT_HAS_FAST_FORWARD_CHAR_SIMD)
  {
  fast_forward_char_simd(common, char1, char2, offset);

  /* Undo the STR_PTR and STR_END adjustments made above. */
  if (offset > 0)
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

  if (clamp_end)
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  return;
  }

#endif

retry = LABEL();

/* Running out of input is a failed match in complete mode, otherwise the
loop is simply left. */
out_of_input = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (complete)
  add_jump(compiler, &common->failed_match, out_of_input);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (diff == 0)
  {
  /* Only one character is accepted. */
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1, retry);
  }
else if (is_powerof2(diff))
  {
  /* The characters differ in a single bit: force that bit and compare once. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, diff);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | diff, retry);
  }
else
  {
  is_char1 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char2, retry);
  JUMPHERE(is_char1);
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
  {
  /* The candidate match start must be the first code unit of a character. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-(offset + 1)));
  jumpto_if_not_utf_char_start(compiler, TMP1, retry);
  }
#endif

/* Step back to the match start. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset + 1));

if (!complete)
  JUMPHERE(out_of_input);

if (clamp_end)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
5926
5927
/* Emits a fast-forward search based on the characters that may occur at the
first MAX_N_CHARS positions of a match, as collected by scan_prefix().

Strategy, in order:
  1. the SIMD character pair search, when available and a pair qualifies;
  2. if a run of at least 4 consecutive positions with a limited character
     set exists, a skip-table loop driven by the last position of the longest
     such run, optionally followed by a check of one more position;
  3. otherwise a one or two character search at the best single position.

Arguments:
  common      compiler state

Returns:      FALSE if nothing was emitted because the prefix gives no usable
                information, TRUE otherwise. TRUE is also returned, without
                emitting the loop, when the skip table cannot be allocated.
                NOTE(review): presumably allocate_read_only_data records that
                failure itself - confirm.
*/
static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_label *retry;
struct sljit_jump *is_first;
fast_forward_char_data chars[MAX_N_CHARS];
fast_forward_char_data *entry;
sljit_s32 offset;
PCRE2_UCHAR first, second, diff;
PCRE2_UCHAR *member, *member_end;
sljit_u8 *slot;
int pos, max, run_start;
int range_right = -1, range_len;
sljit_u8 *update_table = NULL;
BOOL in_run;
BOOL clamp_end;
sljit_u32 rec_count;

for (entry = chars; entry < chars + MAX_N_CHARS; entry++)
  {
  entry->count = 0;
  entry->last_count = 0;
  }

rec_count = 10000;
max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);

if (max < 1)
  return FALSE;

/* Replace last_count by a priority: 7..2 for positions with one or two
characters, 1 for larger sets, 0 when count is 255. */
for (entry = chars; entry < chars + max; entry++)
  {
  SLJIT_ASSERT(entry->count > 0 && entry->last_count <= entry->count);

  switch (entry->count)
    {
    case 1:
    if (entry->last_count == 1)
      entry->last_count = 7;
    else
      entry->last_count = 5;
    /* Duplicating the character lets later code treat it as a pair. */
    entry->chars[1] = entry->chars[0];
    break;

    case 2:
    SLJIT_ASSERT(entry->chars[0] != entry->chars[1]);

    /* Pairs that differ in a single bit are cheaper to test. */
    if (is_powerof2(entry->chars[0] ^ entry->chars[1]))
      entry->last_count = (entry->last_count == 2) ? 6 : 4;
    else
      entry->last_count = (entry->last_count == 2) ? 3 : 2;
    break;

    default:
    entry->last_count = (entry->count == 255) ? 0 : 1;
    break;
    }
  }

#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && check_fast_forward_char_pair_simd(common, chars, max))
  return TRUE;
#endif

/* Find the longest run of positions whose count is below 255. The index
runs one past the end so that a run reaching the last position is closed. */
in_run = FALSE;
/* Prevent compiler "uninitialized" warning */
run_start = 0;
range_len = 4 /* minimum length */ - 1;
for (pos = 0; pos <= max; pos++)
  {
  if (in_run && (pos - run_start) > range_len && (chars[pos - 1].count < 255))
    {
    range_len = pos - run_start;
    range_right = pos - 1;
    }

  if (pos >= max || chars[pos].count >= 255)
    {
    in_run = FALSE;
    continue;
    }

  SLJIT_ASSERT(chars[pos].count > 0);
  if (!in_run)
    {
    in_run = TRUE;
    run_start = pos;
    }
  }

if (range_right >= 0)
  {
  update_table = (sljit_u8 *)allocate_read_only_data(common, 256);
  if (update_table == NULL)
    return TRUE;

  /* Each entry is indexed by the low 8 bits of a code unit and holds the
  distance (in bytes) to advance: the full run length by default, lowered to
  the distance from the right end for every character of the run. */
  memset(update_table, IN_UCHARS(range_len), 256);

  for (pos = 0; pos < range_len; pos++)
    {
    entry = chars + (range_right - pos);
    SLJIT_ASSERT(entry->count > 0 && entry->count < 255);

    member = entry->chars;
    member_end = member + entry->count;
    do
      {
      slot = update_table + ((*member) & 0xff);
      if (*slot > IN_UCHARS(pos))
        *slot = IN_UCHARS(pos);
      member++;
      }
    while (member < member_end);
    }
  }

/* Choose the position with the highest priority (at least 2) other than
range_right; the earliest one wins among equal priorities. */
offset = -1;
for (pos = 0; pos < max; pos++)
  {
  if (pos == range_right)
    continue;

  if (offset == -1 ? chars[pos].last_count >= 2 : chars[offset].last_count < chars[pos].last_count)
    offset = pos;
  }

SLJIT_ASSERT(offset == -1 || (chars[offset].count >= 1 && chars[offset].count <= 2));

if (range_right < 0)
  {
  if (offset < 0)
    return FALSE;
  /* Works regardless the value is 1 or 2. */
  fast_forward_first_char2(common, chars[offset].chars[0], chars[offset].chars[1], offset);
  return TRUE;
  }

SLJIT_ASSERT(range_right != offset);

/* Lower STR_END by max code units (failing the match if that moves it below
its start), and limit it by the saved match end when there is one. The
original STR_END is kept in TMP3 in that case. */
clamp_end = (common->match_end_ptr != 0);

if (clamp_end)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  }

OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));

if (clamp_end)
  {
  OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
  CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
  }

SLJIT_ASSERT(range_right >= 0);

if (!HAS_VIRTUAL_REGISTERS)
  OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);

retry = LABEL();
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));

/* Load the low byte of the code unit at range_right. */
#if PCRE2_CODE_UNIT_WIDTH == 8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
#else
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
#endif

if (HAS_VIRTUAL_REGISTERS)
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
else
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);

/* Advance by the table value and repeat until the value is zero. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, retry);

if (offset >= 0)
  {
  first = chars[offset].chars[0];
  second = chars[offset].chars[1];

  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  if (chars[offset].count == 1)
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, first, retry);
  else
    {
    diff = first ^ second;
    if (is_powerof2(diff))
      {
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, diff);
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, first | diff, retry);
      }
    else
      {
      is_first = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, first);
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, second, retry);
      JUMPHERE(is_first);
      }
    }
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset != 0)
  {
  /* The candidate position must be the first code unit of a character. When
  no extra position was checked, STR_PTR has not been advanced yet, so it is
  advanced here and stepped back after the test. */
  if (offset < 0)
    {
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }
  else
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));

  jumpto_if_not_utf_char_start(compiler, TMP1, retry);

  if (offset < 0)
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  }
#endif

if (offset >= 0)
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Restore STR_END. */
if (clamp_end)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
else
  OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
return TRUE;
}
6148
6149
/* Emits the fast-forward search for the first code unit recorded in the
compiled pattern (common->re->first_codeunit). When PCRE2_FIRSTCASELESS is
set, the other case of that code unit is accepted as well.

Arguments:
  common      compiler state
*/
static SLJIT_INLINE void fast_forward_first_char(compiler_common *common)
{
PCRE2_UCHAR first_char = (PCRE2_UCHAR)(common->re->first_codeunit);
PCRE2_UCHAR other_case;

if ((common->re->flags & PCRE2_FIRSTCASELESS) == 0)
  {
  /* Caseful: both accepted characters are the same. */
  fast_forward_first_char2(common, first_char, first_char, 0);
  return;
  }

other_case = TABLE_GET(first_char, common->fcc, first_char);
#if defined SUPPORT_UNICODE
/* Above 127 the Unicode data is used in UTF or UCP mode. */
if (first_char > 127 && (common->utf || common->ucp))
  other_case = UCD_OTHERCASE(first_char);
#endif

fast_forward_first_char2(common, first_char, other_case, 0);
}
6166
6167
/* Emits the fast-forward code which moves STR_PTR to the position following
the next newline. Nothing is skipped when STR_PTR is at or before the
starting position (jit_arguments.str).

A fixed two code unit newline (common->newline > 255) is handled by a
dedicated loop or by the SIMD pair search. All other newline types share the
generic loop, whose label is stored in common->ff_newline_shortcut.

When common->match_end_ptr is set, STR_END is replaced by the saved match end
for the duration of the search; TMP3 keeps the original value.

Arguments:
  common      compiler state
*/
static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_label *scan;
struct sljit_jump *at_end = NULL;
struct sljit_jump *at_start;
struct sljit_jump *done = NULL;
struct sljit_jump *saw_cr = NULL;
struct sljit_jump *no_lf;
jump_list *nl_matches = NULL;
sljit_u32 nl_first, nl_second;
BOOL clamp_end = (common->match_end_ptr != 0);
BOOL cr_may_pair = (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF);

if (clamp_end)
  {
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  }

if (common->nltype == NLTYPE_FIXED && common->newline > 255)
  {
  /* Two code unit newline: the first unit is in the high byte. */
  nl_first = (common->newline >> 8) & 0xff;
  nl_second = common->newline & 0xff;

#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
  if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && common->mode == PCRE2_JIT_COMPLETE)
    {
    /* TMP2 = starting position, TMP1 = subject begin. */
    if (!HAS_VIRTUAL_REGISTERS)
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
      }
    else
      {
      OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
      }
    at_start = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

    /* Step back one code unit, and one more unless that reached the begin
    of the subject. */
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_NOT_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);

    fast_forward_char_pair_simd(common, 1, nl_second, nl_second, 0, nl_first, nl_first);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    }
  else
#endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
    {
    at_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

    /* TMP2 = starting position, TMP1 = subject begin. */
    if (!HAS_VIRTUAL_REGISTERS)
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
      }
    else
      {
      OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
      }
    at_start = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

    /* Step back one code unit if at least two units precede STR_PTR. */
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

    /* Advance until the two code units before STR_PTR form the newline. */
    scan = LABEL();
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    done = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, nl_first, scan);
    CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, nl_second, scan);

    JUMPHERE(done);
    JUMPHERE(at_end);
    }

  JUMPHERE(at_start);

  if (clamp_end)
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  return;
  }

/* TMP2 = starting position. */
if (!HAS_VIRTUAL_REGISTERS)
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
else
  {
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
  }

/* Example: match /^/ to \r\n from offset 1. */
at_start = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

/* Step back one character so that a newline ending right before STR_PTR
is found. */
if (common->nltype == NLTYPE_ANY)
  move_back(common, NULL, FALSE);
else
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

scan = LABEL();
common->ff_newline_shortcut = scan;

#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD
if (JIT_HAS_FAST_FORWARD_CHAR_SIMD && (common->nltype == NLTYPE_FIXED || common->nltype == NLTYPE_ANYCRLF))
  {
  if (common->nltype == NLTYPE_ANYCRLF)
    {
    fast_forward_char_simd(common, CHAR_CR, CHAR_LF, 0);
    if (common->mode != PCRE2_JIT_COMPLETE)
      at_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

    /* Consume the newline character; only a CR needs the LF check below. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    done = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
    }
  else
    {
    fast_forward_char_simd(common, common->newline, common->newline, 0);

    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    if (common->mode != PCRE2_JIT_COMPLETE)
      {
      /* Do not leave STR_PTR beyond STR_END. */
      OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
      CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
      }
    }
  }
else
#endif /* JIT_HAS_FAST_FORWARD_CHAR_SIMD */
  {
  read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
  at_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  if (cr_may_pair)
    saw_cr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  check_newlinechar(common, common->nltype, &nl_matches, FALSE);
  set_jumps(nl_matches, scan);
  }

if (cr_may_pair)
  {
  if (done == NULL)
    {
    done = JUMP(SLJIT_JUMP);
    JUMPHERE(saw_cr);
    }

  /* After a CR: also skip a directly following NL, if there is one. */
  no_lf = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  JUMPHERE(no_lf);
  JUMPHERE(done);
  }

if (at_end != NULL)
  JUMPHERE(at_end);
JUMPHERE(at_start);

if (clamp_end)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
6339
6340
static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
6341
6342
/* Emits a loop which advances STR_PTR to the next code unit accepted by the
start bitmap of the compiled pattern (common->re->start_bitmap).

The test is generated by optimize_class() when it can handle the bitmap,
otherwise by an explicit bit lookup in the 32 byte table. When
common->match_end_ptr is set, STR_END is temporarily lowered to the smaller
of STR_END and (saved match end + 1); RETURN_ADDR keeps the original value.
In complete mode running out of input is a failed match, in the other modes
the loop is left with STR_PTR at STR_END.

Arguments:
  common      compiler state
*/
static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common)
{
DEFINE_COMPILER;
const sljit_u8 *start_bits = common->re->start_bitmap;
struct sljit_label *retry;
struct sljit_jump *out_of_input;
#if PCRE2_CODE_UNIT_WIDTH != 8
struct sljit_jump *above_table = NULL;
#endif
jump_list *rejected = NULL;
BOOL clamp_end = (common->match_end_ptr != 0);
BOOL top_bit_set = ((start_bits[31] & 0x80) != 0);

if (clamp_end)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
  CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
  }

retry = LABEL();

out_of_input = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, out_of_input);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (optimize_class(common, start_bits, top_bit_set, FALSE, &rejected))
  {
  /* The jumps collected by optimize_class() restart the loop. */
  set_jumps(rejected, retry);
  }
else
  {
#if PCRE2_CODE_UNIT_WIDTH != 8
  /* Code units from 255 upwards are not looked up in the table: they are
  accepted when the top bit of the bitmap is set and rejected otherwise. */
  if (top_bit_set)
    above_table = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255);
  else
    CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255, retry);
#elif defined SUPPORT_UNICODE
  if (common->utf && is_char7_bitset(start_bits, FALSE))
    CMPTO(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 127, retry);
#endif
  /* TMP2 = bit index inside the byte, TMP1 = the byte of the bitmap. */
  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
    OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
    }
  else
    {
    OP2(SLJIT_SHL, TMP3, 0, SLJIT_IMM, 1, TMP2, 0);
    OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP3, 0);
    }
  JUMPTO(SLJIT_ZERO, retry);
  }

#if PCRE2_CODE_UNIT_WIDTH != 8
if (above_table != NULL)
  JUMPHERE(above_table);
#endif

/* Step back to the accepted code unit. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (common->mode != PCRE2_JIT_COMPLETE)
  JUMPHERE(out_of_input);

if (clamp_end)
  OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
}
6413
6414
/* Emits the search for the requested character from the current position.

The search is skipped when more than IN_UCHARS(REQ_CU_MAX) * 100 bytes of
subject remain after STR_PTR, or when STR_PTR is still below the position
stored at common->req_char_ptr. Otherwise the subject is scanned from STR_PTR
(from the following code unit if has_firstchar is set) and the position of
the first occurrence is stored at common->req_char_ptr.

Arguments:
  common          compiler state
  req_char        the requested code unit
  caseless        TRUE if the other case of req_char is accepted as well
  has_firstchar   TRUE if the scan starts one code unit after STR_PTR

Returns:          list of jumps taken when the character is not found
*/
static SLJIT_INLINE jump_list *search_requested_char(compiler_common *common, PCRE2_UCHAR req_char, BOOL caseless, BOOL has_firstchar)
{
DEFINE_COMPILER;
struct sljit_label *scan;
struct sljit_jump *toolong;
struct sljit_jump *already_found;
struct sljit_jump *hit;
struct sljit_jump *hit_other_case = NULL;
jump_list *not_found = NULL;
sljit_u32 oc, diff;

SLJIT_ASSERT(common->req_char_ptr != 0);
OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(REQ_CU_MAX) * 100);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
toolong = CMP(SLJIT_LESS, TMP2, 0, STR_END, 0);
already_found = CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0);

/* TMP1 is the scan pointer. */
if (has_firstchar)
  OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
else
  OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);

/* Determine the other case; it equals req_char for a caseful search. */
oc = req_char;
if (caseless)
  {
  oc = TABLE_GET(req_char, common->fcc, req_char);
#if defined SUPPORT_UNICODE
  if (req_char > 127 && (common->utf || common->ucp))
    oc = UCD_OTHERCASE(req_char);
#endif
  }

#ifdef JIT_HAS_FAST_REQUESTED_CHAR_SIMD
if (JIT_HAS_FAST_REQUESTED_CHAR_SIMD)
  {
  not_found = fast_requested_char_simd(common, req_char, oc);
  }
else
#endif
  {
  scan = LABEL();
  add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));

  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);

  diff = req_char ^ oc;
  if (diff == 0)
    hit = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
  else if (is_powerof2(diff))
    {
    /* The cases differ in a single bit: force that bit and compare once. */
    OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, diff);
    hit = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | diff);
    }
  else
    {
    hit = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
    hit_other_case = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
    }

  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPTO(SLJIT_JUMP, scan);

  JUMPHERE(hit);
  if (hit_other_case != NULL)
    JUMPHERE(hit_other_case);
  }

/* Remember where the character was found. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);

JUMPHERE(already_found);
JUMPHERE(toolong);
return not_found;
}
6489
6490
/* Emits the fast-call subroutine which restores saved values from the
entries at the top of the stack. The return address is kept in RETURN_ADDR.

The word below STACK_TOP selects the action:
  > 0   it is an offset from the local base: the next two stack words are
          stored at that offset and at the following word, and three words
          are removed from the stack;
  < 0   its negated value is an offset from the local base: the next stack
          word is stored there, and two words are removed from the stack;
  == 0  end of the entries, the subroutine returns.

Arguments:
  common      compiler state
*/
static void do_revertframes(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *not_positive;
struct sljit_jump *negative;
struct sljit_label *next_entry;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
GET_LOCAL_BASE(TMP1, 0, 0);

/* Drop frames until we reach STACK_TOP. */
next_entry = LABEL();
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -sizeof(sljit_sw));
not_positive = CMP(SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0);

/* Positive offset: restore two words. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
if (!HAS_VIRTUAL_REGISTERS)
  {
  /* TMP1 is used as a scratch register, so the local base is reloaded. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(3 * sizeof(sljit_sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
  GET_LOCAL_BASE(TMP1, 0, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP3, 0);
  }
else
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -(3 * sizeof(sljit_sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
  }
JUMPTO(SLJIT_JUMP, next_entry);

JUMPHERE(not_positive);
negative = CMP(SLJIT_NOT_ZERO /* SIG_LESS */, TMP2, 0, SLJIT_IMM, 0);
/* End of reverting values. */
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Negative offset: restore a single word. */
JUMPHERE(negative);
OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
if (!HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP3, 0);
  }
else
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
  }
JUMPTO(SLJIT_JUMP, next_entry);
}
6543
6544
/* Emits the fast-call subroutine which tests for a word boundary at STR_PTR.
The return address is saved in LOCALS0.

The "word" property (0 or 1) of the character before STR_PTR is computed
into TMP3 and that of the character at STR_PTR into TMP2; a missing
character at the begin or the end of the subject counts as 0. On the normal
return path TMP2 holds the exclusive or of the two values and the zero flag
is set accordingly.

In UCP mode an underscore, and any character whose type returned by the
getucdtype helper lies in ucp_Ll..ucp_Lu or ucp_Nd..ucp_No, is a word
character. Otherwise the ctype_word bit of common->ctypes is used and
characters above 255 are never word characters.

With invalid UTF checking there are two more return paths, neither of which
performs the exclusive or: TMP2 is -1 when the previous and the next
characters are both invalid, and TMP2 is a copy of TMP3 when only the next
character is invalid.

Arguments:
  common      compiler state
*/
static void check_wordboundary(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *no_prev_char;
jump_list *no_next_char = NULL;
#ifdef SUPPORT_UNICODE
struct sljit_label *classify_next;
jump_list *bad_prev_char = NULL;
#endif /* SUPPORT_UNICODE */
jump_list *bad_next_char = NULL;
#if PCRE2_CODE_UNIT_WIDTH != 8 || defined SUPPORT_UNICODE
struct sljit_jump *skip_lookup;
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 || SUPPORT_UNICODE */

SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
/* Get type of the previous char, and put it to TMP3. */
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
no_prev_char = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

#ifdef SUPPORT_UNICODE
if (common->invalid_utf)
  {
  peek_char_back(common, READ_CHAR_MAX, &bad_prev_char);

  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    /* The character in TMP1 and STR_PTR are preserved around the
    check_start_used_ptr() call. */
    OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
    OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
    move_back(common, NULL, TRUE);
    check_start_used_ptr(common);
    OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
    }
  }
else
#endif /* SUPPORT_UNICODE */
  {
  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    move_back(common, NULL, TRUE);
    check_start_used_ptr(common);
    read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR);
    }
  else
    peek_char_back(common, READ_CHAR_MAX, NULL);
  }

/* Classify the previous character. */
#ifdef SUPPORT_UNICODE
if (common->ucp)
  {
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  skip_lookup = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  JUMPHERE(skip_lookup);
  OP1(SLJIT_MOV, TMP3, 0, TMP2, 0);
  }
else
#endif /* SUPPORT_UNICODE */
  {
#if PCRE2_CODE_UNIT_WIDTH != 8
  skip_lookup = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UNICODE
  /* Here TMP3 has already been zeroed. */
  skip_lookup = common->utf ? CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255) : NULL;
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP3, 0, TMP1, 0, SLJIT_IMM, 1);
#if PCRE2_CODE_UNIT_WIDTH != 8
  JUMPHERE(skip_lookup);
#elif defined SUPPORT_UNICODE
  if (skip_lookup != NULL)
    JUMPHERE(skip_lookup);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
  }
JUMPHERE(no_prev_char);

/* Get type of the next char, and put it to TMP2. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
check_str_end(common, &no_next_char);
peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, &bad_next_char);

/* Classify the next character. This duplicates the code above. */
#ifdef SUPPORT_UNICODE

classify_next = LABEL();

if (common->ucp)
  {
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  skip_lookup = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  JUMPHERE(skip_lookup);
  }
else
#endif /* SUPPORT_UNICODE */
  {
#if PCRE2_CODE_UNIT_WIDTH != 8
  /* TMP2 may be destroyed by peek_char. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  skip_lookup = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UNICODE
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  skip_lookup = common->utf ? CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255) : NULL;
#endif
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
#if PCRE2_CODE_UNIT_WIDTH != 8
  JUMPHERE(skip_lookup);
#elif defined SUPPORT_UNICODE
  if (skip_lookup != NULL)
    JUMPHERE(skip_lookup);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
  }
set_jumps(no_next_char, LABEL());

/* Normal return: TMP2 = previous type xor next type. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_XOR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, TMP3, 0);
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);

#ifdef SUPPORT_UNICODE
if (common->invalid_utf)
  {
  /* The previous character is invalid: continue with the normal
  classification of the next character unless that is invalid too. */
  set_jumps(bad_prev_char, LABEL());

  peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, NULL);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR, classify_next);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, -1);
  OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);

  /* Only the next character is invalid. */
  set_jumps(bad_next_char, LABEL());
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  OP1(SLJIT_MOV, TMP2, 0, TMP3, 0);
  OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
  }
#endif /* SUPPORT_UNICODE */
}
6704
6705
/* Emits the test for one span [lo, hi) of character codes. TMP1 must hold
the character value minus 'bias' on entry. A one character span is tested
with a single equality compare and leaves TMP1 as it is; a longer span
first rebases TMP1 to 'lo' and then compares it against the span width.
When 'inside' is set the jump added to 'backtracks' is taken for values
inside the span, otherwise for values outside of it. Returns the bias that
applies to TMP1 after the emitted code. */
static int emit_class_span_check(compiler_common *common, jump_list **backtracks,
    int lo, int hi, int bias, BOOL inside)
{
DEFINE_COMPILER;

if (lo + 1 == hi)
  {
  add_jump(compiler, backtracks, CMP(inside ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, lo - bias));
  return bias;
  }

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, lo - bias);
add_jump(compiler, backtracks, CMP(inside ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, hi - lo));
return lo;
}

static BOOL optimize_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* Scans the 256 bit class bitmap for the positions where the bit value
changes. If there are only a few such positions, the bitmap lookup is
replaced by compare instructions whose jumps are added to 'backtracks',
and TRUE is returned. Returns FALSE (nothing useful emitted) when the
bitmap has too many changes. May destroy TMP1. */
DEFINE_COMPILER;
int edges[MAX_CLASS_RANGE_SIZE];
sljit_u8 state, sample, filler, hit;
int pos = 0, count = 0;
int bias, delta;

state = bits[0] & 0x1;
/* A whole byte made of the current bit value: all zeroes or all ones. */
filler = state ? 0xff : 0x00;

while (pos < 256)
  {
  /* A byte aligned position whose byte is uniform can be skipped at once. */
  if ((pos & 0x7) == 0 && bits[pos >> 3] == filler)
    {
    pos += 8;
    continue;
    }

  sample = (bits[pos >> 3] >> (pos & 0x7)) & 0x1;
  if (sample != state)
    {
    if (count >= MAX_CLASS_RANGE_SIZE)
      return FALSE;
    edges[count++] = pos;
    state = sample;
    filler = sample ? 0xff : 0x00;
    }
  pos++;
  }

/* Depending on nclass, the value of the last bit adds a change at 256. */
if ((state != 0) == !nclass)
  {
  if (count >= MAX_CLASS_RANGE_SIZE)
    return FALSE;
  edges[count++] = 256;
  }

if (count < 0 || count > 4)
  return FALSE;

hit = bits[0] & 0x1;
if (invert)
  hit ^= 0x1;

switch(count)
  {
  case 0:
  /* The bitmap is uniform. When hit is zero no character is accepted,
  otherwise all characters are accepted and no code is needed. */
  if (hit == 0)
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  return TRUE;

  case 1:
  add_jump(compiler, backtracks, CMP(hit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, edges[0]));
  return TRUE;

  case 2:
  emit_class_span_check(common, backtracks, edges[0], edges[1], 0, hit != 0);
  return TRUE;

  case 3:
  if (hit != 0)
    {
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, edges[2]));
    emit_class_span_check(common, backtracks, edges[0], edges[1], 0, TRUE);
    }
  else
    {
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, edges[0]));
    emit_class_span_check(common, backtracks, edges[1], edges[2], 0, TRUE);
    }
  return TRUE;

  case 4:
  delta = edges[2] - edges[0];

  /* Two spans of the same width whose start positions differ in a single
  bit: setting that bit in TMP1 folds the first span onto the second one. */
  if ((edges[1] - edges[0]) == (edges[3] - edges[2])
      && (edges[0] | delta) == edges[2]
      && (edges[1] & delta) == 0
      && is_powerof2(delta))
    {
    SLJIT_ASSERT((edges[0] & delta) == 0 && (edges[2] & edges[3] & delta) != 0);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, delta);
    emit_class_span_check(common, backtracks, edges[2], edges[3], 0, hit != 0);
    return TRUE;
    }

  if (hit != 0)
    {
    /* The second test must take into account whether the first one
    rebased TMP1. */
    bias = emit_class_span_check(common, backtracks, edges[0], edges[1], 0, TRUE);
    emit_class_span_check(common, backtracks, edges[2], edges[3], bias, TRUE);
    return TRUE;
    }

  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, edges[0]);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, edges[3] - edges[0]));
  emit_class_span_check(common, backtracks, edges[1], edges[2], edges[0], TRUE);
  return TRUE;

  default:
  SLJIT_UNREACHABLE();
  return FALSE;
  }
}
6856
6857
static BOOL optimize_class_chars(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* Collects the characters selected by the 256 bit class bitmap (the bitmap
is complemented first when nclass is set). If they fit into a short list,
one compare plus conditional move is emitted per list entry and a single
jump is added to 'backtracks'. Returns FALSE when conditional move is not
available or the list would be too long. May destroy TMP1. */
DEFINE_COMPILER;
uint16_t chars[MAX_CLASS_CHARS_SIZE];
uint8_t pending;
sljit_s32 type;
int idx, bitno, slot, ch;
int count = 0, folded = 0;

if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
  return FALSE;

for (idx = 0; idx < 32; idx++)
  {
  pending = nclass ? (uint8_t) ~bits[idx] : bits[idx];

  for (bitno = 0; pending != 0; pending >>= 1, bitno++)
    {
    if ((pending & 0x1) == 0)
      continue;

    ch = idx * 8 + bitno;

    /* A character with bit 0x20 set may pair up with an already listed
    character which differs from it only in that bit. */
    slot = count;
    if ((ch & 0x20) != 0)
      {
      slot = 0;
      while (slot < count && chars[slot] != ch - 0x20)
        slot++;
      }

    if (slot < count)
      {
      /* The entry now holds the 0x20 variant and the 0x100 pair marker. */
      chars[slot] |= 0x120;
      }
    else
      {
      if (count >= MAX_CLASS_CHARS_SIZE)
        return FALSE;

      chars[count++] = (uint16_t) ch;
      }
    }
  }

if (count == 0) return FALSE;  /* Should never occur, but stops analyzers complaining. */

/* TMP2 collects the result. A conditional move copies TMP1 into TMP2 on a
hit, which cannot signal character zero, so that one is tested with
OP_FLAGS instead. */
idx = 0;
if (chars[0] == 0)
  {
  idx = 1;
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_ZERO);
  }
else
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);

/* First pass: unpaired characters. */
for (; idx < count; idx++)
  {
  if ((chars[idx] & 0x100) != 0)
    {
    folded++;
    continue;
    }

  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, chars[idx]);
  CMOV(SLJIT_ZERO, TMP2, TMP1, 0);
  }

/* Second pass: paired characters, compared after setting bit 0x20. */
if (folded != 0)
  {
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x20);

  for (idx = 0; idx < count; idx++)
    {
    if ((chars[idx] & 0x100) == 0)
      continue;

    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, chars[idx] & 0xff);
    CMOV(SLJIT_ZERO, TMP2, TMP1, 0);
    }
  }

type = (invert ? !nclass : nclass) ? SLJIT_NOT_EQUAL : SLJIT_EQUAL;
add_jump(compiler, backtracks, CMP(type, TMP2, 0, SLJIT_IMM, 0));
return TRUE;
}
6957
6958
static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* Tries the range based class optimization first and falls back to the
character list based one. Returns TRUE when either of them emitted the
class check. May destroy TMP1. */
return optimize_class_ranges(common, bits, nclass, invert, backtracks)
  ? TRUE
  : optimize_class_chars(common, bits, nclass, invert, backtracks);
}
6965
6966
static void check_anynewline(compiler_common *common)
{
/* Fast-call routine which checks whether TMP1 contains a newline character:
0x0a-0x0d, 0x85 and, when wide characters are possible, 0x2028-0x2029.
The result is collected in TMP2 and the last operation also sets the zero
flag from it. TMP1 is modified and TMP2 is destroyed. */
DEFINE_COMPILER;
/* Characters above 0xff are possible in 16/32 bit mode, or in 8 bit mode
when UTF is enabled. */
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
const BOOL check_wide = TRUE;
#elif defined SUPPORT_UNICODE
const BOOL check_wide = common->utf;
#else
const BOOL check_wide = FALSE;
#endif

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* After rebasing, 0x0a-0x0d is covered by one range compare. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);

if (check_wide)
  {
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* Setting the lowest bit merges 0x2028 and 0x2029 into one compare. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
  }

/* Merges the result of the last compare, whichever it was. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
6992
6993
static void check_hspace(compiler_common *common)
{
/* Fast-call routine which checks whether TMP1 contains a horizontal space
character: 0x09, 0x20, 0xa0 and, when wide characters are possible, 0x1680,
0x180e, 0x2000-0x200a, 0x202f, 0x205f and 0x3000. The result is collected
in TMP2 and the last operation also sets the zero flag from it. TMP2 is
destroyed, and TMP1 is modified when the wide characters are checked. */
DEFINE_COMPILER;
/* Characters above 0xff are possible in 16/32 bit mode, or in 8 bit mode
when UTF is enabled. */
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
const BOOL check_wide = TRUE;
#elif defined SUPPORT_UNICODE
const BOOL check_wide = common->utf;
#else
const BOOL check_wide = FALSE;
#endif

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);

if (check_wide)
  {
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

  /* The remaining characters are compared relative to 0x2000. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
  }

/* Merges the result of the last compare, whichever it was. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
7031
7032
static void check_vspace(compiler_common *common)
{
/* Fast-call routine which checks whether TMP1 contains a vertical space
character: 0x0a-0x0d, 0x85 and, when wide characters are possible,
0x2028-0x2029. The result is collected in TMP2 and the last operation also
sets the zero flag from it. TMP1 is modified and TMP2 is destroyed. */
DEFINE_COMPILER;
/* Characters above 0xff are possible in 16/32 bit mode, or in 8 bit mode
when UTF is enabled. */
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
const BOOL check_wide = TRUE;
#elif defined SUPPORT_UNICODE
const BOOL check_wide = common->utf;
#else
const BOOL check_wide = FALSE;
#endif

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* After rebasing, 0x0a-0x0d is covered by one range compare. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);

if (check_wide)
  {
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  /* Setting the lowest bit merges 0x2028 and 0x2029 into one compare. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
  }

/* Merges the result of the last compare, whichever it was. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
7059
7060
static void do_casefulcmp(compiler_common *common)
{
/* Fast-call routine which compares two code unit sequences unit by unit.
One sequence is addressed by TMP1, the other one starts at STR_PTR - TMP2.
TMP2 is decreased by one code unit after each equal pair, and the loop is
left when TMP2 reaches zero or at the first difference. The return address
is kept in LOCALS0 and is reloaded into TMP1 before returning. */
DEFINE_COMPILER;
struct sljit_jump *mismatch;
struct sljit_label *loop;
const int char1_reg = HAS_VIRTUAL_REGISTERS ? STR_END : TMP3;
const int char2_reg = HAS_VIRTUAL_REGISTERS ? STACK_TOP : RETURN_ADDR;
/* 0: plain loads, 1: post-update loads, 2: pre-update loads. */
int opt_type = 0;

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

if (char1_reg == STR_END)
  {
  /* The borrowed registers are saved here and restored before returning. */
  OP1(SLJIT_MOV, TMP3, 0, char1_reg, 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, char2_reg, 0);
  }

/* Ask the backend which updating load form it supports. */
if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  opt_type = 1;
else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  opt_type = 2;

switch(opt_type)
  {
  case 1:
  loop = LABEL();
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  case 2:
  /* Both pointers are moved back by one unit before the first load. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  loop = LABEL();
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  default:
  loop = LABEL();
  OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
  OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  break;
  }

/* The rest of the loop is the same for every load form. */
mismatch = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, loop);

JUMPHERE(mismatch);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

if (opt_type == 2)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (char1_reg == STR_END)
  {
  OP1(SLJIT_MOV, char1_reg, 0, TMP3, 0);
  OP1(SLJIT_MOV, char2_reg, 0, RETURN_ADDR, 0);
  }

OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
}
7139
7140
static void do_caselesscmp(compiler_common *common)
{
/* Fast-call routine which compares two code unit sequences unit by unit,
after mapping each unit through the common->lcc table (in 16/32 bit mode
units above 255 are compared without mapping). One sequence is addressed
by TMP1, the other one starts at STR_PTR - TMP2. TMP2 is decreased by one
code unit after each equal pair, and the loop is left when TMP2 reaches
zero or at the first difference. The return address is kept in LOCALS0 and
the original value of char1_reg in LOCALS1. */
DEFINE_COMPILER;
struct sljit_jump *skip;
struct sljit_label *loop;
const int char1_reg = STR_END;
const int char2_reg = HAS_VIRTUAL_REGISTERS ? STACK_TOP : RETURN_ADDR;
const int lcc_table = HAS_VIRTUAL_REGISTERS ? STACK_LIMIT : TMP3;
/* 0: plain loads, 1: post-update loads, 2: pre-update loads. */
int opt_type;

/* Ask the backend which updating load form it supports. */
if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  opt_type = 1;
else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  opt_type = 2;
else
  opt_type = 0;

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, char1_reg, 0);

if (char2_reg == STACK_TOP)
  {
  /* The borrowed registers are saved here and restored before returning. */
  OP1(SLJIT_MOV, TMP3, 0, char2_reg, 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, lcc_table, 0);
  }

OP1(SLJIT_MOV, lcc_table, 0, SLJIT_IMM, common->lcc);

switch(opt_type)
  {
  case 1:
  loop = LABEL();
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  case 2:
  /* Both pointers are moved back by one unit before the first load. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  loop = LABEL();
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  default:
  /* STR_PTR is advanced later, after the table lookups. */
  loop = LABEL();
  OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
  OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  break;
  }

/* Map both units through the lcc table. */
#if PCRE2_CODE_UNIT_WIDTH != 8
skip = CMP(SLJIT_GREATER, char1_reg, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, char1_reg, 0, SLJIT_MEM2(lcc_table, char1_reg), 0);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(skip);
skip = CMP(SLJIT_GREATER, char2_reg, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, char2_reg, 0, SLJIT_MEM2(lcc_table, char2_reg), 0);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(skip);
#endif

if (opt_type == 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

skip = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, loop);

JUMPHERE(skip);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

if (opt_type == 2)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (char2_reg == STACK_TOP)
  {
  OP1(SLJIT_MOV, char2_reg, 0, TMP3, 0);
  OP1(SLJIT_MOV, lcc_table, 0, RETURN_ADDR, 0);
  }

OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
}
7237
7238
static PCRE2_SPTR byte_sequence_compare(compiler_common *common, BOOL caseless, PCRE2_SPTR cc,
7239
    compare_context *context, jump_list **backtracks)
7240
{
7241
DEFINE_COMPILER;
7242
unsigned int othercasebit = 0;
7243
PCRE2_SPTR othercasechar = NULL;
7244
#ifdef SUPPORT_UNICODE
7245
int utflength;
7246
#endif
7247
7248
if (caseless && char_has_othercase(common, cc))
7249
  {
7250
  othercasebit = char_get_othercase_bit(common, cc);
7251
  SLJIT_ASSERT(othercasebit);
7252
  /* Extracting bit difference info. */
7253
#if PCRE2_CODE_UNIT_WIDTH == 8
7254
  othercasechar = cc + (othercasebit >> 8);
7255
  othercasebit &= 0xff;
7256
#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
7257
  /* Note that this code only handles characters in the BMP. If there
7258
  ever are characters outside the BMP whose othercase differs in only one
7259
  bit from itself (there currently are none), this code will need to be
7260
  revised for PCRE2_CODE_UNIT_WIDTH == 32. */
7261
  othercasechar = cc + (othercasebit >> 9);
7262
  if ((othercasebit & 0x100) != 0)
7263
    othercasebit = (othercasebit & 0xff) << 8;
7264
  else
7265
    othercasebit &= 0xff;
7266
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
7267
  }
7268
7269
if (context->sourcereg == -1)
7270
  {
7271
#if PCRE2_CODE_UNIT_WIDTH == 8
7272
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
7273
  if (context->length >= 4)
7274
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
7275
  else if (context->length >= 2)
7276
    OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
7277
  else
7278
#endif
7279
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
7280
#elif PCRE2_CODE_UNIT_WIDTH == 16
7281
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
7282
  if (context->length >= 4)
7283
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
7284
  else
7285
#endif
7286
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
7287
#elif PCRE2_CODE_UNIT_WIDTH == 32
7288
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
7289
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
7290
  context->sourcereg = TMP2;
7291
  }
7292
7293
#ifdef SUPPORT_UNICODE
7294
utflength = 1;
7295
if (common->utf && HAS_EXTRALEN(*cc))
7296
  utflength += GET_EXTRALEN(*cc);
7297
7298
do
7299
  {
7300
#endif
7301
7302
  context->length -= IN_UCHARS(1);
7303
#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
7304
7305
  /* Unaligned read is supported. */
7306
  if (othercasebit != 0 && othercasechar == cc)
7307
    {
7308
    context->c.asuchars[context->ucharptr] = *cc | othercasebit;
7309
    context->oc.asuchars[context->ucharptr] = othercasebit;
7310
    }
7311
  else
7312
    {
7313
    context->c.asuchars[context->ucharptr] = *cc;
7314
    context->oc.asuchars[context->ucharptr] = 0;
7315
    }
7316
  context->ucharptr++;
7317
7318
#if PCRE2_CODE_UNIT_WIDTH == 8
7319
  if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
7320
#else
7321
  if (context->ucharptr >= 2 || context->length == 0)
7322
#endif
7323
    {
7324
    if (context->length >= 4)
7325
      OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
7326
    else if (context->length >= 2)
7327
      OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
7328
#if PCRE2_CODE_UNIT_WIDTH == 8
7329
    else if (context->length >= 1)
7330
      OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
7331
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
7332
    context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
7333
7334
    switch(context->ucharptr)
7335
      {
7336
      case 4 / sizeof(PCRE2_UCHAR):
7337
      if (context->oc.asint != 0)
7338
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
7339
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
7340
      break;
7341
7342
      case 2 / sizeof(PCRE2_UCHAR):
7343
      if (context->oc.asushort != 0)
7344
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
7345
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
7346
      break;
7347
7348
#if PCRE2_CODE_UNIT_WIDTH == 8
7349
      case 1:
7350
      if (context->oc.asbyte != 0)
7351
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
7352
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
7353
      break;
7354
#endif
7355
7356
      default:
7357
      SLJIT_UNREACHABLE();
7358
      break;
7359
      }
7360
    context->ucharptr = 0;
7361
    }
7362
7363
#else
7364
7365
  /* Unaligned read is unsupported or in 32 bit mode. */
7366
  if (context->length >= 1)
7367
    OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
7368
7369
  context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
7370
7371
  if (othercasebit != 0 && othercasechar == cc)
7372
    {
7373
    OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
7374
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
7375
    }
7376
  else
7377
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
7378
7379
#endif
7380
7381
  cc++;
7382
#ifdef SUPPORT_UNICODE
7383
  utflength--;
7384
  }
7385
while (utflength > 0);
7386
#endif
7387
7388
return cc;
7389
}
7390
7391
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
7392
7393
/* Changes the offset which is subtracted from typereg to (value). Emits an
add or subtract only when the offset really changes. Uses the typereg and
typeoffset variables of the enclosing function. */
#define SET_TYPE_OFFSET(value) \
  if ((value) < typeoffset) \
    OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
  else if ((value) > typeoffset) \
    OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
  typeoffset = (value);
7402
7403
/* Changes the offset which is subtracted from the character in TMP1 to
(value). Emits an add or subtract only when the offset really changes.
Uses the charoffset variable of the enclosing function. */
#define SET_CHAR_OFFSET(value) \
  if ((value) < charoffset) \
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
  else if ((value) > charoffset) \
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
  charoffset = (value);
7412
7413
/* Declared here because compile_xclass_matchingpath() calls it (with
OP_ALLANY, for the PT_ANY property). */
static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr);
7414
7415
static void compile_xclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
7416
{
7417
DEFINE_COMPILER;
7418
jump_list *found = NULL;
7419
jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
7420
sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
7421
struct sljit_jump *jump = NULL;
7422
PCRE2_SPTR ccbegin;
7423
int compares, invertcmp, numberofcmps;
7424
#if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
7425
BOOL utf = common->utf;
7426
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */
7427
7428
#ifdef SUPPORT_UNICODE
7429
BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
7430
BOOL charsaved = FALSE;
7431
int typereg = TMP1;
7432
const sljit_u32 *other_cases;
7433
sljit_uw typeoffset;
7434
#endif /* SUPPORT_UNICODE */
7435
7436
/* Scanning the necessary info. */
7437
cc++;
7438
ccbegin = cc;
7439
compares = 0;
7440
7441
if (cc[-1] & XCL_MAP)
7442
  {
7443
  min = 0;
7444
  cc += 32 / sizeof(PCRE2_UCHAR);
7445
  }
7446
7447
while (*cc != XCL_END)
7448
  {
7449
  compares++;
7450
  if (*cc == XCL_SINGLE)
7451
    {
7452
    cc ++;
7453
    GETCHARINCTEST(c, cc);
7454
    if (c > max) max = c;
7455
    if (c < min) min = c;
7456
#ifdef SUPPORT_UNICODE
7457
    needschar = TRUE;
7458
#endif /* SUPPORT_UNICODE */
7459
    }
7460
  else if (*cc == XCL_RANGE)
7461
    {
7462
    cc ++;
7463
    GETCHARINCTEST(c, cc);
7464
    if (c < min) min = c;
7465
    GETCHARINCTEST(c, cc);
7466
    if (c > max) max = c;
7467
#ifdef SUPPORT_UNICODE
7468
    needschar = TRUE;
7469
#endif /* SUPPORT_UNICODE */
7470
    }
7471
#ifdef SUPPORT_UNICODE
7472
  else
7473
    {
7474
    SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
7475
    cc++;
7476
    if (*cc == PT_CLIST)
7477
      {
7478
      other_cases = PRIV(ucd_caseless_sets) + cc[1];
7479
      while (*other_cases != NOTACHAR)
7480
        {
7481
        if (*other_cases > max) max = *other_cases;
7482
        if (*other_cases < min) min = *other_cases;
7483
        other_cases++;
7484
        }
7485
      }
7486
    else
7487
      {
7488
      max = READ_CHAR_MAX;
7489
      min = 0;
7490
      }
7491
7492
    switch(*cc)
7493
      {
7494
      case PT_ANY:
7495
      /* Any either accepts everything or ignored. */
7496
      if (cc[-1] == XCL_PROP)
7497
        {
7498
        compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
7499
        if (list == backtracks)
7500
          add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7501
        return;
7502
        }
7503
      break;
7504
7505
      case PT_LAMP:
7506
      case PT_GC:
7507
      case PT_PC:
7508
      case PT_ALNUM:
7509
      needstype = TRUE;
7510
      break;
7511
7512
      case PT_SC:
7513
      needsscript = TRUE;
7514
      break;
7515
7516
      case PT_SPACE:
7517
      case PT_PXSPACE:
7518
      case PT_WORD:
7519
      case PT_PXGRAPH:
7520
      case PT_PXPRINT:
7521
      case PT_PXPUNCT:
7522
      needstype = TRUE;
7523
      needschar = TRUE;
7524
      break;
7525
7526
      case PT_CLIST:
7527
      case PT_UCNC:
7528
      needschar = TRUE;
7529
      break;
7530
7531
      default:
7532
      SLJIT_UNREACHABLE();
7533
      break;
7534
      }
7535
    cc += 2;
7536
    }
7537
#endif /* SUPPORT_UNICODE */
7538
  }
7539
SLJIT_ASSERT(compares > 0);
7540
7541
/* We are not necessary in utf mode even in 8 bit mode. */
7542
cc = ccbegin;
7543
if ((cc[-1] & XCL_NOT) != 0)
7544
  read_char(common, min, max, backtracks, READ_CHAR_UPDATE_STR_PTR);
7545
else
7546
  {
7547
#ifdef SUPPORT_UNICODE
7548
  read_char(common, min, max, (needstype || needsscript) ? backtracks : NULL, 0);
7549
#else /* !SUPPORT_UNICODE */
7550
  read_char(common, min, max, NULL, 0);
7551
#endif /* SUPPORT_UNICODE */
7552
  }
7553
7554
if ((cc[-1] & XCL_HASPROP) == 0)
7555
  {
7556
  if ((cc[-1] & XCL_MAP) != 0)
7557
    {
7558
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
7559
    if (!optimize_class(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found))
7560
      {
7561
      OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
7562
      OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
7563
      OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
7564
      OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
7565
      OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
7566
      add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
7567
      }
7568
7569
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7570
    JUMPHERE(jump);
7571
7572
    cc += 32 / sizeof(PCRE2_UCHAR);
7573
    }
7574
  else
7575
    {
7576
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
7577
    add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
7578
    }
7579
  }
7580
else if ((cc[-1] & XCL_MAP) != 0)
7581
  {
7582
  OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
7583
#ifdef SUPPORT_UNICODE
7584
  charsaved = TRUE;
7585
#endif /* SUPPORT_UNICODE */
7586
  if (!optimize_class(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
7587
    {
7588
#if PCRE2_CODE_UNIT_WIDTH == 8
7589
    jump = NULL;
7590
    if (common->utf)
7591
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
7592
      jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
7593
7594
    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
7595
    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
7596
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
7597
    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
7598
    OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
7599
    add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
7600
7601
#if PCRE2_CODE_UNIT_WIDTH == 8
7602
    if (common->utf)
7603
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
7604
      JUMPHERE(jump);
7605
    }
7606
7607
  OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
7608
  cc += 32 / sizeof(PCRE2_UCHAR);
7609
  }
7610
7611
#ifdef SUPPORT_UNICODE
7612
if (needstype || needsscript)
7613
  {
7614
  if (needschar && !charsaved)
7615
    OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
7616
7617
#if PCRE2_CODE_UNIT_WIDTH == 32
7618
  if (!common->utf)
7619
    {
7620
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
7621
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
7622
    JUMPHERE(jump);
7623
    }
7624
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
7625
7626
  OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
7627
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
7628
  OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
7629
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
7630
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
7631
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
7632
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
7633
  OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
7634
7635
  /* Before anything else, we deal with scripts. */
7636
  if (needsscript)
7637
    {
7638
    OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
7639
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
7640
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
7641
7642
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
7643
7644
    ccbegin = cc;
7645
7646
    while (*cc != XCL_END)
7647
      {
7648
      if (*cc == XCL_SINGLE)
7649
        {
7650
        cc ++;
7651
        GETCHARINCTEST(c, cc);
7652
        }
7653
      else if (*cc == XCL_RANGE)
7654
        {
7655
        cc ++;
7656
        GETCHARINCTEST(c, cc);
7657
        GETCHARINCTEST(c, cc);
7658
        }
7659
      else
7660
        {
7661
        SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
7662
        cc++;
7663
        if (*cc == PT_SC)
7664
          {
7665
          compares--;
7666
          invertcmp = (compares == 0 && list != backtracks);
7667
          if (cc[-1] == XCL_NOTPROP)
7668
            invertcmp ^= 0x1;
7669
          jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
7670
          add_jump(compiler, compares > 0 ? list : backtracks, jump);
7671
          }
7672
        cc += 2;
7673
        }
7674
      }
7675
7676
    cc = ccbegin;
7677
7678
    if (needstype)
7679
      {
7680
      /* TMP2 has already been shifted by 2 */
7681
      if (!needschar)
7682
        {
7683
        OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP2, 0);
7684
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
7685
7686
        OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
7687
        }
7688
      else
7689
        {
7690
        OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP2, 0);
7691
        OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
7692
7693
        OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
7694
        OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
7695
        typereg = RETURN_ADDR;
7696
        }
7697
      }
7698
    else if (needschar)
7699
      OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
7700
    }
7701
  else if (needstype)
7702
    {
7703
    OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
7704
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
7705
7706
    if (!needschar)
7707
      {
7708
      OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
7709
7710
      OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
7711
      }
7712
    else
7713
      {
7714
      OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
7715
7716
      OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
7717
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
7718
      typereg = RETURN_ADDR;
7719
      }
7720
    }
7721
  else if (needschar)
7722
    OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
7723
  }
7724
#endif /* SUPPORT_UNICODE */
7725
7726
/* Generating code. */
7727
charoffset = 0;
7728
numberofcmps = 0;
7729
#ifdef SUPPORT_UNICODE
7730
typeoffset = 0;
7731
#endif /* SUPPORT_UNICODE */
7732
7733
while (*cc != XCL_END)
7734
  {
7735
  compares--;
7736
  invertcmp = (compares == 0 && list != backtracks);
7737
  jump = NULL;
7738
7739
  if (*cc == XCL_SINGLE)
7740
    {
7741
    cc ++;
7742
    GETCHARINCTEST(c, cc);
7743
7744
    if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
7745
      {
7746
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
7747
      OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7748
      numberofcmps++;
7749
      }
7750
    else if (numberofcmps > 0)
7751
      {
7752
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
7753
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
7754
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
7755
      numberofcmps = 0;
7756
      }
7757
    else
7758
      {
7759
      jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
7760
      numberofcmps = 0;
7761
      }
7762
    }
7763
  else if (*cc == XCL_RANGE)
7764
    {
7765
    cc ++;
7766
    GETCHARINCTEST(c, cc);
7767
    SET_CHAR_OFFSET(c);
7768
    GETCHARINCTEST(c, cc);
7769
7770
    if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
7771
      {
7772
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
7773
      OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
7774
      numberofcmps++;
7775
      }
7776
    else if (numberofcmps > 0)
7777
      {
7778
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
7779
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
7780
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
7781
      numberofcmps = 0;
7782
      }
7783
    else
7784
      {
7785
      jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
7786
      numberofcmps = 0;
7787
      }
7788
    }
7789
#ifdef SUPPORT_UNICODE
7790
  else
7791
    {
7792
    SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
7793
    if (*cc == XCL_NOTPROP)
7794
      invertcmp ^= 0x1;
7795
    cc++;
7796
    switch(*cc)
7797
      {
7798
      case PT_ANY:
7799
      if (!invertcmp)
7800
        jump = JUMP(SLJIT_JUMP);
7801
      break;
7802
7803
      case PT_LAMP:
7804
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
7805
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
7806
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
7807
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7808
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
7809
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
7810
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
7811
      break;
7812
7813
      case PT_GC:
7814
      c = PRIV(ucp_typerange)[(int)cc[1] * 2];
7815
      SET_TYPE_OFFSET(c);
7816
      jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
7817
      break;
7818
7819
      case PT_PC:
7820
      jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
7821
      break;
7822
7823
      case PT_SC:
7824
      compares++;
7825
      /* Do nothing. */
7826
      break;
7827
7828
      case PT_SPACE:
7829
      case PT_PXSPACE:
7830
      SET_CHAR_OFFSET(9);
7831
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
7832
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
7833
7834
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
7835
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7836
7837
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
7838
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7839
7840
      SET_TYPE_OFFSET(ucp_Zl);
7841
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
7842
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
7843
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
7844
      break;
7845
7846
      case PT_WORD:
7847
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
7848
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
7849
      /* Fall through. */
7850
7851
      case PT_ALNUM:
7852
      SET_TYPE_OFFSET(ucp_Ll);
7853
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
7854
      OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
7855
      SET_TYPE_OFFSET(ucp_Nd);
7856
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
7857
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
7858
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
7859
      break;
7860
7861
      case PT_CLIST:
7862
      other_cases = PRIV(ucd_caseless_sets) + cc[1];
7863
7864
      /* At least three characters are required.
7865
         Otherwise this case would be handled by the normal code path. */
7866
      SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
7867
      SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
7868
7869
      /* Optimizing character pairs, if their difference is power of 2. */
7870
      if (is_powerof2(other_cases[1] ^ other_cases[0]))
7871
        {
7872
        if (charoffset == 0)
7873
          OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
7874
        else
7875
          {
7876
          OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
7877
          OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
7878
          }
7879
        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
7880
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
7881
        other_cases += 2;
7882
        }
7883
      else if (is_powerof2(other_cases[2] ^ other_cases[1]))
7884
        {
7885
        if (charoffset == 0)
7886
          OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
7887
        else
7888
          {
7889
          OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
7890
          OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
7891
          }
7892
        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
7893
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
7894
7895
        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
7896
        OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
7897
7898
        other_cases += 3;
7899
        }
7900
      else
7901
        {
7902
        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
7903
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
7904
        }
7905
7906
      while (*other_cases != NOTACHAR)
7907
        {
7908
        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
7909
        OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
7910
        }
7911
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
7912
      break;
7913
7914
      case PT_UCNC:
7915
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
7916
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
7917
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
7918
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7919
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
7920
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7921
7922
      SET_CHAR_OFFSET(0xa0);
7923
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
7924
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
7925
      SET_CHAR_OFFSET(0);
7926
      OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
7927
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL);
7928
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
7929
      break;
7930
7931
      case PT_PXGRAPH:
7932
      /* C and Z groups are the farthest two groups. */
7933
      SET_TYPE_OFFSET(ucp_Ll);
7934
      OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
7935
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);
7936
7937
      jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
7938
7939
      /* In case of ucp_Cf, we overwrite the result. */
7940
      SET_CHAR_OFFSET(0x2066);
7941
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
7942
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
7943
7944
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
7945
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7946
7947
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
7948
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7949
7950
      JUMPHERE(jump);
7951
      jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
7952
      break;
7953
7954
      case PT_PXPRINT:
7955
      /* C and Z groups are the farthest two groups. */
7956
      SET_TYPE_OFFSET(ucp_Ll);
7957
      OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
7958
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);
7959
7960
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
7961
      OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_NOT_EQUAL);
7962
7963
      jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
7964
7965
      /* In case of ucp_Cf, we overwrite the result. */
7966
      SET_CHAR_OFFSET(0x2066);
7967
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
7968
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
7969
7970
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
7971
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7972
7973
      JUMPHERE(jump);
7974
      jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
7975
      break;
7976
7977
      case PT_PXPUNCT:
7978
      SET_TYPE_OFFSET(ucp_Sc);
7979
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
7980
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
7981
7982
      SET_CHAR_OFFSET(0);
7983
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
7984
      OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL);
7985
7986
      SET_TYPE_OFFSET(ucp_Pc);
7987
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
7988
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
7989
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
7990
      break;
7991
7992
      default:
7993
      SLJIT_UNREACHABLE();
7994
      break;
7995
      }
7996
    cc += 2;
7997
    }
7998
#endif /* SUPPORT_UNICODE */
7999
8000
  if (jump != NULL)
8001
    add_jump(compiler, compares > 0 ? list : backtracks, jump);
8002
  }
8003
8004
if (found != NULL)
8005
  set_jumps(found, LABEL());
8006
}
8007
8008
#undef SET_TYPE_OFFSET
8009
#undef SET_CHAR_OFFSET
8010
8011
#endif
8012
8013
/* Emits the matching path of the simple, zero-width assertion opcodes
(OP_SOD, OP_SOM, OP_NOT_WORD_BOUNDARY, OP_WORD_BOUNDARY, OP_EODN, OP_EOD,
OP_DOLL, OP_DOLLM, OP_CIRC, OP_CIRCM and OP_REVERSE).

Arguments:
  common      the compiler state
  type        the opcode to compile
  cc          code pointer; only OP_REVERSE reads from it (its length operand)
  backtracks  list collecting the jumps that are taken when the check fails

Returns:      cc for every opcode except OP_REVERSE, which returns
              cc + LINK_SIZE
*/
static PCRE2_SPTR compile_simple_assertion_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
struct sljit_jump *pending[4];
#ifdef SUPPORT_UNICODE
struct sljit_label *again;
#endif /* SUPPORT_UNICODE */
/* A fixed newline whose value does not fit in 8 bits is compared as two
code units: (newline >> 8) & 0xff followed by newline & 0xff. */
BOOL two_unit_nl = (common->nltype == NLTYPE_FIXED && common->newline > 255);
sljit_sw nl_first = (sljit_sw)((common->newline >> 8) & 0xff);
sljit_sw nl_second = (sljit_sw)(common->newline & 0xff);
sljit_sw field;
int argbase;  /* register through which the jit_arguments fields are read */
int count;

switch(type)
  {
  case OP_SOD:
  case OP_SOM:
  /* Fail unless STR_PTR equals the begin (OP_SOD) or str (OP_SOM) field. */
  if (type == OP_SOD)
    field = SLJIT_OFFSETOF(jit_arguments, begin);
  else
    field = SLJIT_OFFSETOF(jit_arguments, str);

  argbase = ARGUMENTS;
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    argbase = TMP1;
    }
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(argbase), field);
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
  return cc;

  case OP_NOT_WORD_BOUNDARY:
  case OP_WORD_BOUNDARY:
  /* The shared wordboundary routine is reached through a fast call. */
  add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
#ifdef SUPPORT_UNICODE
  if (common->invalid_utf)
    {
    /* Here the result is tested as a value in TMP2 instead of the zero flag. */
    if (type == OP_NOT_WORD_BOUNDARY)
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
    else
      add_jump(compiler, backtracks, CMP(SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0));
    return cc;
    }
#endif /* SUPPORT_UNICODE */
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  if (type == OP_NOT_WORD_BOUNDARY)
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  else
    add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
  return cc;

  case OP_EODN:
  /* Requires rather complex checks. pending[0] is taken when STR_PTR is
  already at (or beyond) STR_END. */
  pending[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  if (two_unit_nl)
    {
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (common->mode != PCRE2_JIT_COMPLETE)
      {
      pending[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
      OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, nl_first);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL);
      add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
      check_partial(common, TRUE);
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
      JUMPHERE(pending[1]);
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));

    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, nl_first));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, nl_second));
    }
  else if (common->nltype == NLTYPE_FIXED)
    {
    /* Single unit newline: it must be the last code unit of the subject. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
    }
  else
    {
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    /* pending[1]: the current character is not CR. */
    pending[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
    /* pending[2]: STR_PTR + 2 code units is beyond STR_END. */
    pending[2] = JUMP(SLJIT_GREATER);
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */);
    /* Equal: exactly two code units are left, the second must be NL. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    pending[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

    JUMPHERE(pending[1]);
    if (common->nltype != NLTYPE_ANYCRLF)
      {
      /* STR_PTR is saved in TMP3 and restored after the newline test. */
      OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
      read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
      add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
      sljit_set_current_flags(compiler, SLJIT_SET_Z);
      add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
      OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
      }
    else
      {
      OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
      }
    JUMPHERE(pending[2]);
    JUMPHERE(pending[3]);
    }
  JUMPHERE(pending[0]);
  if (common->mode != PCRE2_JIT_COMPLETE)
    check_partial(common, TRUE);
  return cc;

  case OP_EOD:
  /* Fail while there are characters left in the subject. */
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
  if (common->mode != PCRE2_JIT_COMPLETE)
    check_partial(common, TRUE);
  return cc;

  case OP_DOLL:
  /* Fail when PCRE2_NOTEOL is set in the options field. */
  argbase = ARGUMENTS;
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
    argbase = TMP2;
    }
  OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(argbase), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));

  if (common->endonly)
    {
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
    check_partial(common, FALSE);
    }
  else
    {
    /* Without endonly the rest of the check is the same as OP_EODN. */
    compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
    }
  return cc;

  case OP_DOLLM:
  /* pending[1]: not at the end of the subject, a newline must follow. */
  pending[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
  argbase = ARGUMENTS;
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
    argbase = TMP2;
    }
  OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(argbase), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  check_partial(common, FALSE);
  /* pending[0]: skips the newline test below. */
  pending[0] = JUMP(SLJIT_JUMP);
  JUMPHERE(pending[1]);

  if (!two_unit_nl)
    {
    peek_char(common, common->nlmax, TMP3, 0, NULL);
    check_newlinechar(common, common->nltype, backtracks, FALSE);
    }
  else
    {
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (common->mode != PCRE2_JIT_COMPLETE)
      {
      pending[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
      /* STR_PTR = STR_END - IN_UCHARS(1) */
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, nl_first));
      check_partial(common, TRUE);
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
      JUMPHERE(pending[1]);
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));

    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, nl_first));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, nl_second));
    }
  JUMPHERE(pending[0]);
  return cc;

  case OP_CIRC:
  /* Fail when STR_PTR is after the begin field or PCRE2_NOTBOL is set. */
  argbase = ARGUMENTS;
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
    argbase = TMP2;
    }
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(argbase), SLJIT_OFFSETOF(jit_arguments, begin));
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
  OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(argbase), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  return cc;

  case OP_CIRCM:
  /* TMP2 might be used by peek_char_back, so the arguments pointer (when it
  has to be loaded) goes to TMP1 and TMP2 receives the begin field. */
  argbase = ARGUMENTS;
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    argbase = TMP1;
    }
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(argbase), SLJIT_OFFSETOF(jit_arguments, begin));
  /* pending[1]: STR_PTR is after begin, a newline must precede it. */
  pending[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);
  OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(argbase), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  /* pending[0]: skips the newline test below. */
  pending[0] = JUMP(SLJIT_JUMP);
  JUMPHERE(pending[1]);

  if (!common->alt_circumflex)
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  if (!two_unit_nl)
    {
    peek_char_back(common, common->nlmax, backtracks);
    check_newlinechar(common, common->nltype, backtracks, FALSE);
    }
  else
    {
    OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, TMP2, 0));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, nl_first));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, nl_second));
    }
  JUMPHERE(pending[0]);
  return cc;

  case OP_REVERSE:
  /* The operand is the number of characters to step back. */
  count = GET(cc, 0);
  cc += LINK_SIZE;
  if (count == 0)
    return cc;

  argbase = ARGUMENTS;
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    argbase = TMP1;
    }
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(argbase), SLJIT_OFFSETOF(jit_arguments, begin));
#ifdef SUPPORT_UNICODE
  if (common->utf)
    {
    /* One move_back per character, TMP3 is the down counter. */
    OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, count);
    again = LABEL();
    add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0));
    move_back(common, backtracks, FALSE);
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, again);
    }
  else
#endif
    {
    /* Fixed width code units: a single subtraction and a bounds check. */
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(count));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0));
    }
  check_start_used_ptr(common);
  return cc;
  }

/* Every opcode handled by this function returns from inside the switch. */
SLJIT_UNREACHABLE();
return cc;
}
8287
8288
#ifdef SUPPORT_UNICODE
8289
8290
#if PCRE2_CODE_UNIT_WIDTH != 32
8291
8292
/* Scans forward from cc over one extended grapheme cluster. Characters are
decoded with GETCHARINC / GETCHAR, and the subject bounds are taken from
args->begin and args->end.

Arguments:
  args   the JIT arguments (only the begin and end fields are read)
  cc     pointer to the first character of the cluster

Returns: pointer just after the last character accepted into the cluster
*/
static PCRE2_SPTR SLJIT_FUNC do_extuni_utf(jit_arguments *args, PCRE2_SPTR cc)
{
PCRE2_SPTR subject_start = args->begin;
PCRE2_SPTR subject_end = args->end;
PCRE2_SPTR left_start = cc;     /* start of the character left of the boundary */
PCRE2_SPTR cluster_end = NULL;  /* NULL until the first character is consumed */
PCRE2_SPTR back;
int left_gb, right_gb, ri_run;
uint32_t ch;

for (;;)
  {
  GETCHARINC(ch, cc);
  right_gb = UCD_GRAPHBREAK(ch);

  if (cluster_end == NULL)
    {
    /* The first character always belongs to the cluster. */
    left_gb = right_gb;
    }
  else
    {
    /* A clear bit in the table row of the left property ends the cluster. */
    if ((PRIV(ucp_gbtable)[left_gb] & (1 << right_gb)) == 0)
      break;

    /* Not breaking between Regional Indicators is allowed only if there
    are an even number of preceding RIs. */

    if (left_gb == ucp_gbRegionalIndicator && right_gb == ucp_gbRegionalIndicator)
      {
      /* back starts at the left-hand character and walks towards the start
      of the subject; ri_run counts the RIs found before that character. */
      for (back = left_start, ri_run = 0; back > subject_start; ri_run++)
        {
        back--;
        BACKCHAR(back);
        GETCHAR(ch, back);

        if (UCD_GRAPHBREAK(ch) != ucp_gbRegionalIndicator)
          break;
        }

      if ((ri_run & 1) != 0)
        break;  /* Grapheme break required */
      }

    /* If Extend or ZWJ follows Extended_Pictographic, the left property is
    kept; this allows any number of them before a following
    Extended_Pictographic. */

    if (!(left_gb == ucp_gbExtended_Pictographic &&
          (right_gb == ucp_gbExtend || right_gb == ucp_gbZWJ)))
      left_gb = right_gb;

    left_start = cluster_end;
    }

  cluster_end = cc;
  if (cc >= subject_end)
    break;
  }

return cluster_end;
}
8357
8358
#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
8359
8360
/* Scans forward from cc over one extended grapheme cluster, decoding with
the GETCHARINC_INVALID / GETCHARBACK_INVALID macros. The last argument of
these macros is the statement they run when a character cannot be decoded
(presumably an invalid sequence - the macro bodies are not part of this
function); here it is always "break", which leaves the enclosing loop.

Arguments:
  args   the JIT arguments (only the begin and end fields are read)
  cc     pointer to the first character of the cluster

Returns: pointer just after the last character accepted into the cluster,
         or NULL when the scan stops before any character was accepted
*/
static PCRE2_SPTR SLJIT_FUNC do_extuni_utf_invalid(jit_arguments *args, PCRE2_SPTR cc)
{
PCRE2_SPTR subject_start = args->begin;
PCRE2_SPTR subject_end = args->end;
PCRE2_SPTR left_start = cc;     /* start of the character left of the boundary */
PCRE2_SPTR cluster_end = NULL;  /* NULL until the first character is consumed */
PCRE2_SPTR back;
int left_gb, right_gb, ri_run;
uint32_t ch;

for (;;)
  {
  GETCHARINC_INVALID(ch, cc, subject_end, break);
  right_gb = UCD_GRAPHBREAK(ch);

  if (cluster_end == NULL)
    {
    /* The first character always belongs to the cluster. */
    left_gb = right_gb;
    }
  else
    {
    /* A clear bit in the table row of the left property ends the cluster. */
    if ((PRIV(ucp_gbtable)[left_gb] & (1 << right_gb)) == 0)
      break;

    /* Not breaking between Regional Indicators is allowed only if there
    are an even number of preceding RIs. */

    if (left_gb == ucp_gbRegionalIndicator && right_gb == ucp_gbRegionalIndicator)
      {
      /* back starts at the left-hand character and walks towards the start
      of the subject; ri_run counts the RIs found before that character. */
      for (back = left_start, ri_run = 0; back > subject_start; ri_run++)
        {
        GETCHARBACK_INVALID(ch, back, subject_start, break);

        if (UCD_GRAPHBREAK(ch) != ucp_gbRegionalIndicator)
          break;
        }

      if ((ri_run & 1) != 0)
        break;  /* Grapheme break required */
      }

    /* If Extend or ZWJ follows Extended_Pictographic, the left property is
    kept; this allows any number of them before a following
    Extended_Pictographic. */

    if (!(left_gb == ucp_gbExtended_Pictographic &&
          (right_gb == ucp_gbExtend || right_gb == ucp_gbZWJ)))
      left_gb = right_gb;

    left_start = cluster_end;
    }

  cluster_end = cc;
  if (cc >= subject_end)
    break;
  }

return cluster_end;
}
8424
8425
/* C helper called from the code emitted for OP_EXTUNI when every code unit is
one character (no UTF decoding). Consumes code units starting at cc for as long
as the grapheme break table allows them to be joined and returns the pointer
after the last one consumed. In the 32-bit library a first code unit of
0x110000 or above yields NULL, and a later one simply ends the scan. */
static PCRE2_SPTR SLJIT_FUNC do_extuni_no_utf(jit_arguments *args, PCRE2_SPTR cc)
{
PCRE2_SPTR subject_start = args->begin;
PCRE2_SPTR subject_end = args->end;
PCRE2_SPTR scan;
int left_prop, right_prop, ri_before;
uint32_t c;

/* The first code unit is accepted unconditionally. */
c = *cc++;

#if PCRE2_CODE_UNIT_WIDTH == 32
if (c >= 0x110000)
  return NULL;
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
left_prop = UCD_GRAPHBREAK(c);

for (; cc < subject_end; cc++)
  {
  c = *cc;
#if PCRE2_CODE_UNIT_WIDTH == 32
  if (c >= 0x110000)
    break;
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
  right_prop = UCD_GRAPHBREAK(c);

  if ((PRIV(ucp_gbtable)[left_prop] & (1 << right_prop)) == 0)
    break;

  /* Two Regional Indicators may only stay together when an even number of
  Regional Indicators precedes the left-hand one. */
  if (left_prop == ucp_gbRegionalIndicator && right_prop == ucp_gbRegionalIndicator)
    {
    ri_before = 0;

    /* scan starts at the left-hand code unit and walks backwards. */
    for (scan = cc - 1; scan > subject_start; ri_before++)
      {
      c = *--scan;
#if PCRE2_CODE_UNIT_WIDTH == 32
      if (c >= 0x110000)
        break;
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */

      if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator)
        break;
      }

    if ((ri_before & 1) != 0)
      break;  /* Grapheme break required */
    }

  /* While an Extended_Pictographic is on the left, following Extend or ZWJ
  characters do not replace it, so any number of them may precede the next
  Extended_Pictographic. */
  if (!(left_prop == ucp_gbExtended_Pictographic &&
        (right_prop == ucp_gbExtend || right_prop == ucp_gbZWJ)))
    left_prop = right_prop;
  }

return cc;
}
8494
8495
#endif /* SUPPORT_UNICODE */
8496
8497
/* Emits the JIT code that matches one character for the single-character
opcode "type". cc points at the data that follows the opcode in the compiled
pattern. Jumps that must be taken when the match fails are appended to
*backtracks. When check_str_ptr is TRUE, detect_partial_match() is emitted
before the character is read (for OP_CHAR/OP_CHARI only when the mode is not
PCRE2_JIT_COMPLETE). The return value is the pattern pointer after the data
consumed by this opcode. */
static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr)
8498
{
8499
DEFINE_COMPILER;
8500
int length;
8501
unsigned int c, oc, bit;
8502
compare_context context;
8503
struct sljit_jump *jump[3];
8504
jump_list *end_list;
8505
#ifdef SUPPORT_UNICODE
8506
PCRE2_UCHAR propdata[5];
8507
#endif /* SUPPORT_UNICODE */
8508
8509
switch(type)
8510
  {
8511
  case OP_NOT_DIGIT:
8512
  case OP_DIGIT:
8513
  /* Digits are usually 0-9, so it is worth to optimize them. */
8514
  if (check_str_ptr)
8515
    detect_partial_match(common, backtracks);
8516
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8517
  if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_digit, FALSE))
8518
    read_char7_type(common, backtracks, type == OP_NOT_DIGIT);
8519
  else
8520
#endif
8521
    read_char8_type(common, backtracks, type == OP_NOT_DIGIT);
8522
    /* Flip the starting bit in the negative case. */
8523
  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
8524
  add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
8525
  return cc;
8526
8527
  case OP_NOT_WHITESPACE:
8528
  case OP_WHITESPACE:
8529
  if (check_str_ptr)
8530
    detect_partial_match(common, backtracks);
8531
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8532
  if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_space, FALSE))
8533
    read_char7_type(common, backtracks, type == OP_NOT_WHITESPACE);
8534
  else
8535
#endif
8536
    read_char8_type(common, backtracks, type == OP_NOT_WHITESPACE);
8537
  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
8538
  add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
8539
  return cc;
8540
8541
  case OP_NOT_WORDCHAR:
8542
  case OP_WORDCHAR:
8543
  if (check_str_ptr)
8544
    detect_partial_match(common, backtracks);
8545
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8546
  if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_word, FALSE))
8547
    read_char7_type(common, backtracks, type == OP_NOT_WORDCHAR);
8548
  else
8549
#endif
8550
    read_char8_type(common, backtracks, type == OP_NOT_WORDCHAR);
8551
  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
8552
  add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
8553
  return cc;
8554
8555
  /* When the newline is a fixed sequence whose value is above 255, its two
  parts ((newline >> 8) & 0xff, then newline & 0xff) are compared one after
  the other; otherwise check_newlinechar() emits the test. */
  case OP_ANY:
8556
  if (check_str_ptr)
8557
    detect_partial_match(common, backtracks);
8558
  read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
8559
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
8560
    {
8561
    jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
8562
    end_list = NULL;
8563
    if (common->mode != PCRE2_JIT_PARTIAL_HARD)
8564
      add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
8565
    else
8566
      check_str_end(common, &end_list);
8567
8568
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
8569
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
8570
    set_jumps(end_list, LABEL());
8571
    JUMPHERE(jump[0]);
8572
    }
8573
  else
8574
    check_newlinechar(common, common->nltype, backtracks, TRUE);
8575
  return cc;
8576
8577
  /* No comparison is emitted here: the code only moves STR_PTR forward, by
  one code unit or, in 8/16-bit UTF mode, by the length derived from the
  first code unit. */
  case OP_ALLANY:
8578
  if (check_str_ptr)
8579
    detect_partial_match(common, backtracks);
8580
#ifdef SUPPORT_UNICODE
8581
  if (common->utf)
8582
    {
8583
    if (common->invalid_utf)
8584
      {
8585
      read_char(common, 0, READ_CHAR_MAX, backtracks, READ_CHAR_UPDATE_STR_PTR);
8586
      return cc;
8587
      }
8588
8589
#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
8590
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
8591
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8592
#if PCRE2_CODE_UNIT_WIDTH == 8
8593
    jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
8594
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
8595
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
8596
#elif PCRE2_CODE_UNIT_WIDTH == 16
8597
    jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
8598
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
8599
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
8600
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
8601
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
8602
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
8603
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
8604
    JUMPHERE(jump[0]);
8605
    return cc;
8606
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
8607
    }
8608
#endif /* SUPPORT_UNICODE */
8609
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8610
  return cc;
8611
8612
  case OP_ANYBYTE:
8613
  if (check_str_ptr)
8614
    detect_partial_match(common, backtracks);
8615
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8616
  return cc;
8617
8618
#ifdef SUPPORT_UNICODE
8619
  /* The property test is repackaged in propdata[] as a five-unit XCLASS item
  list (built from the two units at cc) and passed to
  compile_xclass_matchingpath(). */
  case OP_NOTPROP:
8620
  case OP_PROP:
8621
  propdata[0] = XCL_HASPROP;
8622
  propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
8623
  propdata[2] = cc[0];
8624
  propdata[3] = cc[1];
8625
  propdata[4] = XCL_END;
8626
  if (check_str_ptr)
8627
    detect_partial_match(common, backtracks);
8628
  compile_xclass_matchingpath(common, propdata, backtracks);
8629
  return cc + 2;
8630
#endif
8631
8632
  /* After a CHAR_CR, a directly following CHAR_NL is consumed as well; any
  other character is passed to check_newlinechar(). */
  case OP_ANYNL:
8633
  if (check_str_ptr)
8634
    detect_partial_match(common, backtracks);
8635
  read_char(common, common->bsr_nlmin, common->bsr_nlmax, NULL, 0);
8636
  jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
8637
  /* We don't need to handle soft partial matching case. */
8638
  end_list = NULL;
8639
  if (common->mode != PCRE2_JIT_PARTIAL_HARD)
8640
    add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
8641
  else
8642
    check_str_end(common, &end_list);
8643
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
8644
  jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
8645
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8646
  jump[2] = JUMP(SLJIT_JUMP);
8647
  JUMPHERE(jump[0]);
8648
  check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
8649
  set_jumps(end_list, LABEL());
8650
  JUMPHERE(jump[1]);
8651
  JUMPHERE(jump[2]);
8652
  return cc;
8653
8654
  /* The test itself is done by the shared common->hspace routine, which is
  reached with a fast call; its result is read back from the zero flag. */
  case OP_NOT_HSPACE:
8655
  case OP_HSPACE:
8656
  if (check_str_ptr)
8657
    detect_partial_match(common, backtracks);
8658
8659
  if (type == OP_NOT_HSPACE)
8660
    read_char(common, 0x9, 0x3000, backtracks, READ_CHAR_UPDATE_STR_PTR);
8661
  else
8662
    read_char(common, 0x9, 0x3000, NULL, 0);
8663
8664
  add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
8665
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
8666
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
8667
  return cc;
8668
8669
  /* Same scheme as the horizontal space case, using common->vspace. */
  case OP_NOT_VSPACE:
8670
  case OP_VSPACE:
8671
  if (check_str_ptr)
8672
    detect_partial_match(common, backtracks);
8673
8674
  if (type == OP_NOT_VSPACE)
8675
    read_char(common, 0xa, 0x2029, backtracks, READ_CHAR_UPDATE_STR_PTR);
8676
  else
8677
    read_char(common, 0xa, 0x2029, NULL, 0);
8678
8679
  add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
8680
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
8681
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
8682
  return cc;
8683
8684
#ifdef SUPPORT_UNICODE
8685
  /* The work is done by one of the do_extuni_* C functions, called with the
  arguments block in R0 and STR_PTR in R1. The returned pointer becomes the
  new STR_PTR; where the selected function can return zero, a zero result is
  routed to the backtrack list. */
  case OP_EXTUNI:
8686
  if (check_str_ptr)
8687
    detect_partial_match(common, backtracks);
8688
8689
  SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
8690
  OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
8691
8692
#if PCRE2_CODE_UNIT_WIDTH != 32
8693
  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM,
8694
    common->utf ? (common->invalid_utf ? SLJIT_FUNC_OFFSET(do_extuni_utf_invalid) : SLJIT_FUNC_OFFSET(do_extuni_utf)) : SLJIT_FUNC_OFFSET(do_extuni_no_utf));
8695
  if (common->invalid_utf)
8696
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
8697
#else
8698
  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM,
8699
    common->invalid_utf ? SLJIT_FUNC_OFFSET(do_extuni_utf_invalid) : SLJIT_FUNC_OFFSET(do_extuni_no_utf));
8700
  if (!common->utf || common->invalid_utf)
8701
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
8702
#endif
8703
8704
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
8705
8706
  if (common->mode == PCRE2_JIT_PARTIAL_HARD)
8707
    {
8708
    jump[0] = CMP(SLJIT_LESS, SLJIT_RETURN_REG, 0, STR_END, 0);
8709
    /* Since we successfully read a char above, partial matching must occur. */
8710
    check_partial(common, TRUE);
8711
    JUMPHERE(jump[0]);
8712
    }
8713
  return cc;
8714
#endif
8715
8716
  /* Literal character. When the comparison can be done on the raw code units
  (caseful, no other case, or char_get_othercase_bit() returns non-zero) it is
  delegated to byte_sequence_compare(); otherwise the character is read and
  compared against both c and its other case oc. */
  case OP_CHAR:
8717
  case OP_CHARI:
8718
  length = 1;
8719
#ifdef SUPPORT_UNICODE
8720
  if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
8721
#endif
8722
8723
  if (check_str_ptr && common->mode != PCRE2_JIT_COMPLETE)
8724
    detect_partial_match(common, backtracks);
8725
8726
  if (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0)
8727
    {
8728
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
8729
    if (length > 1 || (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE))
8730
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
8731
8732
    context.length = IN_UCHARS(length);
8733
    context.sourcereg = -1;
8734
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
8735
    context.ucharptr = 0;
8736
#endif
8737
    return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
8738
    }
8739
8740
#ifdef SUPPORT_UNICODE
8741
  if (common->utf)
8742
    {
8743
    GETCHAR(c, cc);
8744
    }
8745
  else
8746
#endif
8747
    c = *cc;
8748
8749
  SLJIT_ASSERT(type == OP_CHARI && char_has_othercase(common, cc));
8750
8751
  if (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE)
8752
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
8753
8754
  oc = char_othercase(common, c);
8755
  read_char(common, c < oc ? c : oc, c > oc ? c : oc, NULL, 0);
8756
8757
  SLJIT_ASSERT(!is_powerof2(c ^ oc));
8758
8759
  if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
8760
    {
8761
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
8762
    CMOV(SLJIT_EQUAL, TMP1, SLJIT_IMM, c);
8763
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
8764
    }
8765
  else
8766
    {
8767
    jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
8768
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
8769
    JUMPHERE(jump[0]);
8770
    }
8771
  return cc + length;
8772
8773
  /* Negated literal: the match fails when the subject character equals c
  (or, for OP_NOTI, its other case). */
  case OP_NOT:
8774
  case OP_NOTI:
8775
  if (check_str_ptr)
8776
    detect_partial_match(common, backtracks);
8777
8778
  length = 1;
8779
#ifdef SUPPORT_UNICODE
8780
  if (common->utf)
8781
    {
8782
#if PCRE2_CODE_UNIT_WIDTH == 8
8783
    c = *cc;
8784
    if (c < 128 && !common->invalid_utf)
8785
      {
8786
      OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
8787
      if (type == OP_NOT || !char_has_othercase(common, cc))
8788
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
8789
      else
8790
        {
8791
        /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
8792
        OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
8793
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
8794
        }
8795
      /* Skip the variable-length character. */
8796
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8797
      jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
8798
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
8799
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
8800
      JUMPHERE(jump[0]);
8801
      return cc + 1;
8802
      }
8803
    else
8804
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
8805
      {
8806
      GETCHARLEN(c, cc, length);
8807
      }
8808
    }
8809
  else
8810
#endif /* SUPPORT_UNICODE */
8811
    c = *cc;
8812
8813
  if (type == OP_NOT || !char_has_othercase(common, cc))
8814
    {
8815
    read_char(common, c, c, backtracks, READ_CHAR_UPDATE_STR_PTR);
8816
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
8817
    }
8818
  else
8819
    {
8820
    oc = char_othercase(common, c);
8821
    read_char(common, c < oc ? c : oc, c > oc ? c : oc, backtracks, READ_CHAR_UPDATE_STR_PTR);
8822
    bit = c ^ oc;
8823
    /* When the two cases differ in a single bit, one OR plus one compare
    covers both of them. */
    if (is_powerof2(bit))
8824
      {
8825
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
8826
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
8827
      }
8828
    else
8829
      {
8830
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
8831
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
8832
      }
8833
    }
8834
  return cc + length;
8835
8836
  /* Bitmap class: cc points at a 32-byte bit table. optimize_class() is
  tried first; otherwise the bit for the character in TMP1 is tested
  directly in that table. */
  case OP_CLASS:
8837
  case OP_NCLASS:
8838
  if (check_str_ptr)
8839
    detect_partial_match(common, backtracks);
8840
8841
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8842
  bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255;
8843
  if (type == OP_NCLASS)
8844
    read_char(common, 0, bit, backtracks, READ_CHAR_UPDATE_STR_PTR);
8845
  else
8846
    read_char(common, 0, bit, NULL, 0);
8847
#else
8848
  if (type == OP_NCLASS)
8849
    read_char(common, 0, 255, backtracks, READ_CHAR_UPDATE_STR_PTR);
8850
  else
8851
    read_char(common, 0, 255, NULL, 0);
8852
#endif
8853
8854
  if (optimize_class(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
8855
    return cc + 32 / sizeof(PCRE2_UCHAR);
8856
8857
  /* Characters above the table range fail for OP_CLASS; for OP_NCLASS they
  jump over the table lookup (jump[0] is patched after it). */
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8858
  jump[0] = NULL;
8859
  if (common->utf)
8860
    {
8861
    jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
8862
    if (type == OP_CLASS)
8863
      {
8864
      add_jump(compiler, backtracks, jump[0]);
8865
      jump[0] = NULL;
8866
      }
8867
    }
8868
#elif PCRE2_CODE_UNIT_WIDTH != 8
8869
  jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
8870
  if (type == OP_CLASS)
8871
    {
8872
    add_jump(compiler, backtracks, jump[0]);
8873
    jump[0] = NULL;
8874
    }
8875
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
8876
8877
  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
8878
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
8879
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
8880
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
8881
  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
8882
  add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
8883
8884
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
8885
  if (jump[0] != NULL)
8886
    JUMPHERE(jump[0]);
8887
#endif
8888
  return cc + 32 / sizeof(PCRE2_UCHAR);
8889
8890
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
8891
  /* Extended class: the item list after the LINK_SIZE length field is handed
  to compile_xclass_matchingpath(); the length field gives the item size. */
  case OP_XCLASS:
8892
  if (check_str_ptr)
8893
    detect_partial_match(common, backtracks);
8894
  compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
8895
  return cc + GET(cc, 0) - 1;
8896
#endif
8897
  }
8898
SLJIT_UNREACHABLE();
8899
return cc;
8900
}
8901
8902
/* Emits the matching code for the item at cc. A run of consecutive
OP_CHAR / OP_CHARI items whose length in code units is fixed is merged so that
one length check covers the whole run; the run is then compared with
byte_sequence_compare(). When the first item cannot start such a run it is
compiled on its own by compile_char1_matchingpath(). Always consumes at least
one input character. Returns the pattern pointer after the compiled items. */
static SLJIT_INLINE PCRE2_SPTR compile_charn_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, jump_list **backtracks)
{
DEFINE_COMPILER;
PCRE2_SPTR first_item = cc;
compare_context context;
int size;

/* Pass 1: measure the fixed-length run. */
context.length = 0;
for (;;)
  {
  if (cc >= ccend)
    break;

  switch (*cc)
    {
    case OP_CHAR:
    size = 1;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[1]))
      size += GET_EXTRALEN(cc[1]);
#endif
    break;

    case OP_CHARI:
    size = 1;
    /* A caseless character whose two cases cannot be told apart by a single
    bit ends the run. */
    if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
      size = 0;
#ifdef SUPPORT_UNICODE
    else if (common->utf && HAS_EXTRALEN(cc[1]))
      size += GET_EXTRALEN(cc[1]);
#endif
    break;

    default:
    size = 0;
    break;
    }

  cc += 1 + size;
  context.length += IN_UCHARS(size);

  if (size <= 0 || context.length > 128)
    break;
  }

cc = first_item;

/* No fixed-length run: compile the single item with its own checks. */
if (!(context.length > 0))
  return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);

/* Pass 2: one length check for the whole run, then compare it. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));

context.sourcereg = -1;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
context.ucharptr = 0;
#endif
do
  cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks);
while (context.length > 0);

return cc;
}
8967
8968
/* Forward definitions. */
8969
static void compile_matchingpath(compiler_common *, PCRE2_SPTR, PCRE2_SPTR, backtrack_common *);
8970
static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
8971
8972
/* Allocates a zero-filled backtrack record of "size" bytes from the compiler's
memory, stores "ccstart" in it and links it on top of parent->top. Relies on
the local names "backtrack", "compiler" and "parent" being in scope at the
point of use. If the compiler is in an error state after the allocation, the
enclosing function returns "error". Every use of "size" is parenthesized. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
8984
8985
/* Variant of PUSH_BACKTRACK for functions returning void: allocates a
zero-filled backtrack record of "size" bytes, stores "ccstart" in it and links
it on top of parent->top. Relies on the local names "backtrack", "compiler"
and "parent" being in scope. If the compiler is in an error state after the
allocation, the enclosing function returns. Every use of "size" is
parenthesized. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
8997
8998
/* Views the local variable "backtrack" as a pointer to the given backtrack
record type. */
#define BACKTRACK_AS(type) ((type *)backtrack)
8999
9000
/* Emits code that walks the name table entries of a duplicate-name reference
(OP_DNREF / OP_DNREFI at cc) and stops at the first group whose OVECTOR start
value differs from OVECTOR(1). On exit from the emitted code TMP2 holds the
address of the selected OVECTOR pair (the last entry's pair when none
differed). When backtracks is not NULL and unset back references are not
allowed to match, a jump to *backtracks is added for the case where the last
entry is equal to OVECTOR(1) as well. */
static void compile_dnref_search(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
PCRE2_SPTR entry = common->name_table + GET2(cc, 1) * common->name_entry_size;
int remaining = GET2(cc, 1 + IMM2_SIZE);
unsigned int ovec_index;
jump_list *found = NULL;

SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));

for (;;)
  {
  /* The OVECTOR offset goes to TMP2. */
  ovec_index = GET2(entry, 0) << 1;
  GET_LOCAL_BASE(TMP2, 0, OVECTOR(ovec_index));

  /* The last entry gets no "found" exit; it is handled after the loop. */
  if (--remaining <= 0)
    break;

  add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovec_index), TMP1, 0));
  entry += common->name_entry_size;
  }

if (backtracks != NULL && !common->unset_backref)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovec_index), TMP1, 0));

set_jumps(found, LABEL());
}
9029
9030
/* Emits the code that compares the subject at STR_PTR with the text of a back
reference. For OP_REF / OP_REFI the group's OVECTOR pair is addressed
directly; for the other opcodes the address of the pair is read through TMP2,
which must have been set up by the caller. Jumps taken on a mismatch are
appended to *backtracks. When withchecks is TRUE, extra tests are emitted: the
group start is compared with OVECTOR(1) (unless common->unset_backref is set,
OP_REF / OP_REFI only) and the empty reference is detected; an empty reference
then fails when emptyfail is TRUE and otherwise falls through as a match. */
static void compile_ref_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
9031
{
9032
DEFINE_COMPILER;
9033
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
9034
int offset = 0;
9035
struct sljit_jump *jump = NULL;
9036
struct sljit_jump *partial;
9037
struct sljit_jump *nopartial;
9038
#if defined SUPPORT_UNICODE
9039
struct sljit_label *loop;
9040
struct sljit_label *caseless_loop;
9041
jump_list *no_match = NULL;
9042
/* Registers borrowed by the caseless UTF loop below; their previous contents
are saved at common->iref_ptr and restored on every exit path. */
int source_reg = COUNT_MATCH;
9043
int source_end_reg = ARGUMENTS;
9044
int char1_reg = STACK_LIMIT;
9045
#endif /* SUPPORT_UNICODE */
9046
9047
if (ref)
9048
  {
9049
  offset = GET2(cc, 1) << 1;
9050
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9051
  /* OVECTOR(1) contains the "string begin - 1" constant. */
9052
  if (withchecks && !common->unset_backref)
9053
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9054
  }
9055
else
9056
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9057
9058
#if defined SUPPORT_UNICODE
9059
/* Caseless reference in UTF mode: the referenced text and the subject are
decoded and compared one character at a time. */
if (common->utf && *cc == OP_REFI)
9060
  {
9061
  SLJIT_ASSERT(common->iref_ptr != 0);
9062
9063
  if (ref)
9064
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9065
  else
9066
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9067
9068
  if (withchecks && emptyfail)
9069
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0));
9070
9071
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr, source_reg, 0);
9072
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw), source_end_reg, 0);
9073
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2, char1_reg, 0);
9074
9075
  OP1(SLJIT_MOV, source_reg, 0, TMP1, 0);
9076
  OP1(SLJIT_MOV, source_end_reg, 0, TMP2, 0);
9077
9078
  /* Loop head: "jump" leaves when the whole reference has been compared,
  "partial" when the subject runs out first. */
  loop = LABEL();
9079
  jump = CMP(SLJIT_GREATER_EQUAL, source_reg, 0, source_end_reg, 0);
9080
  partial = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
9081
9082
  /* Read original character. It must be a valid UTF character. */
9083
  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
9084
  OP1(SLJIT_MOV, STR_PTR, 0, source_reg, 0);
9085
9086
  read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR | READ_CHAR_VALID_UTF);
9087
9088
  OP1(SLJIT_MOV, source_reg, 0, STR_PTR, 0);
9089
  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
9090
  OP1(SLJIT_MOV, char1_reg, 0, TMP1, 0);
9091
9092
  /* Read second character. */
9093
  read_char(common, 0, READ_CHAR_MAX, &no_match, READ_CHAR_UPDATE_STR_PTR);
9094
9095
  CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);
9096
9097
  OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
9098
9099
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
9100
9101
  /* TMP2 = TMP2 * 12 (computed as TMP2 * 4 + TMP2 * 8), then the address of
  ucd_records is added to it. */
  OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
9102
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
9103
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
9104
9105
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records));
9106
9107
  OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, other_case));
9108
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, caseset));
9109
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);
9110
  CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);
9111
9112
  /* A zero caseset value means there is nothing further to try. Otherwise
  the entries of ucd_caseless_sets starting at that index are scanned for as
  long as they are less than the referenced character. */
  add_jump(compiler, &no_match, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
9113
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
9114
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_caseless_sets));
9115
9116
  caseless_loop = LABEL();
9117
  OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9118
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(uint32_t));
9119
  OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, char1_reg, 0);
9120
  JUMPTO(SLJIT_EQUAL, loop);
9121
  JUMPTO(SLJIT_LESS, caseless_loop);
9122
9123
  /* Mismatch exit: restore the borrowed registers and backtrack. */
  set_jumps(no_match, LABEL());
9124
  if (common->mode == PCRE2_JIT_COMPLETE)
9125
    JUMPHERE(partial);
9126
9127
  OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
9128
  OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
9129
  OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
9130
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
9131
9132
  /* End-of-subject exit in the partial matching modes: restore the borrowed
  registers, run the partial match check, then backtrack. */
  if (common->mode != PCRE2_JIT_COMPLETE)
9133
    {
9134
    JUMPHERE(partial);
9135
    OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
9136
    OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
9137
    OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
9138
9139
    check_partial(common, FALSE);
9140
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
9141
    }
9142
9143
  /* Success exit: restore the borrowed registers and continue. */
  JUMPHERE(jump);
9144
  OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
9145
  OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
9146
  OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
9147
  return;
9148
  }
9149
else
9150
#endif /* SUPPORT_UNICODE */
9151
  {
9152
  /* TMP2 = end of the reference minus its start (TMP1); the zero flag is
  set when they are equal. */
  if (ref)
9153
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
9154
  else
9155
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
9156
9157
  if (withchecks)
9158
    jump = JUMP(SLJIT_ZERO);
9159
9160
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
9161
  partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
9162
  if (common->mode == PCRE2_JIT_COMPLETE)
9163
    add_jump(compiler, backtracks, partial);
9164
9165
  /* The comparison is done by a shared routine reached with a fast call; a
  non-zero TMP2 afterwards is treated as a mismatch. */
  add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
9166
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
9167
9168
  if (common->mode != PCRE2_JIT_COMPLETE)
9169
    {
9170
    nopartial = JUMP(SLJIT_JUMP);
9171
    JUMPHERE(partial);
9172
    /* TMP2 -= STR_END - STR_PTR */
9173
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
9174
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
9175
    partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
9176
    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
9177
    add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
9178
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
9179
    JUMPHERE(partial);
9180
    check_partial(common, FALSE);
9181
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
9182
    JUMPHERE(nopartial);
9183
    }
9184
  }
9185
9186
/* jump is only set when withchecks is TRUE: it is the exit taken for an
empty reference. */
if (jump != NULL)
9187
  {
9188
  if (emptyfail)
9189
    add_jump(compiler, backtracks, jump);
9190
  else
9191
    JUMPHERE(jump);
9192
  }
9193
}
9194
9195
/* Generates the matching path of a back reference that is followed by a
repeat opcode (OP_CRSTAR ... OP_CRMINRANGE). For OP_REF / OP_REFI the capture
is read directly from the ovector; otherwise the reference is resolved by the
code emitted by compile_dnref_search().

Returns the position after the repeat opcode. PUSH_BACKTRACK is passed NULL
as its failure value. */
static SLJIT_INLINE PCRE2_SPTR compile_ref_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
backtrack_common *backtrack;
PCRE2_SPTR ref_start = cc;
PCRE2_UCHAR repeat_op;
int offset = 0;
int min = 0;
int max = 0;
/* Length of the reference plus a repeat opcode without explicit limits. */
int repeat_len = 1 + IMM2_SIZE + 1;
BOOL minimize;
BOOL counted;
struct sljit_label *loop;
struct sljit_jump *zerolength;
struct sljit_jump *jump = NULL;

PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);

if (!ref)
  cc += IMM2_SIZE;
else
  offset = GET2(cc, 1) << 1;
repeat_op = cc[1 + IMM2_SIZE];

/* The lowest bit of the repeat opcode selects the minimizing variant. */
SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
minimize = (repeat_op & 0x1) != 0;

/* min and max start as zero, so only the non-zero limits are set here.
A max of zero is handled below as "no upper limit". */
switch(repeat_op)
  {
  case OP_CRSTAR:
  case OP_CRMINSTAR:
  break;

  case OP_CRPLUS:
  case OP_CRMINPLUS:
  min = 1;
  break;

  case OP_CRQUERY:
  case OP_CRMINQUERY:
  max = 1;
  break;

  case OP_CRRANGE:
  case OP_CRMINRANGE:
  min = GET2(cc, 1 + IMM2_SIZE + 1);
  max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
  repeat_len += 2 * IMM2_SIZE;
  break;

  default:
  SLJIT_UNREACHABLE();
  repeat_len = 0;
  break;
  }
cc += repeat_len;

/* A repeat counter is only needed when a limit greater than one exists. */
counted = (min > 1 || max > 1);

if (!minimize)
  {
  /* Greedy repeat. */
  if (min != 0)
    {
    allocate_stack(common, 1);
    if (ref)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    if (!ref)
      {
      compile_dnref_search(common, ref_start, &backtrack->topbacktracks);
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
      }
    else
      {
      add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
      }
    }
  else
    {
    allocate_stack(common, 2);
    if (ref)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
    /* Temporary release of STR_PTR. */
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
    /* Covers both the invalid and the empty case: with a minimum repeat
    of zero an invalid reference behaves like an empty one. */
    if (!ref)
      {
      compile_dnref_search(common, ref_start, NULL);
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
      }
    else
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    /* Restore if not zero length. */
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
    }

  if (counted)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);

  loop = LABEL();
  if (!ref)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
  compile_ref_matchingpath(common, ref_start, &backtrack->topbacktracks, FALSE, FALSE);

  if (counted)
    {
    /* Increase the repeat counter kept in POSSESSIVE0. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
    if (min > 1)
      CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, loop);
    if (max > 1)
      {
      jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
      allocate_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      JUMPTO(SLJIT_JUMP, loop);
      JUMPHERE(jump);
      }
    }

  if (max == 0)
    {
    /* Includes min > 1 case as well. */
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, loop);
    }

  JUMPHERE(zerolength);
  BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();

  count_match(common);
  return cc;
  }

/* Minimizing repeat. */
allocate_stack(common, ref ? 2 : 3);
if (ref)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
if (repeat_op != OP_CRMINSTAR)
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);

if (min != 0)
  {
  if (!ref)
    {
    compile_dnref_search(common, ref_start, &backtrack->topbacktracks);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    }
  else
    {
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    }
  }
else
  {
  /* Covers both the invalid and the empty case: with a minimum repeat
  of zero an invalid reference behaves like an empty one. */
  if (!ref)
    {
    compile_dnref_search(common, ref_start, NULL);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    }
  else
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  /* Length is non-zero, we can match real repeats. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  jump = JUMP(SLJIT_JUMP);
  }

BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
if (max > 0)
  add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));

if (!ref)
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
compile_ref_matchingpath(common, ref_start, &backtrack->topbacktracks, TRUE, TRUE);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

if (min > 1)
  {
  /* Count the repeat in STACK(1) and loop until the minimum is reached. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath);
  }
else if (max > 0)
  OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);

if (jump != NULL)
  JUMPHERE(jump);
JUMPHERE(zerolength);

count_match(common);
return cc;
}
9401
9402
/* Generates the matching path of a recursion / subroutine call item.

When get_framesize() reports no_stack for the called group, the group body is
compiled in place. Otherwise a recurse_entry for the target is looked up in
common->entries (appended when missing) and a fast call to its entry label is
emitted.

Returns the position after the item, or NULL when sljit reports a compiler
error after allocating a new entry. */
static SLJIT_INLINE PCRE2_SPTR compile_recurse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
recurse_entry *entry = common->entries;
recurse_entry *last = NULL;
sljit_sw start = GET(cc, 1);
PCRE2_SPTR start_cc;
BOOL needs_control_head;

PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);

/* Inlining simple patterns. */
if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
  {
  start_cc = common->start + start;
  compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
  BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
  return cc + 1 + LINK_SIZE;
  }

/* Search for an existing entry of this target, remembering the list tail. */
while (entry != NULL && entry->start != start)
  {
  last = entry;
  entry = entry->next;
  }

if (entry == NULL)
  {
  entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;

  entry->start = start;
  entry->next = NULL;
  entry->entry_label = NULL;
  entry->backtrack_label = NULL;
  entry->entry_calls = NULL;
  entry->backtrack_calls = NULL;

  if (last == NULL)
    common->entries = entry;
  else
    last->next = entry;
  }

BACKTRACK_AS(recurse_backtrack)->entry = entry;

/* Call the entry directly when its label is already known, otherwise
record the call on the entry's list of pending calls. */
if (entry->entry_label != NULL)
  JUMPTO(SLJIT_FAST_CALL, entry->entry_label);
else
  add_jump(compiler, &entry->entry_calls, JUMP(SLJIT_FAST_CALL));

/* Leave if the match is failed. */
add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
BACKTRACK_AS(recurse_backtrack)->matchingpath = LABEL();
return cc + 1 + LINK_SIZE;
}
9460
9461
/* Run-time helper invoked from JIT code for a callout.

Completes the partially filled callout_block, converts the pointer pairs of
jit_ovector into subject offsets stored directly after the block, and calls
the user callout. On entry callout_block->capture_top holds the number of
ovector pairs and callout_block->offset_vector holds the current subject
pointer.

Returns 0 when no callout function is set, otherwise the callout's result. */
static sljit_s32 SLJIT_FUNC do_callout(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector)
{
PCRE2_SPTR begin;
PCRE2_SIZE *dst;
PCRE2_SPTR *src;
sljit_u32 pairs_left;
sljit_u32 pair_count;

if (arguments->callout == NULL)
  return 0;

SLJIT_COMPILE_ASSERT(sizeof (PCRE2_SIZE) <= sizeof (sljit_sw), pcre2_size_must_be_lower_than_sljit_sw_size);

begin = arguments->begin;
dst = (PCRE2_SIZE*)(callout_block + 1);
pairs_left = callout_block->capture_top;

SLJIT_ASSERT(pairs_left >= 1);

callout_block->version = 2;
callout_block->callout_flags = 0;

/* Offsets in subject. The current position must be read from offset_vector
before that field is overwritten below. */
callout_block->subject = begin;
callout_block->subject_length = arguments->end - arguments->begin;
callout_block->start_match = jit_ovector[0] - begin;
callout_block->current_position = (PCRE2_SPTR)callout_block->offset_vector - begin;

/* Convert and copy the JIT offset vector to the ovector array. */
callout_block->offset_vector = dst;
callout_block->capture_top = 1;

/* The first pair is always reported as unset. */
dst[0] = PCRE2_UNSET;
dst[1] = PCRE2_UNSET;
dst += 2;
src = jit_ovector + 2;

/* Convert pointers to sizes; capture_top ends up one past the highest
pair whose start offset is set. */
for (pair_count = 2, pairs_left--; pairs_left != 0; pairs_left--, pair_count++)
  {
  dst[0] = (PCRE2_SIZE)(src[0] - begin);
  dst[1] = (PCRE2_SIZE)(src[1] - begin);

  if (dst[0] != PCRE2_UNSET)
    callout_block->capture_top = pair_count;

  dst += 2;
  src += 2;
  }

return (arguments->callout)(callout_block, arguments->callout_data);
}
9514
9515
#define CALLOUT_ARG_OFFSET(arg) \
9516
    SLJIT_OFFSETOF(pcre2_callout_block, arg)
9517
9518
static SLJIT_INLINE PCRE2_SPTR compile_callout_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9519
{
9520
DEFINE_COMPILER;
9521
backtrack_common *backtrack;
9522
sljit_s32 mov_opcode;
9523
unsigned int callout_length = (*cc == OP_CALLOUT)
9524
    ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2 * LINK_SIZE);
9525
sljit_sw value1;
9526
sljit_sw value2;
9527
sljit_sw value3;
9528
sljit_uw callout_arg_size = (common->re->top_bracket + 1) * 2 * sizeof(sljit_sw);
9529
9530
PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
9531
9532
callout_arg_size = (sizeof(pcre2_callout_block) + callout_arg_size + sizeof(sljit_sw) - 1) / sizeof(sljit_sw);
9533
9534
allocate_stack(common, callout_arg_size);
9535
9536
SLJIT_ASSERT(common->capture_last_ptr != 0);
9537
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
9538
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
9539
value1 = (*cc == OP_CALLOUT) ? cc[1 + 2 * LINK_SIZE] : 0;
9540
OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, value1);
9541
OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
9542
OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_top), SLJIT_IMM, common->re->top_bracket + 1);
9543
9544
/* These pointer sized fields temporarly stores internal variables. */
9545
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
9546
9547
if (common->mark_ptr != 0)
9548
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
9549
mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
9550
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 1));
9551
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 1 + LINK_SIZE));
9552
9553
if (*cc == OP_CALLOUT)
9554
  {
9555
  value1 = 0;
9556
  value2 = 0;
9557
  value3 = 0;
9558
  }
9559
else
9560
  {
9561
  value1 = (sljit_sw) (cc + (1 + 4*LINK_SIZE) + 1);
9562
  value2 = (callout_length - (1 + 4*LINK_SIZE + 2));
9563
  value3 = (sljit_sw) (GET(cc, 1 + 3*LINK_SIZE));
9564
  }
9565
9566
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string), SLJIT_IMM, value1);
9567
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_length), SLJIT_IMM, value2);
9568
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_offset), SLJIT_IMM, value3);
9569
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
9570
9571
SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
9572
9573
/* Needed to save important temporary registers. */
9574
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0);
9575
/* SLJIT_R0 = arguments */
9576
OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0);
9577
GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
9578
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(S32) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
9579
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
9580
free_stack(common, callout_arg_size);
9581
9582
/* Check return value. */
9583
OP2(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
9584
add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER));
9585
if (common->abort_label == NULL)
9586
  add_jump(compiler, &common->abort, JUMP(SLJIT_NOT_EQUAL) /* SIG_LESS */);
9587
else
9588
  JUMPTO(SLJIT_NOT_EQUAL /* SIG_LESS */, common->abort_label);
9589
return cc + callout_length;
9590
}
9591
9592
#undef CALLOUT_ARG_SIZE
9593
#undef CALLOUT_ARG_OFFSET
9594
9595
/* Scans the opcodes starting at cc. Callouts, word boundary checks,
circumflex / dollar anchors and OP_ALT are skipped. Returns FALSE when OP_KET
is reached with nothing else in between, and TRUE as soon as any other opcode
is found. */
static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(PCRE2_SPTR cc)
{
for (;;)
  {
  switch (*cc)
    {
    case OP_KET:
    return FALSE;

    /* Variable length item: its length is stored in the item itself. */
    case OP_CALLOUT_STR:
    cc += GET(cc, 1 + 2*LINK_SIZE);
    continue;

    /* Fixed length items which are skipped. */
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    case OP_CALLOUT:
    case OP_ALT:
    cc += PRIV(OP_lengths)[*cc];
    continue;

    default:
    return TRUE;
    }
  }
}
9624
9625
/* Generates the matching path of an assertion group whose opcode lies between
OP_ASSERT and OP_ASSERTBACK_NOT, optionally preceded by OP_BRAZERO or
OP_BRAMINZERO.

  common       compiler state; several of its fields (accept / quit lists and
               labels, then_trap, in_positive_assertion, ...) are replaced
               while the assertion is compiled and restored before returning
  cc           points to the assertion opcode, or to the OP_BRAZERO /
               OP_BRAMINZERO in front of it
  backtrack    assertion backtrack record; framesize and private_data_ptr are
               stored into it
  conditional  when TRUE, failure jumps are collected in backtrack->condfailed
               instead of backtrack->common.topbacktracks

Returns cc advanced past the last alternative by 1 + LINK_SIZE, or NULL when
sljit_get_compiler_error() reports an error after compiling an alternative. */
static PCRE2_SPTR compile_assert_matchingpath(compiler_common *common, PCRE2_SPTR cc, assert_backtrack *backtrack, BOOL conditional)
9626
{
9627
DEFINE_COMPILER;
9628
int framesize;
9629
int extrasize;
9630
BOOL local_quit_available = FALSE;
9631
BOOL needs_control_head;
9632
int private_data_ptr;
9633
backtrack_common altbacktrack;
9634
PCRE2_SPTR ccbegin;
9635
PCRE2_UCHAR opcode;
9636
PCRE2_UCHAR bra = OP_BRA;
9637
jump_list *tmp = NULL;
9638
/* List which receives the jumps taken when the assertion fails. */
jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
9639
jump_list **found;
9640
/* Saving previous accept variables. */
9641
BOOL save_local_quit_available = common->local_quit_available;
9642
BOOL save_in_positive_assertion = common->in_positive_assertion;
9643
then_trap_backtrack *save_then_trap = common->then_trap;
9644
struct sljit_label *save_quit_label = common->quit_label;
9645
struct sljit_label *save_accept_label = common->accept_label;
9646
jump_list *save_quit = common->quit;
9647
jump_list *save_positive_assertion_quit = common->positive_assertion_quit;
9648
jump_list *save_accept = common->accept;
9649
struct sljit_jump *jump;
9650
struct sljit_jump *brajump = NULL;
9651
9652
/* Assert captures then. */
9653
common->then_trap = NULL;
9654
9655
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
9656
  {
9657
  SLJIT_ASSERT(!conditional);
9658
  bra = *cc;
9659
  cc++;
9660
  }
9661
private_data_ptr = PRIVATE_DATA(cc);
9662
SLJIT_ASSERT(private_data_ptr != 0);
9663
framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
9664
backtrack->framesize = framesize;
9665
backtrack->private_data_ptr = private_data_ptr;
9666
opcode = *cc;
9667
SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
9668
/* A matching alternative jumps through 'found': positive assertions collect
these jumps in the local list tmp, negative assertions send them directly
to the failure target. */
found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
9669
/* ccbegin is the start of the current alternative, cc is moved to its end. */
ccbegin = cc;
9670
cc += GET(cc, 1);
9671
9672
if (bra == OP_BRAMINZERO)
9673
  {
9674
  /* This is a braminzero backtrack path. */
9675
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9676
  free_stack(common, 1);
9677
  brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
9678
  }
9679
9680
/* Negative framesize: only STR_PTR (when it needs saving) and the previous
control head (when needs_control_head is set) are pushed. */
if (framesize < 0)
9681
  {
9682
  extrasize = 1;
9683
  if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
9684
    extrasize = 0;
9685
9686
  if (needs_control_head)
9687
    extrasize++;
9688
9689
  if (framesize == no_frame)
9690
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
9691
9692
  if (extrasize > 0)
9693
    allocate_stack(common, extrasize);
9694
9695
  if (needs_control_head)
9696
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9697
9698
  if (extrasize > 0)
9699
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9700
9701
  if (needs_control_head)
9702
    {
9703
    SLJIT_ASSERT(extrasize == 2);
9704
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
9705
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9706
    }
9707
  }
9708
/* Otherwise a frame is built: STR_PTR goes to STACK(0), followed by the
previous control head (if needed) and the previous private_data_ptr value;
init_frame() fills in the remaining slots. */
else
9709
  {
9710
  extrasize = needs_control_head ? 3 : 2;
9711
  allocate_stack(common, framesize + extrasize);
9712
9713
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9714
  OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
9715
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
9716
  if (needs_control_head)
9717
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9718
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9719
9720
  if (needs_control_head)
9721
    {
9722
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
9723
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
9724
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
9725
    }
9726
  else
9727
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9728
9729
  init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize);
9730
  }
9731
9732
memset(&altbacktrack, 0, sizeof(backtrack_common));
9733
if (conditional || (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT))
9734
  {
9735
  /* Control verbs cannot escape from these asserts. */
9736
  local_quit_available = TRUE;
9737
  common->local_quit_available = TRUE;
9738
  common->quit_label = NULL;
9739
  common->quit = NULL;
9740
  }
9741
9742
common->in_positive_assertion = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK);
9743
common->positive_assertion_quit = NULL;
9744
9745
/* One iteration per alternative; the loop ends when the item at the end of
the current alternative is not OP_ALT. */
while (1)
9746
  {
9747
  common->accept_label = NULL;
9748
  common->accept = NULL;
9749
  altbacktrack.top = NULL;
9750
  altbacktrack.topbacktracks = NULL;
9751
9752
  /* Second and later alternatives reload the STR_PTR saved at STACK(0). */
  if (*ccbegin == OP_ALT && extrasize > 0)
9753
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9754
9755
  altbacktrack.cc = ccbegin;
9756
  compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
9757
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9758
    {
9759
    /* Compilation failed: restore the saved state before giving up. */
    if (local_quit_available)
9760
      {
9761
      common->local_quit_available = save_local_quit_available;
9762
      common->quit_label = save_quit_label;
9763
      common->quit = save_quit;
9764
      }
9765
    common->in_positive_assertion = save_in_positive_assertion;
9766
    common->then_trap = save_then_trap;
9767
    common->accept_label = save_accept_label;
9768
    common->positive_assertion_quit = save_positive_assertion_quit;
9769
    common->accept = save_accept;
9770
    return NULL;
9771
    }
9772
  /* Accept jumps recorded while compiling this alternative land here. */
  common->accept_label = LABEL();
9773
  if (common->accept != NULL)
9774
    set_jumps(common->accept, common->accept_label);
9775
9776
  /* Reset stack. */
9777
  if (framesize < 0)
9778
    {
9779
    if (framesize == no_frame)
9780
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9781
    else if (extrasize > 0)
9782
      free_stack(common, extrasize);
9783
9784
    if (needs_control_head)
9785
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
9786
    }
9787
  else
9788
    {
9789
    if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
9790
      {
9791
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
9792
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
9793
      if (needs_control_head)
9794
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
9795
      }
9796
    else
9797
      {
9798
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9799
      if (needs_control_head)
9800
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 2));
9801
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9802
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
9803
      }
9804
    }
9805
9806
  if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
9807
    {
9808
    /* We know that STR_PTR was stored on the top of the stack. */
9809
    if (conditional)
9810
      {
9811
      if (extrasize > 0)
9812
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? STACK(-2) : STACK(-1));
9813
      }
9814
    else if (bra == OP_BRAZERO)
9815
      {
9816
      if (framesize < 0)
9817
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
9818
      else
9819
        {
9820
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
9821
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize));
9822
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
9823
        }
9824
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9825
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9826
      }
9827
    else if (framesize >= 0)
9828
      {
9829
      /* For OP_BRA and OP_BRAMINZERO. */
9830
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
9831
      }
9832
    }
9833
  /* The alternative matched: leave through the 'found' list. */
  add_jump(compiler, found, JUMP(SLJIT_JUMP));
9834
9835
  compile_backtrackingpath(common, altbacktrack.top);
9836
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9837
    {
9838
    /* Compilation failed: restore the saved state before giving up. */
    if (local_quit_available)
9839
      {
9840
      common->local_quit_available = save_local_quit_available;
9841
      common->quit_label = save_quit_label;
9842
      common->quit = save_quit;
9843
      }
9844
    common->in_positive_assertion = save_in_positive_assertion;
9845
    common->then_trap = save_then_trap;
9846
    common->accept_label = save_accept_label;
9847
    common->positive_assertion_quit = save_positive_assertion_quit;
9848
    common->accept = save_accept;
9849
    return NULL;
9850
    }
9851
  set_jumps(altbacktrack.topbacktracks, LABEL());
9852
9853
  if (*cc != OP_ALT)
9854
    break;
9855
9856
  /* Step to the next alternative. */
  ccbegin = cc;
9857
  cc += GET(cc, 1);
9858
  }
9859
9860
if (local_quit_available)
9861
  {
9862
  SLJIT_ASSERT(common->positive_assertion_quit == NULL);
9863
  /* Makes the check less complicated below. */
9864
  common->positive_assertion_quit = common->quit;
9865
  }
9866
9867
/* None of them matched. */
9868
if (common->positive_assertion_quit != NULL)
9869
  {
9870
  jump = JUMP(SLJIT_JUMP);
9871
  set_jumps(common->positive_assertion_quit, LABEL());
9872
  SLJIT_ASSERT(framesize != no_stack);
9873
  if (framesize < 0)
9874
    OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
9875
  else
9876
    {
9877
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9878
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9879
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (extrasize + 1) * sizeof(sljit_sw));
9880
    }
9881
  JUMPHERE(jump);
9882
  }
9883
9884
if (needs_control_head)
9885
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
9886
9887
if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
9888
  {
9889
  /* Assert is failed. */
9890
  if ((conditional && extrasize > 0) || bra == OP_BRAZERO)
9891
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9892
9893
  if (framesize < 0)
9894
    {
9895
    /* The topmost item should be 0. */
9896
    if (bra == OP_BRAZERO)
9897
      {
9898
      if (extrasize == 2)
9899
        free_stack(common, 1);
9900
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9901
      }
9902
    else if (extrasize > 0)
9903
      free_stack(common, extrasize);
9904
    }
9905
  else
9906
    {
9907
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
9908
    /* The topmost item should be 0. */
9909
    if (bra == OP_BRAZERO)
9910
      {
9911
      free_stack(common, framesize + extrasize - 1);
9912
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9913
      }
9914
    else
9915
      free_stack(common, framesize + extrasize);
9916
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
9917
    }
9918
  /* For OP_BRAZERO this jump is bound to the matching path label below;
  otherwise it is added to the failure target. */
  jump = JUMP(SLJIT_JUMP);
9919
  if (bra != OP_BRAZERO)
9920
    add_jump(compiler, target, jump);
9921
9922
  /* Assert is successful. */
9923
  set_jumps(tmp, LABEL());
9924
  if (framesize < 0)
9925
    {
9926
    /* We know that STR_PTR was stored on the top of the stack. */
9927
    if (extrasize > 0)
9928
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
9929
9930
    /* Keep the STR_PTR on the top of the stack. */
9931
    if (bra == OP_BRAZERO)
9932
      {
9933
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9934
      if (extrasize == 2)
9935
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9936
      }
9937
    else if (bra == OP_BRAMINZERO)
9938
      {
9939
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9940
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9941
      }
9942
    }
9943
  else
9944
    {
9945
    if (bra == OP_BRA)
9946
      {
9947
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
9948
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
9949
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));
9950
      }
9951
    else
9952
      {
9953
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
9954
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
9955
      if (extrasize == 2)
9956
        {
9957
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9958
        if (bra == OP_BRAMINZERO)
9959
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9960
        }
9961
      else
9962
        {
9963
        SLJIT_ASSERT(extrasize == 3);
9964
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
9965
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
9966
        }
9967
      }
9968
    }
9969
9970
  if (bra == OP_BRAZERO)
9971
    {
9972
    backtrack->matchingpath = LABEL();
9973
    SET_LABEL(jump, backtrack->matchingpath);
9974
    }
9975
  else if (bra == OP_BRAMINZERO)
9976
    {
9977
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
9978
    JUMPHERE(brajump);
9979
    if (framesize >= 0)
9980
      {
9981
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9982
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9983
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
9984
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
9985
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
9986
      }
9987
    set_jumps(backtrack->common.topbacktracks, LABEL());
9988
    }
9989
  }
9990
else
9991
  {
9992
  /* AssertNot is successful. */
9993
  if (framesize < 0)
9994
    {
9995
    if (extrasize > 0)
9996
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9997
9998
    if (bra != OP_BRA)
9999
      {
10000
      if (extrasize == 2)
10001
        free_stack(common, 1);
10002
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10003
      }
10004
    else if (extrasize > 0)
10005
      free_stack(common, extrasize);
10006
    }
10007
  else
10008
    {
10009
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10010
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
10011
    /* The topmost item should be 0. */
10012
    if (bra != OP_BRA)
10013
      {
10014
      free_stack(common, framesize + extrasize - 1);
10015
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10016
      }
10017
    else
10018
      free_stack(common, framesize + extrasize);
10019
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
10020
    }
10021
10022
  if (bra == OP_BRAZERO)
10023
    backtrack->matchingpath = LABEL();
10024
  else if (bra == OP_BRAMINZERO)
10025
    {
10026
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
10027
    JUMPHERE(brajump);
10028
    }
10029
10030
  if (bra != OP_BRA)
10031
    {
10032
    SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
10033
    set_jumps(backtrack->common.topbacktracks, LABEL());
10034
    backtrack->common.topbacktracks = NULL;
10035
    }
10036
  }
10037
10038
/* Restore the compiler state saved on entry. */
if (local_quit_available)
10039
  {
10040
  common->local_quit_available = save_local_quit_available;
10041
  common->quit_label = save_quit_label;
10042
  common->quit = save_quit;
10043
  }
10044
common->in_positive_assertion = save_in_positive_assertion;
10045
common->then_trap = save_then_trap;
10046
common->accept_label = save_accept_label;
10047
common->positive_assertion_quit = save_positive_assertion_quit;
10048
common->accept = save_accept;
10049
return cc + 1 + LINK_SIZE;
10050
}
10051
10052
/* Emits the code which resets STACK_TOP after the body of a group has
matched, using framesize and the value stored at private_data_ptr. When
needs_control_head is set, the saved control head is loaded into TMP1 and
written back to common->control_head_ptr. For OP_KETRMAX a saved stack value
is loaded into TMP2 for later use; for OP_KETRMIN without a frame it is
copied into private_data_ptr instead. */
static SLJIT_INLINE void match_once_common(compiler_common *common, PCRE2_UCHAR ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
{
DEFINE_COMPILER;
/* Non-zero when the stack layout contains one more item. */
BOOL extra_item = (ket != OP_KET || has_alternatives);
int stacksize;

if (framesize >= 0)
  {
  stacksize = extra_item ? 2 : 1;
  OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));

  if (ket == OP_KETRMAX)
    {
    /* TMP2 which is set here used by OP_KETRMAX below. */
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    }
  }
else
  {
  if (framesize != no_frame)
    {
    stacksize = needs_control_head ? 1 : 0;
    if (extra_item)
      stacksize++;

    if (stacksize > 0)
      free_stack(common, stacksize);
    }
  else
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);

  if (needs_control_head)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), extra_item ? STACK(-2) : STACK(-1));

  if (ket == OP_KETRMIN)
    {
    /* Move the STR_PTR to the private_data_ptr. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
    }
  else if (ket == OP_KETRMAX)
    {
    /* TMP2 which is set here used by OP_KETRMAX below. */
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
    }
  }

if (needs_control_head)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
}
10099
10100
/* Emits the code that records the end of a capturing group and saves the
previous capture state on the stack.

Arguments:
  common            compiler state
  stacksize         index of the first free stack slot
  offset            twice the capture group number (index into OVECTOR)
  private_data_ptr  local slot whose value is stored into OVECTOR(offset)

Returns the index of the next free stack slot: one slot is consumed when
common->capture_last_ptr is in use and two more when the bracket is not
marked in common->optimized_cbracket. */
static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
{
DEFINE_COMPILER;
const int group = offset >> 1;
int next_slot = stacksize;

if (common->capture_last_ptr != 0)
  {
  /* Remember the previous "last capture" and replace it with this group. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, group);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(next_slot), TMP1, 0);
  next_slot++;
  }

if (common->optimized_cbracket[group] != 0)
  return next_slot;

/* Save the old pair, then store the new start (taken from the private
data) and the new end (the current STR_PTR). */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(next_slot), TMP1, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(next_slot + 1), TMP2, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
return next_slot + 2;
}
10124
10125
/* Helper whose address is handed to sljit_emit_icall by
match_script_run_common. Calls PRIV(script_run) on [ptr, endptr) with FALSE
as the last argument and returns endptr when the check succeeds, NULL
otherwise. */
static PCRE2_SPTR SLJIT_FUNC do_script_run(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
{
  return PRIV(script_run)(ptr, endptr, FALSE) ? endptr : NULL;
}
10131
10132
#ifdef SUPPORT_UNICODE
10133
10134
/* Variant of do_script_run that passes TRUE as the last argument of
PRIV(script_run); match_script_run_common selects it when common->utf is
set. Returns endptr when the check succeeds, NULL otherwise. */
static PCRE2_SPTR SLJIT_FUNC do_script_run_utf(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
{
  return PRIV(script_run)(ptr, endptr, TRUE) ? endptr : NULL;
}
10140
10141
#endif /* SUPPORT_UNICODE */
10142
10143
/* Emits the call that validates a script run once its body has matched.

The start of the run is loaded from private_data_ptr into TMP1 and the
current STR_PTR is the end; the assertion below checks that these are the
first two argument registers. The helper returns the end pointer or NULL;
the result is copied back into STR_PTR and a NULL result jumps to the
backtrack list selected from parent.

Arguments:
  common            compiler state
  private_data_ptr  local slot holding the start of the run
  parent            backtrack whose jump list receives the failure jump */
static SLJIT_INLINE void match_script_run_common(compiler_common *common, int private_data_ptr, backtrack_common *parent)
{
DEFINE_COMPILER;
sljit_sw callee = SLJIT_FUNC_OFFSET(do_script_run);
struct jump_list **fail_list;

SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);

#ifdef SUPPORT_UNICODE
if (common->utf)
  callee = SLJIT_FUNC_OFFSET(do_script_run_utf);
#endif

OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, callee);

OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);

if (parent->top != NULL)
  fail_list = &parent->top->nextbacktracks;
else
  fail_list = &parent->topbacktracks;
add_jump(compiler, fail_list, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
}
10160
10161
/*
10162
  Handling bracketed expressions is probably the most complex part.
10163
10164
  Stack layout naming characters:
10165
    S - Push the current STR_PTR
10166
    0 - Push a 0 (NULL)
10167
    A - Push the current STR_PTR. Needed for restoring the STR_PTR
10168
        before the next alternative. Not pushed if there are no alternatives.
10169
    M - Any values pushed by the current alternative. Can be empty, or anything.
10170
    C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
10171
    L - Push the previous local (pointed by localptr) to the stack
10172
   () - optional values stored on the stack
10173
  ()* - optional, can be stored multiple times
10174
10175
  The following list shows the regular expression templates, their PCRE byte codes
10176
  and stack layout supported by pcre-sljit.
10177
10178
  (?:)                     OP_BRA     | OP_KET                A M
10179
  ()                       OP_CBRA    | OP_KET                C M
10180
  (?:)+                    OP_BRA     | OP_KETRMAX        0   A M S   ( A M S )*
10181
                           OP_SBRA    | OP_KETRMAX        0   L M S   ( L M S )*
10182
  (?:)+?                   OP_BRA     | OP_KETRMIN        0   A M S   ( A M S )*
10183
                           OP_SBRA    | OP_KETRMIN        0   L M S   ( L M S )*
10184
  ()+                      OP_CBRA    | OP_KETRMAX        0   C M S   ( C M S )*
10185
                           OP_SCBRA   | OP_KETRMAX        0   C M S   ( C M S )*
10186
  ()+?                     OP_CBRA    | OP_KETRMIN        0   C M S   ( C M S )*
10187
                           OP_SCBRA   | OP_KETRMIN        0   C M S   ( C M S )*
10188
  (?:)?    OP_BRAZERO    | OP_BRA     | OP_KET            S ( A M 0 )
10189
  (?:)??   OP_BRAMINZERO | OP_BRA     | OP_KET            S ( A M 0 )
10190
  ()?      OP_BRAZERO    | OP_CBRA    | OP_KET            S ( C M 0 )
10191
  ()??     OP_BRAMINZERO | OP_CBRA    | OP_KET            S ( C M 0 )
10192
  (?:)*    OP_BRAZERO    | OP_BRA     | OP_KETRMAX      S 0 ( A M S )*
10193
           OP_BRAZERO    | OP_SBRA    | OP_KETRMAX      S 0 ( L M S )*
10194
  (?:)*?   OP_BRAMINZERO | OP_BRA     | OP_KETRMIN      S 0 ( A M S )*
10195
           OP_BRAMINZERO | OP_SBRA    | OP_KETRMIN      S 0 ( L M S )*
10196
  ()*      OP_BRAZERO    | OP_CBRA    | OP_KETRMAX      S 0 ( C M S )*
10197
           OP_BRAZERO    | OP_SCBRA   | OP_KETRMAX      S 0 ( C M S )*
10198
  ()*?     OP_BRAMINZERO | OP_CBRA    | OP_KETRMIN      S 0 ( C M S )*
10199
           OP_BRAMINZERO | OP_SCBRA   | OP_KETRMIN      S 0 ( C M S )*
10200
10201
10202
  Stack layout naming characters:
10203
    A - Push the alternative index (starting from 0) on the stack.
10204
        Not pushed if there are no alternatives.
10205
    M - Any values pushed by the current alternative. Can be empty, or anything.
10206
10207
  The next list shows the possible content of a bracket:
10208
  (|)     OP_*BRA    | OP_ALT ...         M A
10209
  (?()|)  OP_*COND   | OP_ALT             M A
10210
  (?>|)   OP_ONCE    | OP_ALT ...         [stack trace] M A
10211
                                          Or nothing, if trace is unnecessary
10212
*/
10213
10214
/* Emits the matching path of a bracketed group.

cc points to the bracket opcode, or to an OP_BRAZERO / OP_BRAMINZERO prefix
in front of it. The opcodes given dedicated handling here are OP_ONCE,
OP_CBRA / OP_SCBRA, OP_ASSERT_NA / OP_ASSERTBACK_NA, OP_SCRIPT_RUN, OP_SBRA
and OP_COND / OP_SCOND; any other bracket only pushes the starting STR_PTR
when it has alternatives. The closing opcode is OP_KET, OP_KETRMAX or
OP_KETRMIN; an OP_KET with non-zero private data describes a counted repeat.

Only the first alternative is compiled here (through compile_matchingpath);
the remaining alternatives are skipped and counted.

Arguments:
  common  compiler state
  cc      start of the group in the compiled pattern
  parent  backtrack of the enclosing construct; it is cast to
          braminzero_backtrack when the prefix is OP_BRAMINZERO

Returns the position after the closing ket plus repeat_length, or NULL when
sljit reports a compiler error after one of the nested compile steps.

For OP_ONCE the stored u.framesize is shifted left by one on exit and
needs_control_head is kept in its lowest bit. */
static PCRE2_SPTR compile_bracket_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
10215
{
10216
DEFINE_COMPILER;
10217
backtrack_common *backtrack;
10218
PCRE2_UCHAR opcode;
10219
int private_data_ptr = 0;
10220
int offset = 0;
10221
int i, stacksize;
10222
int repeat_ptr = 0, repeat_length = 0;
10223
int repeat_type = 0, repeat_count = 0;
10224
PCRE2_SPTR ccbegin;
10225
PCRE2_SPTR matchingpath;
10226
PCRE2_SPTR slot;
10227
PCRE2_UCHAR bra = OP_BRA;
10228
PCRE2_UCHAR ket;
10229
assert_backtrack *assert;
10230
BOOL has_alternatives;
10231
BOOL needs_control_head = FALSE;
10232
struct sljit_jump *jump;
10233
struct sljit_jump *skip;
10234
struct sljit_label *rmax_label = NULL;
10235
struct sljit_jump *braminzero = NULL;
10236
10237
PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
10238
10239
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
10240
  {
10241
  bra = *cc;
10242
  cc++;
10243
  /* NOTE(review): this store is repeated unconditionally right after the
  block, so it has no effect of its own. */
  opcode = *cc;
10244
  }
10245
10246
opcode = *cc;
10247
ccbegin = cc;
10248
matchingpath = bracketend(cc) - 1 - LINK_SIZE;
10249
ket = *matchingpath;
/* An OP_KET with non-zero private data carries a counted repeat: four
consecutive private data entries hold the counter slot, the length, the
repeat type and the repeat count. */
10250
if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
10251
  {
10252
  repeat_ptr = PRIVATE_DATA(matchingpath);
10253
  repeat_length = PRIVATE_DATA(matchingpath + 1);
10254
  repeat_type = PRIVATE_DATA(matchingpath + 2);
10255
  repeat_count = PRIVATE_DATA(matchingpath + 3);
10256
  SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
10257
  if (repeat_type == OP_UPTO)
10258
    ket = OP_KETRMAX;
10259
  if (repeat_type == OP_MINUPTO)
10260
    ket = OP_KETRMIN;
10261
  }
10262
10263
/* matchingpath now points just after the bracket opcode and its link; cc is
advanced by the link value, so *cc is then checked for OP_ALT or a ket. */
matchingpath = ccbegin + 1 + LINK_SIZE;
10264
SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
10265
SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
10266
cc += GET(cc, 1);
10267
10268
has_alternatives = *cc == OP_ALT;
10269
if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
10270
  {
10271
  SLJIT_COMPILE_ASSERT(OP_DNRREF == OP_RREF + 1 && OP_FALSE == OP_RREF + 2 && OP_TRUE == OP_RREF + 3,
10272
    compile_time_checks_must_be_grouped_together);
10273
  has_alternatives = ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL) ? FALSE : TRUE;
10274
  }
10275
10276
if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
10277
  opcode = OP_SCOND;
10278
10279
if (opcode == OP_CBRA || opcode == OP_SCBRA)
10280
  {
10281
  /* Capturing brackets has a pre-allocated space. */
10282
  offset = GET2(ccbegin, 1 + LINK_SIZE);
10283
  if (common->optimized_cbracket[offset] == 0)
10284
    {
10285
    private_data_ptr = OVECTOR_PRIV(offset);
10286
    offset <<= 1;
10287
    }
10288
  else
10289
    {
10290
    offset <<= 1;
10291
    private_data_ptr = OVECTOR(offset);
10292
    }
10293
  BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
10294
  matchingpath += IMM2_SIZE;
10295
  }
10296
else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_ONCE || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
10297
  {
10298
  /* Other brackets simply allocate the next entry. */
10299
  private_data_ptr = PRIVATE_DATA(ccbegin);
10300
  SLJIT_ASSERT(private_data_ptr != 0);
10301
  BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
10302
  if (opcode == OP_ONCE)
10303
    BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
10304
  }
10305
10306
/* Instructions before the first alternative. */
10307
stacksize = 0;
10308
if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
10309
  stacksize++;
10310
if (bra == OP_BRAZERO)
10311
  stacksize++;
10312
10313
if (stacksize > 0)
10314
  allocate_stack(common, stacksize);
10315
10316
stacksize = 0;
10317
if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
10318
  {
10319
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10320
  stacksize++;
10321
  }
10322
10323
if (bra == OP_BRAZERO)
10324
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10325
10326
if (bra == OP_BRAMINZERO)
10327
  {
10328
  /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
10329
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10330
  if (ket != OP_KETRMIN)
10331
    {
10332
    free_stack(common, 1);
10333
    braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10334
    }
10335
  else if (opcode == OP_ONCE || opcode >= OP_SBRA)
10336
    {
10337
    jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10338
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10339
    /* Nothing stored during the first run. */
10340
    skip = JUMP(SLJIT_JUMP);
10341
    JUMPHERE(jump);
10342
    /* Checking zero-length iteration. */
10343
    if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
10344
      {
10345
      /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
10346
      braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10347
      }
10348
    else
10349
      {
10350
      /* Except when the whole stack frame must be saved. */
10351
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10352
      braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-BACKTRACK_AS(bracket_backtrack)->u.framesize - 2));
10353
      }
10354
    JUMPHERE(skip);
10355
    }
10356
  else
10357
    {
10358
    jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10359
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10360
    JUMPHERE(jump);
10361
    }
10362
  }
10363
10364
if (repeat_type != 0)
10365
  {
10366
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
10367
  if (repeat_type == OP_EXACT)
10368
    rmax_label = LABEL();
10369
  }
10370
10371
if (ket == OP_KETRMIN)
10372
  BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
10373
10374
if (ket == OP_KETRMAX)
10375
  {
10376
  rmax_label = LABEL();
10377
  if (has_alternatives && opcode >= OP_BRA && opcode < OP_SBRA && repeat_type == 0)
10378
    BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
10379
  }
10380
10381
/* Handling capturing brackets and alternatives. */
10382
if (opcode == OP_ONCE)
10383
  {
10384
  stacksize = 0;
10385
  if (needs_control_head)
10386
    {
10387
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10388
    stacksize++;
10389
    }
10390
10391
  if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
10392
    {
10393
    /* Neither capturing brackets nor recursions are found in the block. */
10394
    if (ket == OP_KETRMIN)
10395
      {
10396
      stacksize += 2;
10397
      if (!needs_control_head)
10398
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10399
      }
10400
    else
10401
      {
10402
      if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
10403
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
10404
      if (ket == OP_KETRMAX || has_alternatives)
10405
        stacksize++;
10406
      }
10407
10408
    if (stacksize > 0)
10409
      allocate_stack(common, stacksize);
10410
10411
    stacksize = 0;
10412
    if (needs_control_head)
10413
      {
10414
      stacksize++;
10415
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10416
      }
10417
10418
    if (ket == OP_KETRMIN)
10419
      {
10420
      if (needs_control_head)
10421
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10422
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10423
      if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
10424
        OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
10425
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
10426
      }
10427
    else if (ket == OP_KETRMAX || has_alternatives)
10428
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10429
    }
10430
  else
10431
    {
10432
    if (ket != OP_KET || has_alternatives)
10433
      stacksize++;
10434
10435
    stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
10436
    allocate_stack(common, stacksize);
10437
10438
    if (needs_control_head)
10439
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10440
10441
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10442
    OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
10443
10444
    stacksize = needs_control_head ? 1 : 0;
10445
    if (ket != OP_KET || has_alternatives)
10446
      {
10447
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10448
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
10449
      stacksize++;
10450
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10451
      }
10452
    else
10453
      {
10454
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
10455
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10456
      }
10457
    init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1);
10458
    }
10459
  }
10460
else if (opcode == OP_CBRA || opcode == OP_SCBRA)
10461
  {
10462
  /* Saving the previous values. */
10463
  if (common->optimized_cbracket[offset >> 1] != 0)
10464
    {
10465
    SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
10466
    allocate_stack(common, 2);
10467
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10468
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
10469
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
10470
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
10471
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
10472
    }
10473
  else
10474
    {
10475
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10476
    allocate_stack(common, 1);
10477
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
10478
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10479
    }
10480
  }
10481
else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
10482
  {
10483
  /* Saving the previous value. */
10484
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10485
  allocate_stack(common, 1);
10486
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
10487
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10488
  }
10489
else if (has_alternatives)
10490
  {
10491
  /* Pushing the starting string pointer. */
10492
  allocate_stack(common, 1);
10493
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10494
  }
10495
10496
/* Generating code for the first alternative. */
10497
if (opcode == OP_COND || opcode == OP_SCOND)
10498
  {
10499
  if (*matchingpath == OP_CREF)
10500
    {
10501
    SLJIT_ASSERT(has_alternatives);
10502
    add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
10503
      CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
10504
    matchingpath += 1 + IMM2_SIZE;
10505
    }
10506
  else if (*matchingpath == OP_DNCREF)
10507
    {
10508
    SLJIT_ASSERT(has_alternatives);
10509
10510
    i = GET2(matchingpath, 1 + IMM2_SIZE);
10511
    slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
10512
    OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
10513
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
10514
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
10515
    slot += common->name_entry_size;
10516
    i--;
10517
    while (i-- > 0)
10518
      {
10519
      OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
10520
      OP2(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, STR_PTR, 0);
10521
      slot += common->name_entry_size;
10522
      }
10523
    OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
10524
    add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO));
10525
    matchingpath += 1 + 2 * IMM2_SIZE;
10526
    }
10527
  else if ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL)
10528
    {
10529
    /* Never has other case. */
10530
    BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
10531
    SLJIT_ASSERT(!has_alternatives);
10532
10533
    /* In this branch stacksize is reused as a flag: non-zero means the
    condition is known to hold at compile time. */
    if (*matchingpath == OP_TRUE)
10534
      {
10535
      stacksize = 1;
10536
      matchingpath++;
10537
      }
10538
    else if (*matchingpath == OP_FALSE || *matchingpath == OP_FAIL)
10539
      stacksize = 0;
10540
    else if (*matchingpath == OP_RREF)
10541
      {
10542
      stacksize = GET2(matchingpath, 1);
10543
      if (common->currententry == NULL)
10544
        stacksize = 0;
10545
      else if (stacksize == RREF_ANY)
10546
        stacksize = 1;
10547
      else if (common->currententry->start == 0)
10548
        stacksize = stacksize == 0;
10549
      else
10550
        stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
10551
10552
      if (stacksize != 0)
10553
        matchingpath += 1 + IMM2_SIZE;
10554
      }
10555
    else
10556
      {
10557
      if (common->currententry == NULL || common->currententry->start == 0)
10558
        stacksize = 0;
10559
      else
10560
        {
10561
        stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
10562
        slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
10563
        i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
10564
        while (stacksize > 0)
10565
          {
10566
          if ((int)GET2(slot, 0) == i)
10567
            break;
10568
          slot += common->name_entry_size;
10569
          stacksize--;
10570
          }
10571
        }
10572
10573
      if (stacksize != 0)
10574
        matchingpath += 1 + 2 * IMM2_SIZE;
10575
      }
10576
10577
      /* The stacksize == 0 is a common "else" case. */
10578
      if (stacksize == 0)
10579
        {
10580
        if (*cc == OP_ALT)
10581
          {
10582
          matchingpath = cc + 1 + LINK_SIZE;
10583
          cc += GET(cc, 1);
10584
          }
10585
        else
10586
          matchingpath = cc;
10587
        }
10588
    }
10589
  else
10590
    {
10591
    SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
10592
    /* Similar code as PUSH_BACKTRACK macro. */
10593
    assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
10594
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10595
      return NULL;
10596
    memset(assert, 0, sizeof(assert_backtrack));
10597
    assert->common.cc = matchingpath;
10598
    BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
10599
    matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
10600
    }
10601
  }
10602
10603
compile_matchingpath(common, matchingpath, cc, backtrack);
10604
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10605
  return NULL;
10606
10607
if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA)
10608
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10609
10610
if (opcode == OP_ONCE)
10611
  match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
10612
10613
if (opcode == OP_SCRIPT_RUN)
10614
  match_script_run_common(common, private_data_ptr, backtrack);
10615
10616
/* Count every slot needed after the first alternative, allocate them with
one call, then fill them in the same order as they were counted. */
stacksize = 0;
10617
if (repeat_type == OP_MINUPTO)
10618
  {
10619
  /* We need to preserve the counter. TMP2 will be used below. */
10620
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
10621
  stacksize++;
10622
  }
10623
if (ket != OP_KET || bra != OP_BRA)
10624
  stacksize++;
10625
if (offset != 0)
10626
  {
10627
  if (common->capture_last_ptr != 0)
10628
    stacksize++;
10629
  if (common->optimized_cbracket[offset >> 1] == 0)
10630
    stacksize += 2;
10631
  }
10632
if (has_alternatives && opcode != OP_ONCE)
10633
  stacksize++;
10634
10635
if (stacksize > 0)
10636
  allocate_stack(common, stacksize);
10637
10638
stacksize = 0;
10639
if (repeat_type == OP_MINUPTO)
10640
  {
10641
  /* TMP2 was set above. */
10642
  OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
10643
  stacksize++;
10644
  }
10645
10646
if (ket != OP_KET || bra != OP_BRA)
10647
  {
10648
  if (ket != OP_KET)
10649
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10650
  else
10651
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10652
  stacksize++;
10653
  }
10654
10655
if (offset != 0)
10656
  stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
10657
10658
/* Skip and count the other alternatives. */
10659
i = 1;
10660
while (*cc == OP_ALT)
10661
  {
10662
  cc += GET(cc, 1);
10663
  i++;
10664
  }
10665
10666
if (has_alternatives)
10667
  {
10668
  if (opcode != OP_ONCE)
10669
    {
10670
    if (i <= 3)
10671
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10672
    else
10673
      BACKTRACK_AS(bracket_backtrack)->u.matching_put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
10674
    }
10675
  if (ket != OP_KETRMAX)
10676
    BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10677
  }
10678
10679
/* Must be after the matchingpath label. */
10680
if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
10681
  {
10682
  SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
10683
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10684
  }
10685
10686
if (ket == OP_KETRMAX)
10687
  {
10688
  if (repeat_type != 0)
10689
    {
10690
    if (has_alternatives)
10691
      BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10692
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
10693
    JUMPTO(SLJIT_NOT_ZERO, rmax_label);
10694
    /* Drop STR_PTR for greedy plus quantifier. */
10695
    if (opcode != OP_ONCE)
10696
      free_stack(common, 1);
10697
    }
10698
  else if (opcode < OP_BRA || opcode >= OP_SBRA)
10699
    {
10700
    if (has_alternatives)
10701
      BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10702
10703
    /* Checking zero-length iteration. */
10704
    if (opcode != OP_ONCE)
10705
      {
10706
      /* This case includes opcodes such as OP_SCRIPT_RUN. */
10707
      CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
10708
      /* Drop STR_PTR for greedy plus quantifier. */
10709
      if (bra != OP_BRAZERO)
10710
        free_stack(common, 1);
10711
      }
10712
    else
10713
      /* TMP2 must contain the starting STR_PTR. */
10714
      CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
10715
    }
10716
  else
10717
    JUMPTO(SLJIT_JUMP, rmax_label);
10718
  BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
10719
  }
10720
10721
if (repeat_type == OP_EXACT)
10722
  {
10723
  count_match(common);
10724
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
10725
  JUMPTO(SLJIT_NOT_ZERO, rmax_label);
10726
  }
10727
else if (repeat_type == OP_UPTO)
10728
  {
10729
  /* We need to preserve the counter. */
10730
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
10731
  allocate_stack(common, 1);
10732
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10733
  }
10734
10735
if (bra == OP_BRAZERO)
10736
  BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
10737
10738
if (bra == OP_BRAMINZERO)
10739
  {
10740
  /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
10741
  JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
10742
  if (braminzero != NULL)
10743
    {
10744
    JUMPHERE(braminzero);
10745
    /* We need to release the end pointer to perform the
10746
    backtrack for the zero-length iteration. When
10747
    framesize is < 0, OP_ONCE will do the release itself. */
10748
    if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
10749
      {
10750
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10751
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10752
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
10753
      }
10754
    else if (ket == OP_KETRMIN && opcode != OP_ONCE)
10755
      free_stack(common, 1);
10756
    }
10757
  /* Continue to the normal backtrack. */
10758
  }
10759
10760
if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
10761
  count_match(common);
10762
10763
cc += 1 + LINK_SIZE;
10764
10765
if (opcode == OP_ONCE)
10766
  {
10767
  /* We temporarily encode the needs_control_head in the lowest bit.
10768
     Note: on the target architectures of SLJIT the ((x << 1) >> 1) returns
10769
     the same value for small signed numbers (including negative numbers). */
10770
  BACKTRACK_AS(bracket_backtrack)->u.framesize = (int)((unsigned)BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
10771
  }
10772
return cc + repeat_length;
10773
}
10774
10775
/* Emits the matching path of a repeated bracket from the OP_BRAPOS family
(OP_BRAPOS, OP_SBRAPOS, OP_CBRAPOS, OP_SCBRAPOS), optionally preceded by
OP_BRAPOSZERO.

Arguments:
  common   the JIT compiler state
  cc       points at OP_BRAPOSZERO or at the bracket opcode itself
  parent   passed through to PUSH_BACKTRACK (presumably the enclosing
           backtrack record - confirm against the macro definition)

Returns the position after the closing OP_KETRPOS, or NULL when sljit reports
a compiler error while an alternative is being compiled.

Generated code outline: a block of stack slots is allocated once, then every
alternative is compiled in turn. A successful alternative records the new
subject position (and capture data when offset != 0) and jumps back to "loop";
a failing alternative restores STR_PTR and falls through to the next one.
When OP_BRAPOSZERO is absent, one extra slot holds a flag that is set to 1 on
entry and cleared after each successful iteration, so a non-zero flag after
the loop means that no iteration matched at all. */
static PCRE2_SPTR compile_bracketpos_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
PCRE2_UCHAR opcode;
int private_data_ptr;
int cbraprivptr = 0;
BOOL needs_control_head;
int framesize;
int stacksize;
int offset = 0;
BOOL zero = FALSE;
PCRE2_SPTR ccbegin = NULL;
int stack; /* Also contains the offset of control head. */
struct sljit_label *loop = NULL;
struct jump_list *emptymatch = NULL;

PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
if (*cc == OP_BRAPOSZERO)
  {
  zero = TRUE;
  cc++;
  }

opcode = *cc;
private_data_ptr = PRIVATE_DATA(cc);
SLJIT_ASSERT(private_data_ptr != 0);
BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
switch(opcode)
  {
  case OP_BRAPOS:
  case OP_SBRAPOS:
  ccbegin = cc + 1 + LINK_SIZE;
  break;

  case OP_CBRAPOS:
  case OP_SCBRAPOS:
  offset = GET2(cc, 1 + LINK_SIZE);
  /* This case cannot be optimized in the same way as
  normal capturing brackets. */
  SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
  cbraprivptr = OVECTOR_PRIV(offset);
  /* From here on, offset is the index of the first ovector entry of the pair. */
  offset <<= 1;
  ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
  break;

  default:
  SLJIT_UNREACHABLE();
  break;
  }

framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
if (framesize < 0)
  {
  /* No frame is needed. Slots: the saved ovector pair (plus capture_last when
  it is tracked) for capturing brackets or the start position otherwise, then
  the optional control head, then the optional "nothing matched yet" flag. */
  if (offset != 0)
    {
    stacksize = 2;
    if (common->capture_last_ptr != 0)
      stacksize++;
    }
  else
    stacksize = 1;

  if (needs_control_head)
    stacksize++;
  if (!zero)
    stacksize++;

  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
  allocate_stack(common, stacksize);
  if (framesize == no_frame)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);

  stack = 0;
  if (offset != 0)
    {
    stack = 2;
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
    if (common->capture_last_ptr != 0)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
    if (needs_control_head)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    if (common->capture_last_ptr != 0)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
      stack = 3;
      }
    }
  else
    {
    if (needs_control_head)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    stack = 1;
    }

  /* The flag goes into the slot after the control head; "stack" is then
  stepped back so that it ends up as the control head slot index. */
  if (needs_control_head)
    stack++;
  if (!zero)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
  if (needs_control_head)
    {
    stack--;
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
    }
  }
else
  {
  /* A frame is needed. Slots from the top: the optional flag at STACK(0), the
  optional control head, the start position (non-capturing only), the previous
  private data value, and then the frame set up by init_frame(). */
  stacksize = framesize + 1;
  if (!zero)
    stacksize++;
  if (needs_control_head)
    stacksize++;
  if (offset == 0)
    stacksize++;
  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;

  allocate_stack(common, stacksize);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));

  stack = 0;
  if (!zero)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
    stack = 1;
    }
  if (needs_control_head)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
    stack++;
    }
  if (offset == 0)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
    stack++;
    }
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
  init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize);
  /* Step "stack" back over the slots written after the control head. */
  stack -= 1 + (offset == 0);
  }

if (offset != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);

loop = LABEL();
while (*cc != OP_KETRPOS)
  {
  backtrack->top = NULL;
  backtrack->topbacktracks = NULL;
  cc += GET(cc, 1);

  compile_matchingpath(common, ccbegin, cc, backtrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;

  /* The alternative matched: record the result of this iteration. */
  if (framesize < 0)
    {
    if (framesize == no_frame)
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);

    if (offset != 0)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
      if (common->capture_last_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
      }
    else
      {
      /* TMP1 keeps the previous position for the empty match test below. */
      if (opcode == OP_SBRAPOS)
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }

    /* Even if the match is empty, we need to reset the control head. */
    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));

    /* Leave the loop when the iteration did not advance STR_PTR. */
    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
      add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));

    if (!zero)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
    }
  else
    {
    if (offset != 0)
      {
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
      if (common->capture_last_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
      }
    else
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP2(SLJIT_SUB, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
      if (opcode == OP_SBRAPOS)
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
      OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(-framesize - 2), STR_PTR, 0);
      }

    /* Even if the match is empty, we need to reset the control head. */
    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));

    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
      add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));

    /* framesize >= 0 in this branch, so the flag is always the STACK(0) slot
    that was initialised above. */
    if (!zero)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    }

  JUMPTO(SLJIT_JUMP, loop);
  flush_stubs(common);

  compile_backtrackingpath(common, backtrack->top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;
  set_jumps(backtrack->topbacktracks, LABEL());

  /* The alternative failed: reload the position recorded by the last
  successful iteration (or the start position) before trying the next one. */
  if (framesize < 0)
    {
    if (offset != 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
    else
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    }
  else
    {
    if (offset != 0)
      {
      /* Last alternative. */
      if (*cc == OP_KETRPOS)
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      }
    else
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
      }
    }

  if (*cc == OP_KETRPOS)
    break;
  ccbegin = cc + 1 + LINK_SIZE;
  }

/* We don't have to restore the control head in case of a failed match. */

backtrack->topbacktracks = NULL;
if (!zero)
  {
  /* A flag that is still non-zero means that not a single iteration matched. */
  if (framesize < 0)
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
  else /* TMP2 is set to [private_data_ptr] above. */
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0));
  }

/* None of them matched. */
set_jumps(emptymatch, LABEL());
count_match(common);
return cc + 1 + LINK_SIZE;
}
11057
11058
/* Decodes the single-item iterator that starts at cc.

Outputs:
  opcode   the repeat kind, mapped onto the plain-character opcodes
           (OP_STAR based); the PLUS variants are reported as the matching
           STAR variant with *exact set to 1
  type     the repeated item: OP_CHAR, OP_CHARI, OP_NOT, OP_NOTI, the opcode
           that follows an OP_TYPE* repeat, or the class opcode itself
  max      written only for bounded repeats: the UPTO count, or for a class
           range the upper bound (0 is stored as is, any other value is
           reduced by the lower bound)
  exact    number of mandatory repetitions, 0 when there are none
  end      first code unit after the whole iterator

Returns the position of the data of the repeated item. */
static SLJIT_INLINE PCRE2_SPTR get_iterator_parameters(compiler_common *common, PCRE2_SPTR cc, PCRE2_UCHAR *opcode, PCRE2_UCHAR *type, sljit_u32 *max, sljit_u32 *exact, PCRE2_SPTR *end)
{
PCRE2_UCHAR op = *cc;
sljit_u32 lower, upper;
int class_len;

*exact = 0;

if (op >= OP_STAR && op <= OP_POSUPTO)
  *type = OP_CHAR;
else if (op >= OP_STARI && op <= OP_POSUPTOI)
  {
  *type = OP_CHARI;
  op -= OP_STARI - OP_STAR;
  }
else if (op >= OP_NOTSTAR && op <= OP_NOTPOSUPTO)
  {
  *type = OP_NOT;
  op -= OP_NOTSTAR - OP_STAR;
  }
else if (op >= OP_NOTSTARI && op <= OP_NOTPOSUPTOI)
  {
  *type = OP_NOTI;
  op -= OP_NOTSTARI - OP_STAR;
  }
else if (op >= OP_TYPESTAR && op <= OP_TYPEPOSUPTO)
  {
  /* OP_END is a placeholder, the real type is read after the switch below. */
  *type = OP_END;
  op -= OP_TYPESTAR - OP_STAR;
  }
else
  {
  /* Character classes: the repeat opcode follows the class data. */
  SLJIT_ASSERT(op == OP_CLASS || op == OP_NCLASS || op == OP_XCLASS);
  *type = op;
  cc++;
  class_len = (op < OP_XCLASS) ? (int)(1 + (32 / sizeof(PCRE2_UCHAR))) : GET(cc, 0);
  op = cc[class_len - 1];

  if (op >= OP_CRSTAR && op <= OP_CRMINQUERY)
    {
    *end = cc + class_len;
    op -= OP_CRSTAR - OP_STAR;

    if (op == OP_PLUS || op == OP_MINPLUS)
      {
      op -= OP_PLUS - OP_STAR;
      *exact = 1;
      }
    }
  else if (op >= OP_CRPOSSTAR && op <= OP_CRPOSQUERY)
    {
    *end = cc + class_len;
    op -= OP_CRPOSSTAR - OP_POSSTAR;

    if (op == OP_POSPLUS)
      {
      op = OP_POSSTAR;
      *exact = 1;
      }
    }
  else
    {
    SLJIT_ASSERT(op == OP_CRRANGE || op == OP_CRMINRANGE || op == OP_CRPOSRANGE);
    upper = GET2(cc, (class_len + IMM2_SIZE));
    lower = GET2(cc, class_len);
    *exact = lower;
    *end = cc + class_len + 2 * IMM2_SIZE;

    if (upper == 0)
      {
      /* An upper bound of zero is handled as a STAR repeat after the
      mandatory part. */
      *max = 0;
      if (op == OP_CRPOSRANGE)
        op = OP_POSSTAR;
      else
        op -= OP_CRRANGE - OP_STAR;
      }
    else
      {
      /* Only the optional part remains after the mandatory repetitions. */
      upper -= lower;
      *max = upper;

      if (upper == 0)
        op = OP_EXACT;
      else if (op == OP_CRPOSRANGE)
        op = (upper == 1) ? OP_POSQUERY : OP_POSUPTO;
      else if (upper == 1)
        op -= OP_CRRANGE - OP_QUERY;
      else
        op -= OP_CRRANGE - OP_UPTO;
      }
    }

  *opcode = op;
  return cc;
  }

/* Every non-class form skips its repeat opcode here. */
cc++;

switch(op)
  {
  case OP_UPTO:
  case OP_MINUPTO:
  case OP_POSUPTO:
  *max = GET2(cc, 0);
  cc += IMM2_SIZE;
  break;

  case OP_EXACT:
  *exact = GET2(cc, 0);
  cc += IMM2_SIZE;
  break;

  case OP_POSPLUS:
  op = OP_POSSTAR;
  *exact = 1;
  break;

  case OP_PLUS:
  case OP_MINPLUS:
  op -= OP_PLUS - OP_STAR;
  *exact = 1;
  break;

  default:
  break;
  }

*opcode = op;

if (*type == OP_END)
  {
  /* Character type repeat: cc is at the type opcode. */
  *type = *cc;
  *end = next_opcode(common, cc);
  return cc + 1;
  }

*end = cc + 1;
#ifdef SUPPORT_UNICODE
if (common->utf && HAS_EXTRALEN(*cc))
  *end += GET_EXTRALEN(*cc);
#endif
return cc;
}
11202
11203
/* Emits the matching path of a single-item iterator (a repeated character,
character type or character class).

Arguments:
  common   the JIT compiler state
  cc       points at the iterator opcode; it is decoded by
           get_iterator_parameters()
  parent   passed through to PUSH_BACKTRACK (presumably the enclosing
           backtrack record - confirm against the macro definition)

Returns the position of the code following the iterator. This is the "end"
value reported by get_iterator_parameters(), advanced by 2 when a following
OP_CHAR / OP_CHARI item is consumed by the charpos search below. */
static PCRE2_SPTR compile_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11204
{
11205
DEFINE_COMPILER;
11206
backtrack_common *backtrack;
11207
PCRE2_UCHAR opcode;
11208
PCRE2_UCHAR type;
11209
sljit_u32 max = 0, exact;
11210
sljit_s32 early_fail_ptr = PRIVATE_DATA(cc + 1);
11211
sljit_s32 early_fail_type;
11212
BOOL charpos_enabled;
11213
PCRE2_UCHAR charpos_char;
11214
unsigned int charpos_othercasebit;
11215
PCRE2_SPTR end;
11216
jump_list *no_match = NULL;
11217
jump_list *no_char1_match = NULL;
11218
struct sljit_jump *jump = NULL;
11219
struct sljit_label *label;
11220
int private_data_ptr = PRIVATE_DATA(cc);
11221
/* The iterator state (two words at most) is kept either in the private data
area addressed from SLJIT_SP, or on the backtrack stack when the iterator has
no private data. */
int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
11222
int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
11223
int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
11224
int tmp_base, tmp_offset;
11225
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11226
BOOL use_tmp;
11227
#endif
11228
11229
PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL);
11230
11231
/* The private data of cc + 1 packs the early fail type into the low three
bits and the early fail pointer into the remaining bits. */
early_fail_type = (early_fail_ptr & 0x7);
11232
early_fail_ptr >>= 3;
11233
11234
/* During recursion, these optimizations are disabled. */
11235
if (common->early_fail_start_ptr == 0 && common->fast_forward_bc_ptr == NULL)
11236
  {
11237
  early_fail_ptr = 0;
11238
  early_fail_type = type_skip;
11239
  }
11240
11241
SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || early_fail_ptr == 0
11242
  || (early_fail_ptr >= common->early_fail_start_ptr && early_fail_ptr <= common->early_fail_end_ptr));
11243
11244
/* type_fail: backtrack at once when STR_PTR is not beyond the value stored in
the early fail slot. */
if (early_fail_type == type_fail)
11245
  add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr));
11246
11247
cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
11248
11249
/* Location of the scratch value (loop counter or saved position) used below:
the TMP3 register, or the POSSESSIVE0 slot for OP_EXTUNI. */
if (type != OP_EXTUNI)
11250
  {
11251
  tmp_base = TMP3;
11252
  tmp_offset = 0;
11253
  }
11254
else
11255
  {
11256
  tmp_base = SLJIT_MEM1(SLJIT_SP);
11257
  tmp_offset = POSSESSIVE0;
11258
  }
11259
11260
/* Handle fixed part first. */
11261
if (exact > 1)
11262
  {
11263
  SLJIT_ASSERT(early_fail_ptr == 0);
11264
11265
  /* In complete mode, without UTF and for types other than OP_ANYNL and
  OP_EXTUNI, the available length is tested once before the counted loop.
  NOTE(review): the last argument of compile_char1_matchingpath() presumably
  selects the per-item end-of-subject check - confirm against its definition. */
  if (common->mode == PCRE2_JIT_COMPLETE
11266
#ifdef SUPPORT_UNICODE
11267
      && !common->utf
11268
#endif
11269
      && type != OP_ANYNL && type != OP_EXTUNI)
11270
    {
11271
    OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
11272
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0));
11273
    OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
11274
    label = LABEL();
11275
    compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
11276
    OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11277
    JUMPTO(SLJIT_NOT_ZERO, label);
11278
    }
11279
  else
11280
    {
11281
    OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
11282
    label = LABEL();
11283
    compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
11284
    OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11285
    JUMPTO(SLJIT_NOT_ZERO, label);
11286
    }
11287
  }
11288
else if (exact == 1)
11289
  {
11290
  compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
11291
11292
  /* type_fail_range uses two consecutive slots at early_fail_ptr: backtrack
  when (STR_PTR - second) <= (first - second), otherwise store STR_PTR in the
  second slot. */
  if (early_fail_type == type_fail_range)
11293
    {
11294
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr);
11295
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + (int)sizeof(sljit_sw));
11296
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, TMP2, 0);
11297
    OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, TMP2, 0);
11298
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, TMP2, 0, TMP1, 0));
11299
11300
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + (int)sizeof(sljit_sw), STR_PTR, 0);
11301
    }
11302
  }
11303
11304
/* The variable part: opcode has been mapped to the OP_STAR based variants by
get_iterator_parameters(). */
switch(opcode)
11305
  {
11306
  case OP_STAR:
11307
  case OP_UPTO:
11308
  SLJIT_ASSERT(early_fail_ptr == 0 || opcode == OP_STAR);
11309
11310
  if (type == OP_ANYNL || type == OP_EXTUNI)
11311
    {
11312
    SLJIT_ASSERT(private_data_ptr == 0);
11313
    SLJIT_ASSERT(early_fail_ptr == 0);
11314
11315
    /* Two initial slots are allocated (STR_PTR and a zero word); after that
    every matched item pushes its end position onto the backtrack stack. */
    allocate_stack(common, 2);
11316
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11317
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
11318
11319
    if (opcode == OP_UPTO)
11320
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, max);
11321
11322
    label = LABEL();
11323
    compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
11324
    if (opcode == OP_UPTO)
11325
      {
11326
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
11327
      OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
11328
      jump = JUMP(SLJIT_ZERO);
11329
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
11330
      }
11331
11332
    /* We cannot use TMP3 because of allocate_stack. */
11333
    allocate_stack(common, 1);
11334
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11335
    JUMPTO(SLJIT_JUMP, label);
11336
    if (jump != NULL)
11337
      JUMPHERE(jump);
11338
    BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11339
    break;
11340
    }
11341
#ifdef SUPPORT_UNICODE
11342
  else if (type == OP_ALLANY && !common->invalid_utf)
11343
#else
11344
  else if (type == OP_ALLANY)
11345
#endif
11346
    {
11347
    if (opcode == OP_STAR)
11348
      {
11349
      if (private_data_ptr == 0)
11350
        allocate_stack(common, 2);
11351
11352
      /* No loop is needed: STR_PTR is moved straight to STR_END. offset0
      receives STR_END and offset1 the starting position. */
      OP1(SLJIT_MOV, base, offset0, STR_END, 0);
11353
      OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11354
11355
      OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
11356
      process_partial_match(common);
11357
11358
      if (early_fail_ptr != 0)
11359
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
11360
      BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11361
      break;
11362
      }
11363
#ifdef SUPPORT_UNICODE
11364
    else if (!common->utf)
11365
#else
11366
    else
11367
#endif
11368
      {
11369
      if (private_data_ptr == 0)
11370
        allocate_stack(common, 2);
11371
11372
      /* OP_UPTO: advance by max code units in one step. In complete mode the
      result is clamped to STR_END; in the other modes an overrun is passed
      to process_partial_match(). */
      OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11373
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));
11374
11375
      if (common->mode == PCRE2_JIT_COMPLETE)
11376
        {
11377
        OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
11378
        CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
11379
        }
11380
      else
11381
        {
11382
        jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
11383
        process_partial_match(common);
11384
        JUMPHERE(jump);
11385
        }
11386
11387
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11388
11389
      if (early_fail_ptr != 0)
11390
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11391
      BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11392
      break;
11393
      }
11394
    }
11395
11396
  charpos_enabled = FALSE;
11397
  charpos_char = 0;
11398
  charpos_othercasebit = 0;
11399
11400
  /* charpos: when the iterator is directly followed by a literal character,
  that character is searched for while iterating and the literal item is
  consumed here as well. */
  if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI))
11401
    {
11402
#ifdef SUPPORT_UNICODE
11403
    charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]);
11404
#else
11405
    charpos_enabled = TRUE;
11406
#endif
11407
    if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1))
11408
      {
11409
      charpos_othercasebit = char_get_othercase_bit(common, end + 1);
11410
      if (charpos_othercasebit == 0)
11411
        charpos_enabled = FALSE;
11412
      }
11413
11414
    if (charpos_enabled)
11415
      {
11416
      charpos_char = end[1];
11417
      /* Consume the OP_CHAR opcode. */
11418
      end += 2;
11419
#if PCRE2_CODE_UNIT_WIDTH == 8
11420
      SLJIT_ASSERT((charpos_othercasebit >> 8) == 0);
11421
#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
11422
      SLJIT_ASSERT((charpos_othercasebit >> 9) == 0);
11423
      if ((charpos_othercasebit & 0x100) != 0)
11424
        charpos_othercasebit = (charpos_othercasebit & 0xff) << 8;
11425
#endif
11426
      if (charpos_othercasebit != 0)
11427
        charpos_char |= charpos_othercasebit;
11428
11429
      BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE;
11430
      BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = charpos_char;
11431
      BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = charpos_othercasebit;
11432
      }
11433
    }
11434
11435
  if (charpos_enabled)
11436
    {
11437
    if (opcode == OP_UPTO)
11438
      OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max + 1);
11439
11440
    /* Search the first instance of charpos_char. */
11441
    jump = JUMP(SLJIT_JUMP);
11442
    label = LABEL();
11443
    if (opcode == OP_UPTO)
11444
      {
11445
      OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11446
      add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_ZERO));
11447
      }
11448
    compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
11449
    if (early_fail_ptr != 0)
11450
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11451
    JUMPHERE(jump);
11452
11453
    detect_partial_match(common, &backtrack->topbacktracks);
11454
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
11455
    if (charpos_othercasebit != 0)
11456
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
11457
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
11458
11459
    if (private_data_ptr == 0)
11460
      allocate_stack(common, 2);
11461
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11462
    OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11463
11464
    if (opcode == OP_UPTO)
11465
      {
11466
      OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11467
      add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11468
      }
11469
11470
    /* Search the last instance of charpos_char. */
11471
    label = LABEL();
11472
    compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11473
    if (early_fail_ptr != 0)
11474
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11475
    detect_partial_match(common, &no_match);
11476
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
11477
    if (charpos_othercasebit != 0)
11478
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
11479
11480
    if (opcode == OP_STAR)
11481
      {
11482
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
11483
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11484
      JUMPTO(SLJIT_JUMP, label);
11485
      }
11486
    else
11487
      {
11488
      jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char);
11489
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11490
      JUMPHERE(jump);
11491
      OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11492
      JUMPTO(SLJIT_NOT_ZERO, label);
11493
      }
11494
11495
    /* Continue one code unit after the last recorded instance, which skips
    the literal character itself. */
    set_jumps(no_match, LABEL());
11496
    OP2(SLJIT_ADD, STR_PTR, 0, base, offset0, SLJIT_IMM, IN_UCHARS(1));
11497
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11498
    }
11499
  else
11500
    {
11501
    /* Generic greedy loop: offset1 holds the starting position and offset0
    the position reached when the loop stops. */
    if (private_data_ptr == 0)
11502
      allocate_stack(common, 2);
11503
11504
    OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11505
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11506
    use_tmp = (!HAS_VIRTUAL_REGISTERS && opcode == OP_STAR);
11507
    SLJIT_ASSERT(!use_tmp || tmp_base == TMP3);
11508
11509
    if (common->utf)
11510
      OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0);
11511
#endif
11512
    if (opcode == OP_UPTO)
11513
      OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11514
11515
    detect_partial_match(common, &no_match);
11516
    label = LABEL();
11517
    compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11518
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11519
    if (common->utf)
11520
      OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0);
11521
#endif
11522
11523
    if (opcode == OP_UPTO)
11524
      {
11525
      OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11526
      add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11527
      }
11528
11529
    detect_partial_match_to(common, label);
11530
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11531
11532
    /* In UTF mode the position after the last matched item is reloaded from
    its saved copy; otherwise STR_PTR is stepped back by one code unit on the
    no_char1_match path. */
    set_jumps(no_char1_match, LABEL());
11533
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11534
    if (common->utf)
11535
      {
11536
      set_jumps(no_match, LABEL());
11537
      if (use_tmp)
11538
        {
11539
        OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
11540
        OP1(SLJIT_MOV, base, offset0, TMP3, 0);
11541
        }
11542
      else
11543
        OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
11544
      }
11545
    else
11546
#endif
11547
      {
11548
      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11549
      set_jumps(no_match, LABEL());
11550
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11551
      }
11552
11553
    if (early_fail_ptr != 0)
11554
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11555
    }
11556
11557
  BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11558
  break;
11559
11560
  /* The minimizing variants only record their state here; no item is matched
  on this path. */
  case OP_MINSTAR:
11561
  if (private_data_ptr == 0)
11562
    allocate_stack(common, 1);
11563
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11564
  BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11565
  if (early_fail_ptr != 0)
11566
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11567
  break;
11568
11569
  case OP_MINUPTO:
11570
  SLJIT_ASSERT(early_fail_ptr == 0);
11571
  if (private_data_ptr == 0)
11572
    allocate_stack(common, 2);
11573
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11574
  OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1);
11575
  BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11576
  break;
11577
11578
  case OP_QUERY:
11579
  case OP_MINQUERY:
11580
  SLJIT_ASSERT(early_fail_ptr == 0);
11581
  if (private_data_ptr == 0)
11582
    allocate_stack(common, 1);
11583
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11584
  /* Only the greedy form tries to match the item on the matching path. */
  if (opcode == OP_QUERY)
11585
    compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
11586
  BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11587
  break;
11588
11589
  /* Nothing is left to do: the fixed part has been emitted above. */
  case OP_EXACT:
11590
  break;
11591
11592
  /* The possessive variants below allocate no stack and set no matchingpath
  label. */
  case OP_POSSTAR:
11593
#if defined SUPPORT_UNICODE
11594
  if (type == OP_ALLANY && !common->invalid_utf)
11595
#else
11596
  if (type == OP_ALLANY)
11597
#endif
11598
    {
11599
    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
11600
    process_partial_match(common);
11601
    if (early_fail_ptr != 0)
11602
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
11603
    break;
11604
    }
11605
11606
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11607
  if (common->utf)
11608
    {
11609
    /* The scratch location keeps the position after the last complete item,
    and STR_PTR is restored from it when the loop stops. */
    OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11610
    detect_partial_match(common, &no_match);
11611
    label = LABEL();
11612
    compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11613
    OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11614
    detect_partial_match_to(common, label);
11615
11616
    set_jumps(no_match, LABEL());
11617
    OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
11618
    if (early_fail_ptr != 0)
11619
      {
11620
      if (!HAS_VIRTUAL_REGISTERS && tmp_base == TMP3)
11621
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, TMP3, 0);
11622
      else
11623
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11624
      }
11625
    break;
11626
    }
11627
#endif
11628
11629
  detect_partial_match(common, &no_match);
11630
  label = LABEL();
11631
  compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11632
  detect_partial_match_to(common, label);
11633
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11634
11635
  set_jumps(no_char1_match, LABEL());
11636
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11637
  set_jumps(no_match, LABEL());
11638
  if (early_fail_ptr != 0)
11639
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11640
  break;
11641
11642
  case OP_POSUPTO:
11643
  SLJIT_ASSERT(early_fail_ptr == 0);
11644
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11645
  if (common->utf)
11646
    {
11647
    /* POSSESSIVE1 keeps the position after the last complete item while the
    scratch location counts the remaining repetitions. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
11648
    OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11649
11650
    detect_partial_match(common, &no_match);
11651
    label = LABEL();
11652
    compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11653
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
11654
    OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11655
    add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11656
    detect_partial_match_to(common, label);
11657
11658
    set_jumps(no_match, LABEL());
11659
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
11660
    break;
11661
    }
11662
#endif
11663
11664
  if (type == OP_ALLANY)
11665
    {
11666
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));
11667
11668
    if (common->mode == PCRE2_JIT_COMPLETE)
11669
      {
11670
      OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
11671
      CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
11672
      }
11673
    else
11674
      {
11675
      jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
11676
      process_partial_match(common);
11677
      JUMPHERE(jump);
11678
      }
11679
    break;
11680
    }
11681
11682
  OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11683
11684
  detect_partial_match(common, &no_match);
11685
  label = LABEL();
11686
  compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11687
  OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11688
  add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11689
  detect_partial_match_to(common, label);
11690
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11691
11692
  set_jumps(no_char1_match, LABEL());
11693
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11694
  set_jumps(no_match, LABEL());
11695
  break;
11696
11697
  case OP_POSQUERY:
11698
  SLJIT_ASSERT(early_fail_ptr == 0);
11699
  /* The scratch location holds the position to continue from: the start, or
  the position after the item when it matched. */
  OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11700
  compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
11701
  OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11702
  set_jumps(no_match, LABEL());
11703
  OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
11704
  break;
11705
11706
  default:
11707
  SLJIT_UNREACHABLE();
11708
  break;
11709
  }
11710
11711
count_match(common);
11712
return end;
11713
}
11714
11715
/* Generates the matching path for OP_FAIL, OP_ACCEPT and OP_ASSERT_ACCEPT.

Arguments:
  common   the JIT compiler state
  cc       points to the verb opcode
  parent   the backtrack that receives the new backtrack record

Returns:   the address of the opcode that follows the verb (cc + 1);
           PUSH_BACKTRACK is given NULL as its failure value
*/
static SLJIT_INLINE PCRE2_SPTR compile_fail_accept_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
PCRE2_SPTR next_cc = cc + 1;
BOOL skip_notempty;

PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);

if (*cc == OP_FAIL)
  {
  /* Unconditionally continue on the backtracking path. */
  add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
  return next_cc;
  }

/* OP_ACCEPT outside of a recursion entry with PCRE2_ENDANCHORED set: jump
to the reset_match list unless STR_PTR equals STR_END. */
if (*cc == OP_ACCEPT && common->currententry == NULL)
  {
  if ((common->re->overall_options & PCRE2_ENDANCHORED) != 0)
    {
    add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
    }
  }

skip_notempty = (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty);

if (skip_notempty)
  {
  /* No need to check notempty conditions. */
  if (common->accept_label != NULL)
    {
    JUMPTO(SLJIT_JUMP, common->accept_label);
    }
  else
    {
    add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
    }
  return next_cc;
  }

/* Accept immediately when STR_PTR differs from the value stored at
OVECTOR(0). */
if (common->accept_label != NULL)
  {
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
  }
else
  {
  add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));
  }

/* Load the options field of jit_arguments into TMP2. With virtual
registers the ARGUMENTS value is copied into TMP1 first and TMP1 is used
as the base register from here on. */
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  }
OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options));

/* PCRE2_NOTEMPTY set: backtrack. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_NOT_ZERO));

/* PCRE2_NOTEMPTY_ATSTART clear: accept. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
if (common->accept_label != NULL)
  {
  JUMPTO(SLJIT_ZERO, common->accept_label);
  }
else
  {
  add_jump(compiler, &common->accept, JUMP(SLJIT_ZERO));
  }

/* PCRE2_NOTEMPTY_ATSTART set: accept only when STR_PTR differs from the
str field of jit_arguments, otherwise backtrack. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
if (common->accept_label != NULL)
  {
  CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
  }
else
  {
  add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
  }
add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
return next_cc;
}
11770
11771
/* Generates the matching path for OP_CLOSE.

Arguments:
  common   the JIT compiler state
  cc       points to the OP_CLOSE opcode; the group number is read with
           GET2(cc, 1)

Returns:   the address of the opcode after OP_CLOSE and its 2-unit operand
*/
static SLJIT_INLINE PCRE2_SPTR compile_close_matchingpath(compiler_common *common, PCRE2_SPTR cc)
{
DEFINE_COMPILER;
int group = GET2(cc, 1);
int pair = group << 1;
BOOL optimized_cbracket = common->optimized_cbracket[group] != 0;
PCRE2_SPTR next_cc = cc + 1 + IMM2_SIZE;

/* Data will be discarded anyway... */
if (common->currententry != NULL)
  return next_cc;

if (optimized_cbracket)
  {
  /* Only the second slot of the pair is written from STR_PTR. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(pair + 1), STR_PTR, 0);
  return next_cc;
  }

/* Otherwise the first slot of the pair is also filled, from the value kept
in OVECTOR_PRIV(group). It is loaded into TMP1 before the stores. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(group));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(pair + 1), STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(pair), TMP1, 0);
return next_cc;
}
11789
11790
/* Generates the matching path for the control verbs dispatched here by
compile_matchingpath (PRUNE, SKIP, THEN, COMMIT and their _ARG forms).

Arguments:
  common   the JIT compiler state
  cc       points to the verb opcode
  parent   the backtrack that receives the new backtrack record

Returns:   the address of the opcode after the verb (and after its
           argument for the _ARG forms); PUSH_BACKTRACK is given NULL as
           its failure value
*/
static SLJIT_INLINE PCRE2_SPTR compile_control_verb_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
PCRE2_UCHAR opcode = *cc;
PCRE2_SPTR ccend = cc + 1;

/* The _ARG forms are followed by 2 + cc[1] further code units. */
switch(opcode)
  {
  case OP_COMMIT_ARG:
  case OP_PRUNE_ARG:
  case OP_SKIP_ARG:
  case OP_THEN_ARG:
  ccend += 2 + cc[1];
  break;

  default:
  break;
  }

PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);

switch(opcode)
  {
  case OP_SKIP:
  /* Save the current STR_PTR in a newly allocated stack slot. */
  allocate_stack(common, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  break;

  case OP_COMMIT_ARG:
  case OP_PRUNE_ARG:
  case OP_THEN_ARG:
  /* Store the address cc + 2 both in the local mark_ptr slot and in the
  mark_ptr field of jit_arguments. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    }
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
  break;

  default:
  /* No matching-path code beyond the backtrack record. */
  break;
  }

return ccend;
}
11821
11822
/* One-element opcode buffer holding OP_THEN_TRAP. The then-trap backtrack
records created in compile_then_trap_matchingpath() and at the end of
compile_matchingpath() point their common.cc field at it. */
static PCRE2_UCHAR then_trap_opcode[1] = { OP_THEN_TRAP };
11823
11824
/* Pushes a then_trap_backtrack record for the range [cc, ccend) and emits
the code that links a new three-word entry (plus an optional frame) into
the control head chain.

Arguments:
  common   the JIT compiler state
  cc       start of the range covered by the trap
  ccend    end of the range covered by the trap
  parent   the backtrack that receives the new backtrack record
*/
static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
then_trap_backtrack *trap;
BOOL needs_control_head;
int framesize;
int slots;

PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
trap = BACKTRACK_AS(then_trap_backtrack);

common->then_trap = trap;
trap->common.cc = then_trap_opcode;
trap->start = (sljit_sw)(cc - common->start);
trap->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);

/* Three words are always needed; a non-negative framesize adds that many
more. */
framesize = trap->framesize;
slots = 3 + (framesize > 0 ? framesize : 0);

/* Keep the previous control head in TMP2 while the new entry is built. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
allocate_stack(common, slots);
if (slots <= 3)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
  }
else
  {
  OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (slots - 3) * sizeof(sljit_sw));
  }

/* Entry layout: start offset, type_then_trap tag, previous control head. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(slots - 1), SLJIT_IMM, trap->start);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(slots - 2), SLJIT_IMM, type_then_trap);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(slots - 3), TMP2, 0);

if (framesize >= 0)
  {
  init_frame(common, cc, ccend, framesize - 1, 0);
  }
}
11854
11855
/* Emits the matching-path code for the opcodes in the range [cc, ccend).

Each opcode is dispatched to the helper that compiles it; the helper's
return value becomes the new cc. The function returns early, without
reaching the final assertion, when a helper returns NULL or when one of the
PUSH_BACKTRACK_NOVALUE invocations bails out.

Arguments:
  common   the JIT compiler state
  cc       first opcode to compile
  ccend    end of the range; must point to OP_END or to an opcode in the
           OP_ALT..OP_KETRPOS range (asserted below)
  parent   the backtrack that collects the backtrack records created here
*/
static void compile_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
11856
{
11857
DEFINE_COMPILER;
11858
backtrack_common *backtrack;
11859
BOOL has_then_trap = FALSE;
11860
then_trap_backtrack *save_then_trap = NULL;
11861
11862
SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));
11863
11864
/* A non-zero then_offsets entry for this position requests a then-trap
around the whole range. The previous common->then_trap is saved here and
restored at the end of the function. */
if (common->has_then && common->then_offsets[cc - common->start] != 0)
11865
  {
11866
  SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
11867
  has_then_trap = TRUE;
11868
  save_then_trap = common->then_trap;
11869
  /* Tail item on backtrack. */
11870
  compile_then_trap_matchingpath(common, cc, ccend, parent);
11871
  }
11872
11873
/* One opcode (or one opcode group handled by a helper) per iteration. */
while (cc < ccend)
11874
  {
11875
  switch(*cc)
11876
    {
11877
    /* Simple assertions get no backtrack record of their own: their failure
    jumps are added to parent->top's nextbacktracks list when parent->top is
    set, otherwise to parent->topbacktracks. The same target selection is used
    by the single-character cases below. */
    case OP_SOD:
11878
    case OP_SOM:
11879
    case OP_NOT_WORD_BOUNDARY:
11880
    case OP_WORD_BOUNDARY:
11881
    case OP_EODN:
11882
    case OP_EOD:
11883
    case OP_DOLL:
11884
    case OP_DOLLM:
11885
    case OP_CIRC:
11886
    case OP_CIRCM:
11887
    case OP_REVERSE:
11888
    cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
11889
    break;
11890
11891
    case OP_NOT_DIGIT:
11892
    case OP_DIGIT:
11893
    case OP_NOT_WHITESPACE:
11894
    case OP_WHITESPACE:
11895
    case OP_NOT_WORDCHAR:
11896
    case OP_WORDCHAR:
11897
    case OP_ANY:
11898
    case OP_ALLANY:
11899
    case OP_ANYBYTE:
11900
    case OP_NOTPROP:
11901
    case OP_PROP:
11902
    case OP_ANYNL:
11903
    case OP_NOT_HSPACE:
11904
    case OP_HSPACE:
11905
    case OP_NOT_VSPACE:
11906
    case OP_VSPACE:
11907
    case OP_EXTUNI:
11908
    case OP_NOT:
11909
    case OP_NOTI:
11910
    cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
11911
    break;
11912
11913
    /* OP_SET_SOM: the old OVECTOR(0) value is pushed on the stack and
    OVECTOR(0) is overwritten with the current STR_PTR. */
    case OP_SET_SOM:
11914
    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
11915
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
11916
    allocate_stack(common, 1);
11917
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
11918
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
11919
    cc++;
11920
    break;
11921
11922
    /* Literal characters: compile_charn_matchingpath() is used only in
    PCRE2_JIT_COMPLETE mode; the other modes compile one character at a
    time. */
    case OP_CHAR:
11923
    case OP_CHARI:
11924
    if (common->mode == PCRE2_JIT_COMPLETE)
11925
      cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
11926
    else
11927
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
11928
    break;
11929
11930
    /* All single-character repeat opcodes share one helper. */
    case OP_STAR:
11931
    case OP_MINSTAR:
11932
    case OP_PLUS:
11933
    case OP_MINPLUS:
11934
    case OP_QUERY:
11935
    case OP_MINQUERY:
11936
    case OP_UPTO:
11937
    case OP_MINUPTO:
11938
    case OP_EXACT:
11939
    case OP_POSSTAR:
11940
    case OP_POSPLUS:
11941
    case OP_POSQUERY:
11942
    case OP_POSUPTO:
11943
    case OP_STARI:
11944
    case OP_MINSTARI:
11945
    case OP_PLUSI:
11946
    case OP_MINPLUSI:
11947
    case OP_QUERYI:
11948
    case OP_MINQUERYI:
11949
    case OP_UPTOI:
11950
    case OP_MINUPTOI:
11951
    case OP_EXACTI:
11952
    case OP_POSSTARI:
11953
    case OP_POSPLUSI:
11954
    case OP_POSQUERYI:
11955
    case OP_POSUPTOI:
11956
    case OP_NOTSTAR:
11957
    case OP_NOTMINSTAR:
11958
    case OP_NOTPLUS:
11959
    case OP_NOTMINPLUS:
11960
    case OP_NOTQUERY:
11961
    case OP_NOTMINQUERY:
11962
    case OP_NOTUPTO:
11963
    case OP_NOTMINUPTO:
11964
    case OP_NOTEXACT:
11965
    case OP_NOTPOSSTAR:
11966
    case OP_NOTPOSPLUS:
11967
    case OP_NOTPOSQUERY:
11968
    case OP_NOTPOSUPTO:
11969
    case OP_NOTSTARI:
11970
    case OP_NOTMINSTARI:
11971
    case OP_NOTPLUSI:
11972
    case OP_NOTMINPLUSI:
11973
    case OP_NOTQUERYI:
11974
    case OP_NOTMINQUERYI:
11975
    case OP_NOTUPTOI:
11976
    case OP_NOTMINUPTOI:
11977
    case OP_NOTEXACTI:
11978
    case OP_NOTPOSSTARI:
11979
    case OP_NOTPOSPLUSI:
11980
    case OP_NOTPOSQUERYI:
11981
    case OP_NOTPOSUPTOI:
11982
    case OP_TYPESTAR:
11983
    case OP_TYPEMINSTAR:
11984
    case OP_TYPEPLUS:
11985
    case OP_TYPEMINPLUS:
11986
    case OP_TYPEQUERY:
11987
    case OP_TYPEMINQUERY:
11988
    case OP_TYPEUPTO:
11989
    case OP_TYPEMINUPTO:
11990
    case OP_TYPEEXACT:
11991
    case OP_TYPEPOSSTAR:
11992
    case OP_TYPEPOSPLUS:
11993
    case OP_TYPEPOSQUERY:
11994
    case OP_TYPEPOSUPTO:
11995
    cc = compile_iterator_matchingpath(common, cc, parent);
11996
    break;
11997
11998
    /* Classes: the code unit located 32 bytes after the opcode decides
    whether a repeat (OP_CRSTAR..OP_CRPOSRANGE) follows, in which case the
    iterator helper compiles the class together with its repeat. */
    case OP_CLASS:
11999
    case OP_NCLASS:
12000
    if (cc[1 + (32 / sizeof(PCRE2_UCHAR))] >= OP_CRSTAR && cc[1 + (32 / sizeof(PCRE2_UCHAR))] <= OP_CRPOSRANGE)
12001
      cc = compile_iterator_matchingpath(common, cc, parent);
12002
    else
12003
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
12004
    break;
12005
12006
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
12007
    /* Same decision for OP_XCLASS; the opcode to test is found at the
    offset returned by GET(cc, 1). */
    case OP_XCLASS:
12008
    if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE)
12009
      cc = compile_iterator_matchingpath(common, cc, parent);
12010
    else
12011
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
12012
    break;
12013
#endif
12014
12015
    /* References: a following repeat opcode selects the ref iterator
    helper; otherwise the reference is compiled in place and cc is advanced
    past the opcode and its operand(s) here. */
    case OP_REF:
12016
    case OP_REFI:
12017
    if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE)
12018
      cc = compile_ref_iterator_matchingpath(common, cc, parent);
12019
    else
12020
      {
12021
      compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
12022
      cc += 1 + IMM2_SIZE;
12023
      }
12024
    break;
12025
12026
    case OP_DNREF:
12027
    case OP_DNREFI:
12028
    if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE)
12029
      cc = compile_ref_iterator_matchingpath(common, cc, parent);
12030
    else
12031
      {
12032
      compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
12033
      compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
12034
      cc += 1 + 2 * IMM2_SIZE;
12035
      }
12036
    break;
12037
12038
    case OP_RECURSE:
12039
    cc = compile_recurse_matchingpath(common, cc, parent);
12040
    break;
12041
12042
    case OP_CALLOUT:
12043
    case OP_CALLOUT_STR:
12044
    cc = compile_callout_matchingpath(common, cc, parent);
12045
    break;
12046
12047
    /* Assertions get an assert_backtrack record that is handed to the
    assertion compiler. */
    case OP_ASSERT:
12048
    case OP_ASSERT_NOT:
12049
    case OP_ASSERTBACK:
12050
    case OP_ASSERTBACK_NOT:
12051
    PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
12052
    cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
12053
    break;
12054
12055
    /* OP_BRAMINZERO: no bracket code is emitted here; cc moves to the
    position returned by bracketend(). STR_PTR is pushed, with an extra zero
    word when the bracket ends in OP_KETRMIN, and the label of the code that
    follows is recorded as the backtrack's matchingpath. */
    case OP_BRAMINZERO:
12056
    PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
12057
    cc = bracketend(cc + 1);
12058
    if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
12059
      {
12060
      allocate_stack(common, 1);
12061
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
12062
      }
12063
    else
12064
      {
12065
      allocate_stack(common, 2);
12066
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
12067
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
12068
      }
12069
    BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
12070
    count_match(common);
12071
    break;
12072
12073
    case OP_ASSERT_NA:
12074
    case OP_ASSERTBACK_NA:
12075
    case OP_ONCE:
12076
    case OP_SCRIPT_RUN:
12077
    case OP_BRA:
12078
    case OP_CBRA:
12079
    case OP_COND:
12080
    case OP_SBRA:
12081
    case OP_SCBRA:
12082
    case OP_SCOND:
12083
    cc = compile_bracket_matchingpath(common, cc, parent);
12084
    break;
12085
12086
    /* OP_BRAZERO: the opcode that follows decides between the bracket
    compiler (cc[1] above OP_ASSERTBACK_NOT) and the assertion compiler. */
    case OP_BRAZERO:
12087
    if (cc[1] > OP_ASSERTBACK_NOT)
12088
      cc = compile_bracket_matchingpath(common, cc, parent);
12089
    else
12090
      {
12091
      PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
12092
      cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
12093
      }
12094
    break;
12095
12096
    case OP_BRAPOS:
12097
    case OP_CBRAPOS:
12098
    case OP_SBRAPOS:
12099
    case OP_SCBRAPOS:
12100
    case OP_BRAPOSZERO:
12101
    cc = compile_bracketpos_matchingpath(common, cc, parent);
12102
    break;
12103
12104
    /* OP_MARK: the previous mark_ptr value is saved on the stack, and the
    address cc + 2 is stored both in the local mark_ptr slot and in the
    mark_ptr field of jit_arguments. With has_skip_arg set, four additional
    words form a type_mark entry that is linked into the control head
    chain. */
    case OP_MARK:
12105
    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
12106
    SLJIT_ASSERT(common->mark_ptr != 0);
12107
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
12108
    allocate_stack(common, common->has_skip_arg ? 5 : 1);
12109
    if (HAS_VIRTUAL_REGISTERS)
12110
      OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
12111
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
12112
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
12113
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
12114
    OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
12115
    if (common->has_skip_arg)
12116
      {
12117
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
12118
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
12119
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
12120
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
12121
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
12122
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
12123
      }
12124
    cc += 1 + 2 + cc[1];
12125
    break;
12126
12127
    case OP_PRUNE:
12128
    case OP_PRUNE_ARG:
12129
    case OP_SKIP:
12130
    case OP_SKIP_ARG:
12131
    case OP_THEN:
12132
    case OP_THEN_ARG:
12133
    case OP_COMMIT:
12134
    case OP_COMMIT_ARG:
12135
    cc = compile_control_verb_matchingpath(common, cc, parent);
12136
    break;
12137
12138
    case OP_FAIL:
12139
    case OP_ACCEPT:
12140
    case OP_ASSERT_ACCEPT:
12141
    cc = compile_fail_accept_matchingpath(common, cc, parent);
12142
    break;
12143
12144
    case OP_CLOSE:
12145
    cc = compile_close_matchingpath(common, cc);
12146
    break;
12147
12148
    /* OP_SKIPZERO: nothing is emitted; cc just moves to the position
    returned by bracketend(). */
    case OP_SKIPZERO:
12149
    cc = bracketend(cc + 1);
12150
    break;
12151
12152
    default:
12153
    SLJIT_UNREACHABLE();
12154
    return;
12155
    }
12156
  /* A helper returned NULL: stop compiling this path. */
  if (cc == NULL)
12157
    return;
12158
  }
12159
12160
/* Close the then-trap opened above: push a second then_trap_backtrack that
refers to the one created by compile_then_trap_matchingpath(), then restore
the saved common->then_trap. */
if (has_then_trap)
12161
  {
12162
  /* Head item on backtrack. */
12163
  PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
12164
  BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
12165
  BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
12166
  common->then_trap = save_then_trap;
12167
  }
12168
SLJIT_ASSERT(cc == ccend);
12169
}
12170
12171
/* The backtrack-record macros used by the matching-path functions are not
needed from here on. */
#undef PUSH_BACKTRACK
12172
#undef PUSH_BACKTRACK_NOVALUE
12173
#undef BACKTRACK_AS
12174
12175
/* Calls compile_backtrackingpath() on "current" and returns from the
enclosing (void) function when the sljit compiler reports an error. It
expects "common" and "compiler" to be in scope at the point of use. */
#define COMPILE_BACKTRACKINGPATH(current) \
12176
  do \
12177
    { \
12178
    compile_backtrackingpath(common, (current)); \
12179
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
12180
      return; \
12181
    } \
12182
  while (0)
12183
12184
/* Views the variable named "current" as a pointer to the given backtrack
type. */
#define CURRENT_AS(type) ((type *)current)
12185
12186
/* Emits the backtracking path of a single-character iterator.

The iterator is described by current->cc; its opcode, type, max, exact and
end values are decoded by get_iterator_parameters(). The iterator state is
addressed through base/offset0/offset1: the stack slots STACK(0) and
STACK(1) when PRIVATE_DATA(cc) is zero, otherwise two consecutive words at
private_data_ptr relative to SLJIT_SP. Stack slots are released only in the
former case.

Arguments:
  common    the JIT compiler state
  current   the backtrack record of the iterator (a char_iterator_backtrack)
*/
static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12187
{
12188
DEFINE_COMPILER;
12189
PCRE2_SPTR cc = current->cc;
12190
PCRE2_UCHAR opcode;
12191
PCRE2_UCHAR type;
12192
sljit_u32 max = 0, exact;
12193
struct sljit_label *label = NULL;
12194
struct sljit_jump *jump = NULL;
12195
jump_list *jumplist = NULL;
12196
PCRE2_SPTR end;
12197
int private_data_ptr = PRIVATE_DATA(cc);
12198
int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
12199
int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
12200
int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
12201
12202
cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
12203
12204
switch(opcode)
12205
  {
12206
  case OP_STAR:
12207
  case OP_UPTO:
12208
  if (type == OP_ANYNL || type == OP_EXTUNI)
12209
    {
12210
    /* A saved STR_PTR is popped from the stack; a non-zero value resumes
    the matching path, zero falls through to the end of this function. */
    SLJIT_ASSERT(private_data_ptr == 0);
12211
    set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
12212
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12213
    free_stack(common, 1);
12214
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12215
    }
12216
  else
12217
    {
12218
    if (CURRENT_AS(char_iterator_backtrack)->u.charpos.enabled)
12219
      {
12220
      /* charpos variant: starting one code unit below the position saved at
      offset0, loop while STR_PTR is above the value saved at offset1. Each
      round stores STR_PTR back to offset0 and resumes the matching path if
      the code unit before STR_PTR (OR-ed with othercasebit when that is
      non-zero) equals charpos.chr; otherwise move_back() is called. */
      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12221
      OP1(SLJIT_MOV, TMP2, 0, base, offset1);
12222
      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
12223
12224
      jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
12225
      label = LABEL();
12226
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
12227
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12228
      if (CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit != 0)
12229
        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit);
12230
      CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.chr, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12231
      move_back(common, NULL, TRUE);
12232
      CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP2, 0, label);
12233
      }
12234
    else
12235
      {
12236
      /* Generic variant: reload STR_PTR from offset0; unless it is at or
      below the value at offset1, call move_back(), save the new position and
      resume the matching path. */
      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12237
      jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
12238
      move_back(common, NULL, TRUE);
12239
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12240
      JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12241
      }
12242
    /* Both variants arrive here when the lower bound has been reached. */
    JUMPHERE(jump);
12243
    if (private_data_ptr == 0)
12244
      free_stack(common, 2);
12245
    }
12246
  break;
12247
12248
  /* OP_MINSTAR: restore STR_PTR, try to match one more character, and on
  success save the new position and resume the matching path. A failed
  character match (jumplist) lands after the jump. */
  case OP_MINSTAR:
12249
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12250
  compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
12251
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12252
  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12253
  set_jumps(jumplist, LABEL());
12254
  if (private_data_ptr == 0)
12255
    free_stack(common, 1);
12256
  break;
12257
12258
  /* OP_MINUPTO: like OP_MINSTAR, but the counter kept at offset1 is
  decremented first; reaching zero takes the same exit as a failed character
  match. */
  case OP_MINUPTO:
12259
  OP1(SLJIT_MOV, TMP1, 0, base, offset1);
12260
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12261
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
12262
  add_jump(compiler, &jumplist, JUMP(SLJIT_ZERO));
12263
12264
  OP1(SLJIT_MOV, base, offset1, TMP1, 0);
12265
  compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
12266
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12267
  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12268
12269
  set_jumps(jumplist, LABEL());
12270
  if (private_data_ptr == 0)
12271
    free_stack(common, 2);
12272
  break;
12273
12274
  /* OP_QUERY: the saved position at offset0 is loaded and the slot is
  cleared; a non-zero position resumes the matching path. The jumps in
  u.backtracks are bound to a second copy of the load/clear sequence that
  resumes the matching path unconditionally. */
  case OP_QUERY:
12275
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12276
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
12277
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12278
  jump = JUMP(SLJIT_JUMP);
12279
  set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
12280
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12281
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
12282
  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12283
  JUMPHERE(jump);
12284
  if (private_data_ptr == 0)
12285
    free_stack(common, 1);
12286
  break;
12287
12288
  /* OP_MINQUERY: the saved position is loaded and the slot is cleared; a
  zero position skips the attempt, otherwise one character is matched and
  the matching path is resumed. */
  case OP_MINQUERY:
12289
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12290
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
12291
  jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
12292
  compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
12293
  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12294
  set_jumps(jumplist, LABEL());
12295
  JUMPHERE(jump);
12296
  if (private_data_ptr == 0)
12297
    free_stack(common, 1);
12298
  break;
12299
12300
  /* These opcodes emit no backtracking code of their own. */
  case OP_EXACT:
12301
  case OP_POSSTAR:
12302
  case OP_POSQUERY:
12303
  case OP_POSUPTO:
12304
  break;
12305
12306
  default:
12307
  SLJIT_UNREACHABLE();
12308
  break;
12309
  }
12310
12311
/* Jumps collected in topbacktracks continue after the code emitted above. */
set_jumps(current->topbacktracks, LABEL());
12312
}
12313
12314
/* Emits the backtracking path of a repeated back reference.

The repeat opcode is read from the code unit that follows the reference:
at cc[1 + IMM2_SIZE] for OP_REF/OP_REFI, at cc[1 + 2 * IMM2_SIZE] otherwise.
An even repeat opcode selects the maximizing variant.

Arguments:
  common    the JIT compiler state
  current   the backtrack record of the iterator (a ref_iterator_backtrack)
*/
static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
PCRE2_SPTR cc = current->cc;
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
PCRE2_UCHAR type;

if (ref)
  type = cc[1 + IMM2_SIZE];
else
  type = cc[1 + 2 * IMM2_SIZE];

if ((type & 0x1) != 0)
  {
  /* Minimize case: reload STR_PTR from the top stack slot and resume the
  matching path when it is non-zero; otherwise the stack words are
  released (2 for OP_REF/OP_REFI, 3 otherwise). */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
  set_jumps(current->topbacktracks, LABEL());
  free_stack(common, ref ? 2 : 3);
  }
else
  {
  /* Maximize case: pop one saved STR_PTR; a non-zero value resumes the
  matching path. */
  set_jumps(current->topbacktracks, LABEL());
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
  }
}
12338
12339
/* Emits the backtracking path of a recursion.

An inlined pattern is handled by compiling the backtracking path of the
nested records (current->top). Otherwise a fast call to the entry's
backtrack code is emitted, and a non-zero TMP1 afterwards resumes the
matching path.

Arguments:
  common    the JIT compiler state
  current   the backtrack record of the recursion (a recurse_backtrack)
*/
static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
recurse_entry *entry;

if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
  {
  compile_backtrackingpath(common, current->top);
  }
else
  {
  entry = CURRENT_AS(recurse_backtrack)->entry;

  /* Jump straight to the backtrack label when it is already known;
  otherwise queue the call on the entry's backtrack_calls list. */
  if (entry->backtrack_label != NULL)
    {
    JUMPTO(SLJIT_FAST_CALL, entry->backtrack_label);
    }
  else
    {
    add_jump(compiler, &entry->backtrack_calls, JUMP(SLJIT_FAST_CALL));
    }

  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(recurse_backtrack)->matchingpath);
  }

set_jumps(current->topbacktracks, LABEL());
}
12358
12359
/* Emits the backtracking path of an assertion, optionally preceded by
OP_BRAZERO (OP_BRAMINZERO is not accepted here, see the assertion below).

Arguments:
  common    the JIT compiler state
  current   the backtrack record of the assertion (an assert_backtrack)
*/
static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12360
{
12361
DEFINE_COMPILER;
12362
PCRE2_SPTR cc = current->cc;
12363
PCRE2_UCHAR bra = OP_BRA;
12364
struct sljit_jump *brajump = NULL;
12365
12366
SLJIT_ASSERT(*cc != OP_BRAMINZERO);
12367
/* Remember a leading OP_BRAZERO in "bra" and step over it. */
if (*cc == OP_BRAZERO)
12368
  {
12369
  bra = *cc;
12370
  cc++;
12371
  }
12372
12373
/* With OP_BRAZERO the value in the top stack slot is loaded into STR_PTR;
it is compared against zero further down. */
if (bra == OP_BRAZERO)
12374
  {
12375
  SLJIT_ASSERT(current->topbacktracks == NULL);
12376
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12377
  }
12378
12379
/* Negative framesize: nothing has to be reverted, only the pending jumps
are bound. With OP_BRAZERO the top slot is cleared, a non-zero STR_PTR
resumes the matching path, and the slot is released otherwise. */
if (CURRENT_AS(assert_backtrack)->framesize < 0)
12380
  {
12381
  set_jumps(current->topbacktracks, LABEL());
12382
12383
  if (bra == OP_BRAZERO)
12384
    {
12385
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
12386
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
12387
    free_stack(common, 1);
12388
    }
12389
  return;
12390
  }
12391
12392
if (bra == OP_BRAZERO)
12393
  {
12394
  /* Negative assertions take the same clear/compare/release sequence as
  above and are finished. */
  if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
12395
    {
12396
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
12397
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
12398
    free_stack(common, 1);
12399
    return;
12400
    }
12401
  /* Positive assertions: release the slot; a zero STR_PTR skips everything
  up to the JUMPHERE(brajump) at the end of the function. */
  free_stack(common, 1);
12402
  brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
12403
  }
12404
12405
if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
12406
  {
12407
  /* Reload STACK_TOP from the assertion's private data slot, call the
  revertframes routine, then move STACK_TOP by (framesize - 1) words and
  store the word read from STACK(-2) into the private data slot. */
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr);
12408
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12409
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
12410
  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(assert_backtrack)->framesize - 1) * sizeof(sljit_sw));
12411
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr, TMP1, 0);
12412
12413
  set_jumps(current->topbacktracks, LABEL());
12414
  }
12415
else
12416
  set_jumps(current->topbacktracks, LABEL());
12417
12418
/* With OP_BRAZERO: take one stack word back without calling
allocate_stack(), clear it, and resume the matching path. brajump, taken
above when STR_PTR was zero, lands after this sequence. */
if (bra == OP_BRAZERO)
12419
  {
12420
  /* We know there is enough place on the stack. */
12421
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
12422
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
12423
  JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_backtrack)->matchingpath);
12424
  JUMPHERE(brajump);
12425
  }
12426
}
12427
12428
/* Emits the backtracking path for a bracket-type construct.

Handles the opcodes routed here by compile_backtrackingpath (OP_BRA, OP_CBRA,
OP_ONCE, OP_COND, OP_SBRA, OP_SCBRA, OP_SCOND, OP_ASSERT_NA, OP_ASSERTBACK_NA,
OP_SCRIPT_RUN), optionally prefixed by OP_BRAZERO or OP_BRAMINZERO.

Arguments:
  common     compiler state shared by the code generators
  current    backtrack record of the bracket; it is read through
             CURRENT_AS(bracket_backtrack)

The generated code first reloads values that were saved on the backtrack
stack, then compiles every remaining alternative (its matching path followed
by its own backtracking path), and finally handles the repeat forms
(OP_EXACT counters, OP_KETRMAX, OP_KETRMIN and the zero-repeat prefixes).
The function returns early if compiling an alternative leaves an sljit
compiler error set. */
static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12429
{
12430
DEFINE_COMPILER;
12431
int opcode, stacksize, alt_count, alt_max;
12432
int offset = 0;
12433
int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
12434
int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
12435
PCRE2_SPTR cc = current->cc;
12436
PCRE2_SPTR ccbegin;
12437
PCRE2_SPTR ccprev;
12438
PCRE2_UCHAR bra = OP_BRA;
12439
PCRE2_UCHAR ket;
12440
assert_backtrack *assert;
12441
BOOL has_alternatives;
12442
BOOL needs_control_head = FALSE;
12443
struct sljit_jump *brazero = NULL;
12444
struct sljit_jump *next_alt = NULL;
12445
struct sljit_jump *once = NULL;
12446
struct sljit_jump *cond = NULL;
12447
struct sljit_label *rmin_label = NULL;
12448
struct sljit_label *exact_label = NULL;
12449
struct sljit_put_label *put_label = NULL;
12450
12451
/* Remember a leading zero-repeat prefix in bra and step over it. */
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
12452
  {
12453
  bra = *cc;
12454
  cc++;
12455
  }
12456
12457
opcode = *cc;
12458
/* ccbegin temporarily addresses the closing ket so that its type and
private data can be inspected; it is reset to the bracket start below. */
ccbegin = bracketend(cc) - 1 - LINK_SIZE;
12459
ket = *ccbegin;
12460
if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
12461
  {
12462
  repeat_ptr = PRIVATE_DATA(ccbegin);
12463
  repeat_type = PRIVATE_DATA(ccbegin + 2);
12464
  repeat_count = PRIVATE_DATA(ccbegin + 3);
12465
  SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
12466
  /* Bounded repeats are treated as the corresponding repeating ket. */
  if (repeat_type == OP_UPTO)
12467
    ket = OP_KETRMAX;
12468
  if (repeat_type == OP_MINUPTO)
12469
    ket = OP_KETRMIN;
12470
  }
12471
ccbegin = cc;
12472
cc += GET(cc, 1);
12473
has_alternatives = *cc == OP_ALT;
12474
if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
12475
  has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
12476
/* For capturing brackets, offset is twice the group number read from the
pattern; it is used as the OVECTOR() index below. */
if (opcode == OP_CBRA || opcode == OP_SCBRA)
12477
  offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
12478
if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
12479
  opcode = OP_SCOND;
12480
12481
alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;
12482
12483
/* Decoding the needs_control_head in framesize. */
12484
if (opcode == OP_ONCE)
12485
  {
12486
  /* The lowest bit carries the flag; the remaining bits are the frame size.
  Note that the stored framesize is modified in place. */
  needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
12487
  CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
12488
  }
12489
12490
if (ket != OP_KET && repeat_type != 0)
12491
  {
12492
  /* TMP1 is used in OP_KETRMIN below. */
12493
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12494
  free_stack(common, 1);
12495
  if (repeat_type == OP_UPTO)
12496
    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
12497
  else
12498
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
12499
  }
12500
12501
if (ket == OP_KETRMAX)
12502
  {
12503
  if (bra == OP_BRAZERO)
12504
    {
12505
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12506
    free_stack(common, 1);
12507
    brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12508
    }
12509
  }
12510
else if (ket == OP_KETRMIN)
12511
  {
12512
  if (bra != OP_BRAMINZERO)
12513
    {
12514
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12515
    if (repeat_type != 0)
12516
      {
12517
      /* TMP1 was set a few lines above. */
12518
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12519
      /* Drop STR_PTR for non-greedy plus quantifier. */
12520
      if (opcode != OP_ONCE)
12521
        free_stack(common, 1);
12522
      }
12523
    else if (opcode >= OP_SBRA || opcode == OP_ONCE)
12524
      {
12525
      /* Checking zero-length iteration. */
12526
      if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
12527
        CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12528
      else
12529
        {
12530
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12531
        CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 2), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12532
        }
12533
      /* Drop STR_PTR for non-greedy plus quantifier. */
12534
      if (opcode != OP_ONCE)
12535
        free_stack(common, 1);
12536
      }
12537
    else
12538
      JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12539
    }
12540
  /* Target of the CMPTO emitted in the final OP_KETRMIN section. */
  rmin_label = LABEL();
12541
  if (repeat_type != 0)
12542
    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
12543
  }
12544
else if (bra == OP_BRAZERO)
12545
  {
12546
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12547
  free_stack(common, 1);
12548
  brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12549
  }
12550
else if (repeat_type == OP_EXACT)
12551
  {
12552
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
12553
  /* Target of the counting loop emitted in the final OP_EXACT section. */
  exact_label = LABEL();
12554
  }
12555
12556
/* Capturing bracket: reload the saved values from the stack into the
OVECTOR() slots (and into capture_last_ptr when that is in use). */
if (offset != 0)
12557
  {
12558
  if (common->capture_last_ptr != 0)
12559
    {
12560
    SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
12561
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12562
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12563
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
12564
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
12565
    free_stack(common, 3);
12566
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
12567
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
12568
    }
12569
  else if (common->optimized_cbracket[offset >> 1] == 0)
12570
    {
12571
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12572
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12573
    free_stack(common, 2);
12574
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
12575
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
12576
    }
12577
  }
12578
12579
if (SLJIT_UNLIKELY(opcode == OP_ONCE))
12580
  {
12581
  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
12582
    {
12583
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12584
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12585
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
12586
    }
12587
  /* This jump is bound by JUMPHERE(once) in the OP_ONCE cleanup section. */
  once = JUMP(SLJIT_JUMP);
12588
  }
12589
else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
12590
  {
12591
  if (has_alternatives)
12592
    {
12593
    /* Always exactly one alternative. */
12594
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12595
    free_stack(common, 1);
12596
12597
    alt_max = 2;
12598
    next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12599
    }
12600
  }
12601
else if (has_alternatives)
12602
  {
12603
  /* TMP1 receives the value stored for the alternative that matched: an
  index compared below when alt_max <= 3, otherwise an address that is
  jumped to indirectly. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12604
  free_stack(common, 1);
12605
12606
  if (alt_max > 3)
12607
    {
12608
    sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
12609
12610
    SLJIT_ASSERT(CURRENT_AS(bracket_backtrack)->u.matching_put_label);
12611
    sljit_set_put_label(CURRENT_AS(bracket_backtrack)->u.matching_put_label, LABEL());
12612
    sljit_emit_op0(compiler, SLJIT_ENDBR);
12613
    }
12614
  else
12615
    next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12616
  }
12617
12618
COMPILE_BACKTRACKINGPATH(current->top);
12619
if (current->topbacktracks)
12620
  set_jumps(current->topbacktracks, LABEL());
12621
12622
if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
12623
  {
12624
  /* Conditional block always has at most one alternative. */
12625
  if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
12626
    {
12627
    SLJIT_ASSERT(has_alternatives);
12628
    assert = CURRENT_AS(bracket_backtrack)->u.assert;
12629
    if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
12630
      {
12631
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
12632
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12633
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
12634
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
12635
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
12636
      }
12637
    cond = JUMP(SLJIT_JUMP);
12638
    set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
12639
    }
12640
  else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
12641
    {
12642
    SLJIT_ASSERT(has_alternatives);
12643
    cond = JUMP(SLJIT_JUMP);
12644
    set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
12645
    }
12646
  else
12647
    SLJIT_ASSERT(!has_alternatives);
12648
  }
12649
12650
/* Compile the remaining alternatives. Each iteration resets the backtrack
lists of current, emits the matching path of the next alternative, stores the
values needed for backtracking, and then emits that alternative's own
backtracking path. */
if (has_alternatives)
12651
  {
12652
  alt_count = 1;
12653
  do
12654
    {
12655
    current->top = NULL;
12656
    current->topbacktracks = NULL;
12657
    current->nextbacktracks = NULL;
12658
    /* Conditional blocks always have an additional alternative, even if it is empty. */
12659
    if (*cc == OP_ALT)
12660
      {
12661
      ccprev = cc + 1 + LINK_SIZE;
12662
      cc += GET(cc, 1);
12663
      if (opcode != OP_COND && opcode != OP_SCOND)
12664
        {
12665
        if (opcode != OP_ONCE)
12666
          {
12667
          if (private_data_ptr != 0)
12668
            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12669
          else
12670
            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12671
          }
12672
        else
12673
          OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
12674
        }
12675
      compile_matchingpath(common, ccprev, cc, current);
12676
      if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
12677
        return;
12678
12679
      if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA)
12680
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12681
12682
      if (opcode == OP_SCRIPT_RUN)
12683
        match_script_run_common(common, private_data_ptr, current);
12684
      }
12685
12686
    /* Instructions after the current alternative is successfully matched. */
12687
    /* There is a similar code in compile_bracket_matchingpath. */
12688
    if (opcode == OP_ONCE)
12689
      match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
12690
12691
    /* First pass: count the stack slots that are filled in below. */
    stacksize = 0;
12692
    if (repeat_type == OP_MINUPTO)
12693
      {
12694
      /* We need to preserve the counter. TMP2 will be used below. */
12695
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
12696
      stacksize++;
12697
      }
12698
    if (ket != OP_KET || bra != OP_BRA)
12699
      stacksize++;
12700
    if (offset != 0)
12701
      {
12702
      if (common->capture_last_ptr != 0)
12703
        stacksize++;
12704
      if (common->optimized_cbracket[offset >> 1] == 0)
12705
        stacksize += 2;
12706
      }
12707
    if (opcode != OP_ONCE)
12708
      stacksize++;
12709
12710
    if (stacksize > 0)
12711
      allocate_stack(common, stacksize);
12712
12713
    /* Second pass: stacksize now serves as the index of the next slot. */
    stacksize = 0;
12714
    if (repeat_type == OP_MINUPTO)
12715
      {
12716
      /* TMP2 was set above. */
12717
      OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
12718
      stacksize++;
12719
      }
12720
12721
    if (ket != OP_KET || bra != OP_BRA)
12722
      {
12723
      if (ket != OP_KET)
12724
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
12725
      else
12726
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
12727
      stacksize++;
12728
      }
12729
12730
    if (offset != 0)
12731
      stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
12732
12733
    /* Record which alternative matched: its index when alt_max <= 3,
    otherwise a put label that is bound after the jump below. */
    if (opcode != OP_ONCE)
12734
      {
12735
      if (alt_max <= 3)
12736
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);
12737
      else
12738
        put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
12739
      }
12740
12741
    if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
12742
      {
12743
      /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
12744
      SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
12745
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
12746
      }
12747
12748
    JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);
12749
12750
    if (opcode != OP_ONCE)
12751
      {
12752
      if (alt_max <= 3)
12753
        {
12754
        JUMPHERE(next_alt);
12755
        alt_count++;
12756
        if (alt_count < alt_max)
12757
          {
12758
          SLJIT_ASSERT(alt_count == 2 && alt_max == 3);
12759
          next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
12760
          }
12761
        }
12762
      else
12763
        {
12764
        sljit_set_put_label(put_label, LABEL());
12765
        sljit_emit_op0(compiler, SLJIT_ENDBR);
12766
        }
12767
      }
12768
12769
    COMPILE_BACKTRACKINGPATH(current->top);
12770
    if (current->topbacktracks)
12771
      set_jumps(current->topbacktracks, LABEL());
12772
    SLJIT_ASSERT(!current->nextbacktracks);
12773
    }
12774
  while (*cc == OP_ALT);
12775
12776
  if (cond != NULL)
12777
    {
12778
    SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
12779
    assert = CURRENT_AS(bracket_backtrack)->u.assert;
12780
    if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
12781
      {
12782
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
12783
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12784
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
12785
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
12786
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
12787
      }
12788
    JUMPHERE(cond);
12789
    }
12790
12791
  /* Free the STR_PTR. */
12792
  if (private_data_ptr == 0)
12793
    free_stack(common, 1);
12794
  }
12795
12796
/* Final restore of the state saved when the bracket was entered; the
branches below are selected by offset and opcode. */
if (offset != 0)
12797
  {
12798
  /* Using both tmp register is better for instruction scheduling. */
12799
  if (common->optimized_cbracket[offset >> 1] != 0)
12800
    {
12801
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12802
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12803
    free_stack(common, 2);
12804
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
12805
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
12806
    }
12807
  else
12808
    {
12809
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12810
    free_stack(common, 1);
12811
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
12812
    }
12813
  }
12814
else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
12815
  {
12816
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
12817
  free_stack(common, 1);
12818
  }
12819
else if (opcode == OP_ONCE)
12820
  {
12821
  cc = ccbegin + GET(ccbegin, 1);
12822
  stacksize = needs_control_head ? 1 : 0;
12823
12824
  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
12825
    {
12826
    /* Reset head and drop saved frame. */
12827
    stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
12828
    }
12829
  else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
12830
    {
12831
    /* The STR_PTR must be released. */
12832
    stacksize++;
12833
    }
12834
12835
  if (stacksize > 0)
12836
    free_stack(common, stacksize);
12837
12838
  JUMPHERE(once);
12839
  /* Restore previous private_data_ptr */
12840
  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
12841
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 1));
12842
  else if (ket == OP_KETRMIN)
12843
    {
12844
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12845
    /* See the comment below. */
12846
    free_stack(common, 2);
12847
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
12848
    }
12849
  }
12850
12851
/* Repeat handling: either count another iteration (OP_EXACT), retry through
recursive_matchingpath / rmin_label, or continue on zero_matchingpath. */
if (repeat_type == OP_EXACT)
12852
  {
12853
  OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
12854
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
12855
  CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
12856
  }
12857
else if (ket == OP_KETRMAX)
12858
  {
12859
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12860
  if (bra != OP_BRAZERO)
12861
    free_stack(common, 1);
12862
12863
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12864
  if (bra == OP_BRAZERO)
12865
    {
12866
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12867
    JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
12868
    JUMPHERE(brazero);
12869
    free_stack(common, 1);
12870
    }
12871
  }
12872
else if (ket == OP_KETRMIN)
12873
  {
12874
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12875
12876
  /* OP_ONCE removes everything in case of a backtrack, so we don't
12877
  need to explicitly release the STR_PTR. The extra release would
12878
  affect badly the free_stack(2) above. */
12879
  if (opcode != OP_ONCE)
12880
    free_stack(common, 1);
12881
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
12882
  if (opcode == OP_ONCE)
12883
    free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
12884
  else if (bra == OP_BRAMINZERO)
12885
    free_stack(common, 1);
12886
  }
12887
else if (bra == OP_BRAZERO)
12888
  {
12889
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12890
  JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
12891
  JUMPHERE(brazero);
12892
  }
12893
}
12894
12895
/* Emits the backtracking path for the bracket opcodes that
compile_backtrackingpath routes here (OP_BRAPOS, OP_CBRAPOS, OP_SBRAPOS,
OP_SCBRAPOS, OP_BRAPOSZERO).

Arguments:
  common     compiler state shared by the code generators
  current    backtrack record, read through CURRENT_AS(bracketpos_backtrack)

With a negative framesize, the values saved on the stack are copied back
(for OP_CBRAPOS / OP_SCBRAPOS) and the stack slots are released. Otherwise
the saved frame is reverted through common->revertframes and the slot at
private_data_ptr is reloaded from the stack. */
static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12896
{
12897
DEFINE_COMPILER;
12898
int offset;
12899
struct sljit_jump *jump;
12900
12901
if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
12902
  {
12903
  if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
12904
    {
12905
    /* Reload the OVECTOR() pair of this group, and capture_last_ptr when it
    is in use. The loads and stores are interleaved between TMP1 and TMP2. */
    offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
12906
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12907
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12908
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
12909
    if (common->capture_last_ptr != 0)
12910
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
12911
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
12912
    if (common->capture_last_ptr != 0)
12913
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
12914
    }
12915
  set_jumps(current->topbacktracks, LABEL());
12916
  free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
12917
  return;
12918
  }
12919
12920
/* A frame was saved: reset STACK_TOP from private_data_ptr and revert it. */
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
12921
add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12922
OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracketpos_backtrack)->framesize - 1) * sizeof(sljit_sw));
12923
12924
if (current->topbacktracks)
12925
  {
12926
  /* The straight-line path jumps over the free_stack that is executed only
  by the jumps collected in topbacktracks. */
  jump = JUMP(SLJIT_JUMP);
12927
  set_jumps(current->topbacktracks, LABEL());
12928
  /* Drop the stack frame. */
12929
  free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
12930
  JUMPHERE(jump);
12931
  }
12932
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracketpos_backtrack)->framesize - 1));
12933
}
12934
12935
/* Emits the backtracking path for OP_BRAMINZERO.

Arguments:
  common     compiler state shared by the code generators
  current    backtrack record; current->cc[1] is the opcode that follows
             OP_BRAMINZERO

The backtrack lists of current are cleared first. If the following opcode
is greater than OP_ASSERTBACK_NOT it is compiled as a bracket (matching path
and then backtracking path); otherwise it is compiled as an assertion using a
zero-initialised local assert_backtrack. */
static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12936
{
12937
assert_backtrack backtrack;
12938
12939
current->top = NULL;
12940
current->topbacktracks = NULL;
12941
current->nextbacktracks = NULL;
12942
if (current->cc[1] > OP_ASSERTBACK_NOT)
12943
  {
12944
  /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
12945
  compile_bracket_matchingpath(common, current->cc, current);
12946
  compile_bracket_backtrackingpath(common, current->top);
12947
  }
12948
else
12949
  {
12950
  /* Only cc and matchingpath are set; every other field stays zero. */
  memset(&backtrack, 0, sizeof(backtrack));
12951
  backtrack.common.cc = current->cc;
12952
  backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
12953
  /* Manual call of compile_assert_matchingpath. */
12954
  compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
12955
  }
12956
SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
12957
}
12958
12959
/* Emits the backtracking path for the control verbs that
compile_backtrackingpath routes here (OP_THEN, OP_THEN_ARG, OP_PRUNE,
OP_PRUNE_ARG, OP_SKIP, OP_SKIP_ARG).

Arguments:
  common     compiler state shared by the code generators
  current    backtrack record; *current->cc selects the verb

The cases are tried in this order, each one returning when it applies:
  1. OP_THEN / OP_THEN_ARG with an active then_trap: search the control head
     chain for the trap entry and jump to the trap's quit list.
  2. OP_THEN / OP_THEN_ARG inside a positive assertion without a local quit:
     jump to positive_assertion_quit.
  3. A local quit is available: jump to quit_label, or queue the jump on
     common->quit when the label does not exist yet.
  4. OP_SKIP_ARG: call do_search_mark and jump to reset_match when it
     returns a non-zero value.
  5. Anything else: load STR_PTR (from the stack for OP_SKIP, otherwise 0)
     and jump to reset_match. */
static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12960
{
12961
DEFINE_COMPILER;
12962
PCRE2_UCHAR opcode = *current->cc;
12963
struct sljit_label *loop;
12964
struct sljit_jump *jump;
12965
12966
if (opcode == OP_THEN || opcode == OP_THEN_ARG)
12967
  {
12968
  if (common->then_trap != NULL)
12969
    {
12970
    SLJIT_ASSERT(common->control_head_ptr != 0);
12971
12972
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
12973
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
12974
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
12975
    /* Enter the loop at its test, skipping the first "follow link" step. */
    jump = JUMP(SLJIT_JUMP);
12976
12977
    loop = LABEL();
12978
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12979
    JUMPHERE(jump);
12980
    /* Keep following the chain until an entry matches both the
    type_then_trap marker and the start value of the current trap. */
    CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0, loop);
12981
    CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0, loop);
12982
    add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
12983
    return;
12984
    }
12985
  else if (!common->local_quit_available && common->in_positive_assertion)
12986
    {
12987
    add_jump(compiler, &common->positive_assertion_quit, JUMP(SLJIT_JUMP));
12988
    return;
12989
    }
12990
  }
12991
12992
if (common->local_quit_available)
12993
  {
12994
  /* Abort match with a fail. */
12995
  if (common->quit_label == NULL)
12996
    add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
12997
  else
12998
    JUMPTO(SLJIT_JUMP, common->quit_label);
12999
  return;
13000
  }
13001
13002
if (opcode == OP_SKIP_ARG)
13003
  {
13004
  /* The call below passes its two arguments in SLJIT_R0 and SLJIT_R1, which
  the assertion requires to be TMP1 and STR_PTR. */
  SLJIT_ASSERT(common->control_head_ptr != 0 && TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
13005
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
13006
  OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
13007
  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark));
13008
13009
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0);
13010
  add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0));
13011
  return;
13012
  }
13013
13014
if (opcode == OP_SKIP)
13015
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13016
else
13017
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
13018
add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
13019
}
13020
13021
/* Emits the backtracking path for the virtual OP_THEN_TRAP opcode.

Arguments:
  common     compiler state shared by the code generators
  current    backtrack record, read through CURRENT_AS(then_trap_backtrack)

If the record carries a then_trap pointer, that pointer is installed as
common->then_trap and no code is emitted. Otherwise two entry points are
generated: the fall-through path, which releases the trap's stack slots, and
the target of the jumps collected in the record's quit list, which first
reverts the saved frame when framesize >= 0. Both paths leave a value in TMP1
that is finally stored at control_head_ptr. */
static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13022
{
13023
DEFINE_COMPILER;
13024
struct sljit_jump *jump;
13025
int size;
13026
13027
if (CURRENT_AS(then_trap_backtrack)->then_trap)
13028
  {
13029
  common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
13030
  return;
13031
  }
13032
13033
/* Three slots are always released; a non-negative framesize adds to that. */
size = CURRENT_AS(then_trap_backtrack)->framesize;
13034
size = 3 + (size < 0 ? 0 : size);
13035
13036
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
13037
free_stack(common, size);
13038
jump = JUMP(SLJIT_JUMP);
13039
13040
set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
13041
/* STACK_TOP is set by THEN. */
13042
if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
13043
  {
13044
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13045
  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(then_trap_backtrack)->framesize - 1) * sizeof(sljit_sw));
13046
  }
13047
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13048
free_stack(common, 3);
13049
13050
/* Both paths join here with the value to store in TMP1. */
JUMPHERE(jump);
13051
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
13052
}
13053
13054
/* Emits the backtracking code for a chain of backtrack records.

Arguments:
  common     compiler state shared by the code generators
  current    first record to process; the chain is followed through
             current->prev until it is NULL

For every record the pending nextbacktracks jumps are bound to the current
position, then the code for the record is generated according to the opcode
at current->cc, either inline or by one of the specialised
compile_*_backtrackingpath functions. common->then_trap is saved on entry and
restored before returning. An opcode without a case here triggers
SLJIT_UNREACHABLE(). */
static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13055
{
13056
DEFINE_COMPILER;
13057
then_trap_backtrack *save_then_trap = common->then_trap;
13058
13059
while (current)
13060
  {
13061
  if (current->nextbacktracks != NULL)
13062
    set_jumps(current->nextbacktracks, LABEL());
13063
  switch(*current->cc)
13064
    {
13065
    case OP_SET_SOM:
13066
    /* Reload OVECTOR(0) from the value saved on the stack. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13067
    free_stack(common, 1);
13068
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
13069
    break;
13070
13071
    case OP_STAR:
13072
    case OP_MINSTAR:
13073
    case OP_PLUS:
13074
    case OP_MINPLUS:
13075
    case OP_QUERY:
13076
    case OP_MINQUERY:
13077
    case OP_UPTO:
13078
    case OP_MINUPTO:
13079
    case OP_EXACT:
13080
    case OP_POSSTAR:
13081
    case OP_POSPLUS:
13082
    case OP_POSQUERY:
13083
    case OP_POSUPTO:
13084
    case OP_STARI:
13085
    case OP_MINSTARI:
13086
    case OP_PLUSI:
13087
    case OP_MINPLUSI:
13088
    case OP_QUERYI:
13089
    case OP_MINQUERYI:
13090
    case OP_UPTOI:
13091
    case OP_MINUPTOI:
13092
    case OP_EXACTI:
13093
    case OP_POSSTARI:
13094
    case OP_POSPLUSI:
13095
    case OP_POSQUERYI:
13096
    case OP_POSUPTOI:
13097
    case OP_NOTSTAR:
13098
    case OP_NOTMINSTAR:
13099
    case OP_NOTPLUS:
13100
    case OP_NOTMINPLUS:
13101
    case OP_NOTQUERY:
13102
    case OP_NOTMINQUERY:
13103
    case OP_NOTUPTO:
13104
    case OP_NOTMINUPTO:
13105
    case OP_NOTEXACT:
13106
    case OP_NOTPOSSTAR:
13107
    case OP_NOTPOSPLUS:
13108
    case OP_NOTPOSQUERY:
13109
    case OP_NOTPOSUPTO:
13110
    case OP_NOTSTARI:
13111
    case OP_NOTMINSTARI:
13112
    case OP_NOTPLUSI:
13113
    case OP_NOTMINPLUSI:
13114
    case OP_NOTQUERYI:
13115
    case OP_NOTMINQUERYI:
13116
    case OP_NOTUPTOI:
13117
    case OP_NOTMINUPTOI:
13118
    case OP_NOTEXACTI:
13119
    case OP_NOTPOSSTARI:
13120
    case OP_NOTPOSPLUSI:
13121
    case OP_NOTPOSQUERYI:
13122
    case OP_NOTPOSUPTOI:
13123
    case OP_TYPESTAR:
13124
    case OP_TYPEMINSTAR:
13125
    case OP_TYPEPLUS:
13126
    case OP_TYPEMINPLUS:
13127
    case OP_TYPEQUERY:
13128
    case OP_TYPEMINQUERY:
13129
    case OP_TYPEUPTO:
13130
    case OP_TYPEMINUPTO:
13131
    case OP_TYPEEXACT:
13132
    case OP_TYPEPOSSTAR:
13133
    case OP_TYPEPOSPLUS:
13134
    case OP_TYPEPOSQUERY:
13135
    case OP_TYPEPOSUPTO:
13136
    case OP_CLASS:
13137
    case OP_NCLASS:
13138
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
13139
    case OP_XCLASS:
13140
#endif
13141
    compile_iterator_backtrackingpath(common, current);
13142
    break;
13143
13144
    case OP_REF:
13145
    case OP_REFI:
13146
    case OP_DNREF:
13147
    case OP_DNREFI:
13148
    compile_ref_iterator_backtrackingpath(common, current);
13149
    break;
13150
13151
    case OP_RECURSE:
13152
    compile_recurse_backtrackingpath(common, current);
13153
    break;
13154
13155
    case OP_ASSERT:
13156
    case OP_ASSERT_NOT:
13157
    case OP_ASSERTBACK:
13158
    case OP_ASSERTBACK_NOT:
13159
    compile_assert_backtrackingpath(common, current);
13160
    break;
13161
13162
    case OP_ASSERT_NA:
13163
    case OP_ASSERTBACK_NA:
13164
    case OP_ONCE:
13165
    case OP_SCRIPT_RUN:
13166
    case OP_BRA:
13167
    case OP_CBRA:
13168
    case OP_COND:
13169
    case OP_SBRA:
13170
    case OP_SCBRA:
13171
    case OP_SCOND:
13172
    compile_bracket_backtrackingpath(common, current);
13173
    break;
13174
13175
    case OP_BRAZERO:
13176
    /* The opcode after OP_BRAZERO decides between bracket and assertion. */
    if (current->cc[1] > OP_ASSERTBACK_NOT)
13177
      compile_bracket_backtrackingpath(common, current);
13178
    else
13179
      compile_assert_backtrackingpath(common, current);
13180
    break;
13181
13182
    case OP_BRAPOS:
13183
    case OP_CBRAPOS:
13184
    case OP_SBRAPOS:
13185
    case OP_SCBRAPOS:
13186
    case OP_BRAPOSZERO:
13187
    compile_bracketpos_backtrackingpath(common, current);
13188
    break;
13189
13190
    case OP_BRAMINZERO:
13191
    compile_braminzero_backtrackingpath(common, current);
13192
    break;
13193
13194
    case OP_MARK:
13195
    /* Reload mark_ptr from the stack. When has_skip_arg is set, five slots
    are released instead of one and control_head_ptr is reloaded as well. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0));
13196
    if (common->has_skip_arg)
13197
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13198
    free_stack(common, common->has_skip_arg ? 5 : 1);
13199
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
13200
    if (common->has_skip_arg)
13201
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
13202
    break;
13203
13204
    case OP_THEN:
13205
    case OP_THEN_ARG:
13206
    case OP_PRUNE:
13207
    case OP_PRUNE_ARG:
13208
    case OP_SKIP:
13209
    case OP_SKIP_ARG:
13210
    compile_control_verb_backtrackingpath(common, current);
13211
    break;
13212
13213
    case OP_COMMIT:
13214
    case OP_COMMIT_ARG:
13215
    /* Without a local quit the return register is set to
    PCRE2_ERROR_NOMATCH before jumping to the quit code. */
    if (!common->local_quit_available)
13216
      OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
13217
    if (common->quit_label == NULL)
13218
      add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
13219
    else
13220
      JUMPTO(SLJIT_JUMP, common->quit_label);
13221
    break;
13222
13223
    case OP_CALLOUT:
13224
    case OP_CALLOUT_STR:
13225
    case OP_FAIL:
13226
    case OP_ACCEPT:
13227
    case OP_ASSERT_ACCEPT:
13228
    /* No code of their own: only bind the pending backtrack jumps here. */
    set_jumps(current->topbacktracks, LABEL());
13229
    break;
13230
13231
    case OP_THEN_TRAP:
13232
    /* A virtual opcode for then traps. */
13233
    compile_then_trap_backtrackingpath(common, current);
13234
    break;
13235
13236
    default:
13237
    SLJIT_UNREACHABLE();
13238
    break;
13239
    }
13240
  current = current->prev;
13241
  }
13242
common->then_trap = save_then_trap;
13243
}
13244
13245
/* Generates the code of a single recursion entry (common->currententry).

The emitted routine has two entry points, both opened with
sljit_emit_fast_enter: entry_label starts matching the referenced bracket
and backtrack_label resumes a previously successful call to look for another
match. Every exit is an SLJIT_FAST_RETURN with TMP1 set to 1 when an
alternative matched (or accept was reached) and to 0 otherwise.

If the sljit compiler reports an error while an alternative is compiled,
this function returns immediately and leaves the routine unfinished; the
caller is expected to check sljit_get_compiler_error() afterwards. */
static SLJIT_INLINE void compile_recurse(compiler_common *common)
13246
{
13247
DEFINE_COMPILER;
13248
PCRE2_SPTR cc = common->start + common->currententry->start;
13249
/* Skip the opcode and its link; every accepted opening opcode except OP_BRA
is followed by IMM2_SIZE additional code units. */
PCRE2_SPTR ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
13250
PCRE2_SPTR ccend = bracketend(cc) - (1 + LINK_SIZE);
13251
BOOL needs_control_head;
13252
BOOL has_quit;
13253
BOOL has_accept;
13254
int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &needs_control_head, &has_quit, &has_accept);
13255
int alt_count, alt_max, local_size;
13256
backtrack_common altbacktrack;
13257
jump_list *match = NULL;
13258
struct sljit_jump *next_alt = NULL;
13259
struct sljit_jump *accept_exit = NULL;
13260
struct sljit_label *quit;
13261
struct sljit_put_label *put_label = NULL;
13262
13263
/* Recurse captures then: start with no active then trap. */
13264
common->then_trap = NULL;
13265
13266
SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
13267
13268
alt_max = no_alternatives(cc);
13269
alt_count = 0;
13270
13271
/* Matching path. */
13272
SLJIT_ASSERT(common->currententry->entry_label == NULL && common->recursive_head_ptr != 0);
13273
common->currententry->entry_label = LABEL();
13274
set_jumps(common->currententry->entry_calls, common->currententry->entry_label);
13275
13276
sljit_emit_fast_enter(compiler, TMP2, 0);
13277
count_match(common);
13278
13279
/* One local slot for the return address, plus one for the saved STR_PTR
when the bracket has more than one alternative. */
local_size = (alt_max > 1) ? 2 : 1;
13280
13281
/* (Reversed) stack layout:
13282
   [private data][return address][optional: str ptr] ... [optional: alternative index][recursive_head_ptr] */
13283
13284
allocate_stack(common, private_data_size + local_size);
13285
/* Save return address. */
13286
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP2, 0);
13287
13288
copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, local_size, private_data_size + local_size, has_quit);
13289
13290
/* This variable is saved and restored every time we enter or exit from a recursive context. */
13291
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);
13292
13293
if (needs_control_head)
13294
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
13295
13296
/* Remember the starting subject position so that later alternatives can
restart from it. */
if (alt_max > 1)
13297
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
13298
13299
memset(&altbacktrack, 0, sizeof(backtrack_common));
13300
/* The quit and accept targets are local to this routine: jumps collected in
common->quit and common->accept are resolved after the alternatives. */
common->quit_label = NULL;
13301
common->accept_label = NULL;
13302
common->quit = NULL;
13303
common->accept = NULL;
13304
altbacktrack.cc = ccbegin;
13305
cc += GET(cc, 1);
13306
/* One iteration per alternative: matching path first, then its
backtracking path. */
while (1)
13307
  {
13308
  altbacktrack.top = NULL;
13309
  altbacktrack.topbacktracks = NULL;
13310
13311
  /* Every alternative after the first one restarts from the saved subject
  position. */
  if (altbacktrack.cc != ccbegin)
13312
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13313
13314
  compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
13315
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13316
    return;
13317
13318
  /* The alternative matched: push the record consumed by the backtrack
  entry. STACK(0) is filled from TMP2 (the recursive head) at the common
  match exit; STACK(1), when present, identifies the alternative. */
  allocate_stack(common, (alt_max > 1 || has_accept) ? 2 : 1);
13319
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
13320
13321
  if (alt_max > 1 || has_accept)
13322
    {
13323
    /* With more than three alternatives the resume address itself is stored
    (bound later with sljit_set_put_label); otherwise the index is stored. */
    if (alt_max > 3)
13324
      put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(1));
13325
    else
13326
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count);
13327
    }
13328
13329
  add_jump(compiler, &match, JUMP(SLJIT_JUMP));
13330
13331
  if (alt_count == 0)
13332
    {
13333
    /* Backtracking path entry. */
13334
    SLJIT_ASSERT(common->currententry->backtrack_label == NULL);
13335
    common->currententry->backtrack_label = LABEL();
13336
    set_jumps(common->currententry->backtrack_calls, common->currententry->backtrack_label);
13337
13338
    sljit_emit_fast_enter(compiler, TMP1, 0);
13339
13340
    /* A STACK(1) value of -1 marks a record pushed by the accept exit at the
    end of this function. */
    if (has_accept)
13341
      accept_exit = CMP(SLJIT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);
13342
13343
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13344
    /* Save return address. */
13345
    OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(local_size - 1), TMP1, 0);
13346
13347
    copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, has_quit);
13348
13349
    if (alt_max > 1)
13350
      {
13351
      /* TMP1 selects the alternative whose backtracking path is resumed. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13352
      free_stack(common, 2);
13353
13354
      if (alt_max > 3)
13355
        {
13356
        /* Jump straight to the stored address; the label bound next is the
        resume point of the first alternative. */
        sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
13357
        sljit_set_put_label(put_label, LABEL());
13358
        sljit_emit_op0(compiler, SLJIT_ENDBR);
13359
        }
13360
      else
13361
        /* Index 0 falls through into the first alternative's backtracking
        path emitted below. */
        next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
13362
      }
13363
    else
13364
      free_stack(common, has_accept ? 2 : 1);
13365
    }
13366
  else if (alt_max > 3)
13367
    {
13368
    /* Bind the resume address stored by this alternative's matching path. */
    sljit_set_put_label(put_label, LABEL());
13369
    sljit_emit_op0(compiler, SLJIT_ENDBR);
13370
    }
13371
  else
13372
    {
13373
    /* Two or three alternatives: chain of index comparisons on TMP1. */
    JUMPHERE(next_alt);
13374
    if (alt_count + 1 < alt_max)
13375
      {
13376
      SLJIT_ASSERT(alt_count == 1 && alt_max == 3);
13377
      next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
13378
      }
13379
    }
13380
13381
  alt_count++;
13382
13383
  compile_backtrackingpath(common, altbacktrack.top);
13384
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13385
    return;
13386
  set_jumps(altbacktrack.topbacktracks, LABEL());
13387
13388
  if (*cc != OP_ALT)
13389
    break;
13390
13391
  altbacktrack.cc = cc + 1 + LINK_SIZE;
13392
  cc += GET(cc, 1);
13393
  }
13394
13395
/* No alternative is matched. */
13396
13397
quit = LABEL();
13398
13399
copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, local_size, private_data_size + local_size, has_quit);
13400
13401
/* Reload the return address, release the whole frame and return 0 in TMP1. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
13402
free_stack(common, private_data_size + local_size);
13403
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
13404
OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
13405
13406
/* Target of the local quit jumps: restore the stack top from
recursive_head_ptr, then leave through the no-match exit above. */
if (common->quit != NULL)
13407
  {
13408
  SLJIT_ASSERT(has_quit);
13409
13410
  set_jumps(common->quit, LABEL());
13411
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
13412
  copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, local_size, private_data_size + local_size, has_quit);
13413
  JUMPTO(SLJIT_JUMP, quit);
13414
  }
13415
13416
/* Reached from the backtrack entry when the record on the stack is marked
with -1: drop the two-slot record, release the frame and return 0 in TMP1.
TMP1 still holds the return address of the backtrack call here. */
if (has_accept)
13417
  {
13418
  JUMPHERE(accept_exit);
13419
  free_stack(common, 2);
13420
13421
  /* Save return address. */
13422
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP1, 0);
13423
13424
  copy_recurse_data(common, ccbegin, ccend, recurse_copy_kept_shared_to_global, local_size, private_data_size + local_size, has_quit);
13425
13426
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
13427
  free_stack(common, private_data_size + local_size);
13428
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
13429
  OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
13430
  }
13431
13432
/* Target of the accept jumps: rebuild the stack top from recursive_head_ptr,
push a record marked with -1 and fall through into the match exit. */
if (common->accept != NULL)
13433
  {
13434
  SLJIT_ASSERT(has_accept);
13435
13436
  set_jumps(common->accept, LABEL());
13437
13438
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
13439
  OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);
13440
13441
  allocate_stack(common, 2);
13442
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);
13443
  }
13444
13445
/* Common successful exit. TMP2 holds the recursive head on every path that
arrives here; it is stored in the record and then used to fetch the return
address saved in the frame. */
set_jumps(match, LABEL());
13446
13447
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
13448
13449
copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, has_quit);
13450
13451
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), STACK(local_size - 1));
13452
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
13453
OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
13454
}
13455
13456
/* These helper macros are not needed past this point. */
#undef COMPILE_BACKTRACKINGPATH
13457
#undef CURRENT_AS
13458
13459
/* Configuration bits that may accompany the mode argument of jit_compile().
jit_compile() reads them first and then masks them out of mode, so the
remaining value can be compared against the PCRE2_JIT_* compile modes. */
#define PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS \
13460
  (PCRE2_JIT_INVALID_UTF)
13461
13462
static int jit_compile(pcre2_code *code, sljit_u32 mode)
13463
{
13464
pcre2_real_code *re = (pcre2_real_code *)code;
13465
struct sljit_compiler *compiler;
13466
backtrack_common rootbacktrack;
13467
compiler_common common_data;
13468
compiler_common *common = &common_data;
13469
const sljit_u8 *tables = re->tables;
13470
void *allocator_data = &re->memctl;
13471
int private_data_size;
13472
PCRE2_SPTR ccend;
13473
executable_functions *functions;
13474
void *executable_func;
13475
sljit_uw executable_size;
13476
sljit_uw total_length;
13477
struct sljit_label *mainloop_label = NULL;
13478
struct sljit_label *continue_match_label;
13479
struct sljit_label *empty_match_found_label = NULL;
13480
struct sljit_label *empty_match_backtrack_label = NULL;
13481
struct sljit_label *reset_match_label;
13482
struct sljit_label *quit_label;
13483
struct sljit_jump *jump;
13484
struct sljit_jump *minlength_check_failed = NULL;
13485
struct sljit_jump *empty_match = NULL;
13486
struct sljit_jump *end_anchor_failed = NULL;
13487
jump_list *reqcu_not_found = NULL;
13488
13489
SLJIT_ASSERT(tables);
13490
13491
#if HAS_VIRTUAL_REGISTERS == 1
13492
SLJIT_ASSERT(sljit_get_register_index(TMP3) < 0 && sljit_get_register_index(ARGUMENTS) < 0 && sljit_get_register_index(RETURN_ADDR) < 0);
13493
#elif HAS_VIRTUAL_REGISTERS == 0
13494
SLJIT_ASSERT(sljit_get_register_index(TMP3) >= 0 && sljit_get_register_index(ARGUMENTS) >= 0 && sljit_get_register_index(RETURN_ADDR) >= 0);
13495
#else
13496
#error "Invalid value for HAS_VIRTUAL_REGISTERS"
13497
#endif
13498
13499
memset(&rootbacktrack, 0, sizeof(backtrack_common));
13500
memset(common, 0, sizeof(compiler_common));
13501
common->re = re;
13502
common->name_table = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
13503
rootbacktrack.cc = common->name_table + re->name_count * re->name_entry_size;
13504
13505
#ifdef SUPPORT_UNICODE
13506
common->invalid_utf = (mode & PCRE2_JIT_INVALID_UTF) != 0;
13507
#endif /* SUPPORT_UNICODE */
13508
mode &= ~PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS;
13509
13510
common->start = rootbacktrack.cc;
13511
common->read_only_data_head = NULL;
13512
common->fcc = tables + fcc_offset;
13513
common->lcc = (sljit_sw)(tables + lcc_offset);
13514
common->mode = mode;
13515
common->might_be_empty = (re->minlength == 0) || (re->flags & PCRE2_MATCH_EMPTY);
13516
common->allow_empty_partial = (re->max_lookbehind > 0) || (re->flags & PCRE2_MATCH_EMPTY);
13517
common->nltype = NLTYPE_FIXED;
13518
switch(re->newline_convention)
13519
  {
13520
  case PCRE2_NEWLINE_CR: common->newline = CHAR_CR; break;
13521
  case PCRE2_NEWLINE_LF: common->newline = CHAR_NL; break;
13522
  case PCRE2_NEWLINE_CRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
13523
  case PCRE2_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
13524
  case PCRE2_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
13525
  case PCRE2_NEWLINE_NUL: common->newline = CHAR_NUL; break;
13526
  default: return PCRE2_ERROR_INTERNAL;
13527
  }
13528
common->nlmax = READ_CHAR_MAX;
13529
common->nlmin = 0;
13530
if (re->bsr_convention == PCRE2_BSR_UNICODE)
13531
  common->bsr_nltype = NLTYPE_ANY;
13532
else if (re->bsr_convention == PCRE2_BSR_ANYCRLF)
13533
  common->bsr_nltype = NLTYPE_ANYCRLF;
13534
else
13535
  {
13536
#ifdef BSR_ANYCRLF
13537
  common->bsr_nltype = NLTYPE_ANYCRLF;
13538
#else
13539
  common->bsr_nltype = NLTYPE_ANY;
13540
#endif
13541
  }
13542
common->bsr_nlmax = READ_CHAR_MAX;
13543
common->bsr_nlmin = 0;
13544
common->endonly = (re->overall_options & PCRE2_DOLLAR_ENDONLY) != 0;
13545
common->ctypes = (sljit_sw)(tables + ctypes_offset);
13546
common->name_count = re->name_count;
13547
common->name_entry_size = re->name_entry_size;
13548
common->unset_backref = (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) != 0;
13549
common->alt_circumflex = (re->overall_options & PCRE2_ALT_CIRCUMFLEX) != 0;
13550
#ifdef SUPPORT_UNICODE
13551
/* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
13552
common->utf = (re->overall_options & PCRE2_UTF) != 0;
13553
common->ucp = (re->overall_options & PCRE2_UCP) != 0;
13554
if (common->utf)
13555
  {
13556
  if (common->nltype == NLTYPE_ANY)
13557
    common->nlmax = 0x2029;
13558
  else if (common->nltype == NLTYPE_ANYCRLF)
13559
    common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
13560
  else
13561
    {
13562
    /* We only care about the first newline character. */
13563
    common->nlmax = common->newline & 0xff;
13564
    }
13565
13566
  if (common->nltype == NLTYPE_FIXED)
13567
    common->nlmin = common->newline & 0xff;
13568
  else
13569
    common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
13570
13571
  if (common->bsr_nltype == NLTYPE_ANY)
13572
    common->bsr_nlmax = 0x2029;
13573
  else
13574
    common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
13575
  common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
13576
  }
13577
else
13578
  common->invalid_utf = FALSE;
13579
#endif /* SUPPORT_UNICODE */
13580
ccend = bracketend(common->start);
13581
13582
/* Calculate the local space size on the stack. */
13583
common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
13584
common->optimized_cbracket = (sljit_u8 *)SLJIT_MALLOC(re->top_bracket + 1, allocator_data);
13585
if (!common->optimized_cbracket)
13586
  return PCRE2_ERROR_NOMEMORY;
13587
#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
13588
memset(common->optimized_cbracket, 0, re->top_bracket + 1);
13589
#else
13590
memset(common->optimized_cbracket, 1, re->top_bracket + 1);
13591
#endif
13592
13593
SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
13594
#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
13595
common->capture_last_ptr = common->ovector_start;
13596
common->ovector_start += sizeof(sljit_sw);
13597
#endif
13598
if (!check_opcode_types(common, common->start, ccend))
13599
  {
13600
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
13601
  return PCRE2_ERROR_NOMEMORY;
13602
  }
13603
13604
/* Checking flags and updating ovector_start. */
13605
if (mode == PCRE2_JIT_COMPLETE && (re->flags & PCRE2_LASTSET) != 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
13606
  {
13607
  common->req_char_ptr = common->ovector_start;
13608
  common->ovector_start += sizeof(sljit_sw);
13609
  }
13610
if (mode != PCRE2_JIT_COMPLETE)
13611
  {
13612
  common->start_used_ptr = common->ovector_start;
13613
  common->ovector_start += sizeof(sljit_sw);
13614
  if (mode == PCRE2_JIT_PARTIAL_SOFT)
13615
    {
13616
    common->hit_start = common->ovector_start;
13617
    common->ovector_start += sizeof(sljit_sw);
13618
    }
13619
  }
13620
if ((re->overall_options & (PCRE2_FIRSTLINE | PCRE2_USE_OFFSET_LIMIT)) != 0)
13621
  {
13622
  common->match_end_ptr = common->ovector_start;
13623
  common->ovector_start += sizeof(sljit_sw);
13624
  }
13625
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
13626
common->control_head_ptr = 1;
13627
#endif
13628
if (common->control_head_ptr != 0)
13629
  {
13630
  common->control_head_ptr = common->ovector_start;
13631
  common->ovector_start += sizeof(sljit_sw);
13632
  }
13633
if (common->has_set_som)
13634
  {
13635
  /* Saving the real start pointer is necessary. */
13636
  common->start_ptr = common->ovector_start;
13637
  common->ovector_start += sizeof(sljit_sw);
13638
  }
13639
13640
/* Aligning ovector to even number of sljit words. */
13641
if ((common->ovector_start & sizeof(sljit_sw)) != 0)
13642
  common->ovector_start += sizeof(sljit_sw);
13643
13644
if (common->start_ptr == 0)
13645
  common->start_ptr = OVECTOR(0);
13646
13647
/* Capturing brackets cannot be optimized if callouts are allowed. */
13648
if (common->capture_last_ptr != 0)
13649
  memset(common->optimized_cbracket, 0, re->top_bracket + 1);
13650
13651
SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
13652
common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
13653
13654
total_length = ccend - common->start;
13655
common->private_data_ptrs = (sljit_s32 *)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), allocator_data);
13656
if (!common->private_data_ptrs)
13657
  {
13658
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
13659
  return PCRE2_ERROR_NOMEMORY;
13660
  }
13661
memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32));
13662
13663
private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
13664
13665
if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && !common->has_skip_in_assert_back)
13666
  detect_early_fail(common, common->start, &private_data_size, 0, 0, TRUE);
13667
13668
set_private_data_ptrs(common, &private_data_size, ccend);
13669
13670
SLJIT_ASSERT(common->early_fail_start_ptr <= common->early_fail_end_ptr);
13671
13672
if (private_data_size > SLJIT_MAX_LOCAL_SIZE)
13673
  {
13674
  SLJIT_FREE(common->private_data_ptrs, allocator_data);
13675
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
13676
  return PCRE2_ERROR_NOMEMORY;
13677
  }
13678
13679
if (common->has_then)
13680
  {
13681
  common->then_offsets = (sljit_u8 *)(common->private_data_ptrs + total_length);
13682
  memset(common->then_offsets, 0, total_length);
13683
  set_then_offsets(common, common->start, NULL);
13684
  }
13685
13686
compiler = sljit_create_compiler(allocator_data, NULL);
13687
if (!compiler)
13688
  {
13689
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
13690
  SLJIT_FREE(common->private_data_ptrs, allocator_data);
13691
  return PCRE2_ERROR_NOMEMORY;
13692
  }
13693
common->compiler = compiler;
13694
13695
/* Main pcre_jit_exec entry. */
13696
sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 5, 5, 0, 0, private_data_size);
13697
13698
/* Register init. */
13699
reset_ovector(common, (re->top_bracket + 1) * 2);
13700
if (common->req_char_ptr != 0)
13701
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);
13702
13703
OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
13704
OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
13705
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
13706
OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
13707
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
13708
OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
13709
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, end));
13710
OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, start));
13711
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
13712
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);
13713
13714
if (common->early_fail_start_ptr < common->early_fail_end_ptr)
13715
  reset_early_fail(common);
13716
13717
if (mode == PCRE2_JIT_PARTIAL_SOFT)
13718
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
13719
if (common->mark_ptr != 0)
13720
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
13721
if (common->control_head_ptr != 0)
13722
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
13723
13724
/* Main part of the matching */
13725
if ((re->overall_options & PCRE2_ANCHORED) == 0)
13726
  {
13727
  mainloop_label = mainloop_entry(common);
13728
  continue_match_label = LABEL();
13729
  /* Forward search if possible. */
13730
  if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
13731
    {
13732
    if (mode == PCRE2_JIT_COMPLETE && fast_forward_first_n_chars(common))
13733
      ;
13734
    else if ((re->flags & PCRE2_FIRSTSET) != 0)
13735
      fast_forward_first_char(common);
13736
    else if ((re->flags & PCRE2_STARTLINE) != 0)
13737
      fast_forward_newline(common);
13738
    else if ((re->flags & PCRE2_FIRSTMAPSET) != 0)
13739
      fast_forward_start_bits(common);
13740
    }
13741
  }
13742
else
13743
  continue_match_label = LABEL();
13744
13745
if (mode == PCRE2_JIT_COMPLETE && re->minlength > 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
13746
  {
13747
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
13748
  OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(re->minlength));
13749
  minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
13750
  }
13751
if (common->req_char_ptr != 0)
13752
  reqcu_not_found = search_requested_char(common, (PCRE2_UCHAR)(re->last_codeunit), (re->flags & PCRE2_LASTCASELESS) != 0, (re->flags & PCRE2_FIRSTSET) != 0);
13753
13754
/* Store the current STR_PTR in OVECTOR(0). */
13755
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
13756
/* Copy the limit of allowed recursions. */
13757
OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
13758
if (common->capture_last_ptr != 0)
13759
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, 0);
13760
if (common->fast_forward_bc_ptr != NULL)
13761
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3, STR_PTR, 0);
13762
13763
if (common->start_ptr != OVECTOR(0))
13764
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);
13765
13766
/* Copy the beginning of the string. */
13767
if (mode == PCRE2_JIT_PARTIAL_SOFT)
13768
  {
13769
  jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
13770
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
13771
  JUMPHERE(jump);
13772
  }
13773
else if (mode == PCRE2_JIT_PARTIAL_HARD)
13774
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
13775
13776
compile_matchingpath(common, common->start, ccend, &rootbacktrack);
13777
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13778
  {
13779
  sljit_free_compiler(compiler);
13780
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
13781
  SLJIT_FREE(common->private_data_ptrs, allocator_data);
13782
  PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
13783
  return PCRE2_ERROR_NOMEMORY;
13784
  }
13785
13786
if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
13787
  end_anchor_failed = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0);
13788
13789
if (common->might_be_empty)
13790
  {
13791
  empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
13792
  empty_match_found_label = LABEL();
13793
  }
13794
13795
common->accept_label = LABEL();
13796
if (common->accept != NULL)
13797
  set_jumps(common->accept, common->accept_label);
13798
13799
/* This means we have a match. Update the ovector. */
13800
copy_ovector(common, re->top_bracket + 1);
13801
common->quit_label = common->abort_label = LABEL();
13802
if (common->quit != NULL)
13803
  set_jumps(common->quit, common->quit_label);
13804
if (common->abort != NULL)
13805
  set_jumps(common->abort, common->abort_label);
13806
if (minlength_check_failed != NULL)
13807
  SET_LABEL(minlength_check_failed, common->abort_label);
13808
13809
sljit_emit_op0(compiler, SLJIT_SKIP_FRAMES_BEFORE_RETURN);
13810
sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
13811
13812
if (common->failed_match != NULL)
13813
  {
13814
  SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
13815
  set_jumps(common->failed_match, LABEL());
13816
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
13817
  JUMPTO(SLJIT_JUMP, common->abort_label);
13818
  }
13819
13820
if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
13821
  JUMPHERE(end_anchor_failed);
13822
13823
if (mode != PCRE2_JIT_COMPLETE)
13824
  {
13825
  common->partialmatchlabel = LABEL();
13826
  set_jumps(common->partialmatch, common->partialmatchlabel);
13827
  return_with_partial_match(common, common->quit_label);
13828
  }
13829
13830
if (common->might_be_empty)
13831
  empty_match_backtrack_label = LABEL();
13832
compile_backtrackingpath(common, rootbacktrack.top);
13833
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13834
  {
13835
  sljit_free_compiler(compiler);
13836
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
13837
  SLJIT_FREE(common->private_data_ptrs, allocator_data);
13838
  PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
13839
  return PCRE2_ERROR_NOMEMORY;
13840
  }
13841
13842
SLJIT_ASSERT(rootbacktrack.prev == NULL);
13843
reset_match_label = LABEL();
13844
13845
if (mode == PCRE2_JIT_PARTIAL_SOFT)
13846
  {
13847
  /* Update hit_start only in the first time. */
13848
  jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
13849
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
13850
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
13851
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
13852
  JUMPHERE(jump);
13853
  }
13854
13855
/* Check we have remaining characters. */
13856
if ((re->overall_options & PCRE2_ANCHORED) == 0 && common->match_end_ptr != 0)
13857
  {
13858
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
13859
  }
13860
13861
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP),
13862
    (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3) : common->start_ptr);
13863
13864
if ((re->overall_options & PCRE2_ANCHORED) == 0)
13865
  {
13866
  if (common->ff_newline_shortcut != NULL)
13867
    {
13868
    /* There cannot be more newlines if PCRE2_FIRSTLINE is set. */
13869
    if ((re->overall_options & PCRE2_FIRSTLINE) == 0)
13870
      {
13871
      if (common->match_end_ptr != 0)
13872
        {
13873
        OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
13874
        OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
13875
        CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, common->ff_newline_shortcut);
13876
        OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
13877
        }
13878
      else
13879
        CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
13880
      }
13881
    }
13882
  else
13883
    CMPTO(SLJIT_LESS, STR_PTR, 0, (common->match_end_ptr == 0) ? STR_END : TMP1, 0, mainloop_label);
13884
  }
13885
13886
/* No more remaining characters. */
13887
if (reqcu_not_found != NULL)
13888
  set_jumps(reqcu_not_found, LABEL());
13889
13890
if (mode == PCRE2_JIT_PARTIAL_SOFT)
13891
  CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
13892
13893
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
13894
JUMPTO(SLJIT_JUMP, common->quit_label);
13895
13896
flush_stubs(common);
13897
13898
if (common->might_be_empty)
13899
  {
13900
  JUMPHERE(empty_match);
13901
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
13902
  OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
13903
  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
13904
  JUMPTO(SLJIT_NOT_ZERO, empty_match_backtrack_label);
13905
  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
13906
  JUMPTO(SLJIT_ZERO, empty_match_found_label);
13907
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
13908
  CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
13909
  JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
13910
  }
13911
13912
common->fast_forward_bc_ptr = NULL;
13913
common->early_fail_start_ptr = 0;
13914
common->early_fail_end_ptr = 0;
13915
common->currententry = common->entries;
13916
common->local_quit_available = TRUE;
13917
quit_label = common->quit_label;
13918
while (common->currententry != NULL)
13919
  {
13920
  /* Might add new entries. */
13921
  compile_recurse(common);
13922
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13923
    {
13924
    sljit_free_compiler(compiler);
13925
    SLJIT_FREE(common->optimized_cbracket, allocator_data);
13926
    SLJIT_FREE(common->private_data_ptrs, allocator_data);
13927
    PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
13928
    return PCRE2_ERROR_NOMEMORY;
13929
    }
13930
  flush_stubs(common);
13931
  common->currententry = common->currententry->next;
13932
  }
13933
common->local_quit_available = FALSE;
13934
common->quit_label = quit_label;
13935
13936
/* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */
13937
/* This is a (really) rare case. */
13938
set_jumps(common->stackalloc, LABEL());
13939
/* RETURN_ADDR is not a saved register. */
13940
sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
13941
13942
SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
13943
13944
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
13945
OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
13946
OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_LIMIT, 0, SLJIT_IMM, STACK_GROWTH_RATE);
13947
OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, stack));
13948
OP1(SLJIT_MOV, STACK_LIMIT, 0, TMP2, 0);
13949
13950
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));
13951
13952
jump = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
13953
OP1(SLJIT_MOV, TMP2, 0, STACK_LIMIT, 0);
13954
OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_RETURN_REG, 0);
13955
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
13956
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
13957
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
13958
13959
/* Allocation failed. */
13960
JUMPHERE(jump);
13961
/* We break the return address cache here, but this is a really rare case. */
13962
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_JIT_STACKLIMIT);
13963
JUMPTO(SLJIT_JUMP, common->quit_label);
13964
13965
/* Call limit reached. */
13966
set_jumps(common->calllimit, LABEL());
13967
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_MATCHLIMIT);
13968
JUMPTO(SLJIT_JUMP, common->quit_label);
13969
13970
if (common->revertframes != NULL)
13971
  {
13972
  set_jumps(common->revertframes, LABEL());
13973
  do_revertframes(common);
13974
  }
13975
if (common->wordboundary != NULL)
13976
  {
13977
  set_jumps(common->wordboundary, LABEL());
13978
  check_wordboundary(common);
13979
  }
13980
if (common->anynewline != NULL)
13981
  {
13982
  set_jumps(common->anynewline, LABEL());
13983
  check_anynewline(common);
13984
  }
13985
if (common->hspace != NULL)
13986
  {
13987
  set_jumps(common->hspace, LABEL());
13988
  check_hspace(common);
13989
  }
13990
if (common->vspace != NULL)
13991
  {
13992
  set_jumps(common->vspace, LABEL());
13993
  check_vspace(common);
13994
  }
13995
if (common->casefulcmp != NULL)
13996
  {
13997
  set_jumps(common->casefulcmp, LABEL());
13998
  do_casefulcmp(common);
13999
  }
14000
if (common->caselesscmp != NULL)
14001
  {
14002
  set_jumps(common->caselesscmp, LABEL());
14003
  do_caselesscmp(common);
14004
  }
14005
if (common->reset_match != NULL)
14006
  {
14007
  set_jumps(common->reset_match, LABEL());
14008
  do_reset_match(common, (re->top_bracket + 1) * 2);
14009
  CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
14010
  OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
14011
  JUMPTO(SLJIT_JUMP, reset_match_label);
14012
  }
14013
#ifdef SUPPORT_UNICODE
14014
#if PCRE2_CODE_UNIT_WIDTH == 8
14015
if (common->utfreadchar != NULL)
14016
  {
14017
  set_jumps(common->utfreadchar, LABEL());
14018
  do_utfreadchar(common);
14019
  }
14020
if (common->utfreadtype8 != NULL)
14021
  {
14022
  set_jumps(common->utfreadtype8, LABEL());
14023
  do_utfreadtype8(common);
14024
  }
14025
if (common->utfpeakcharback != NULL)
14026
  {
14027
  set_jumps(common->utfpeakcharback, LABEL());
14028
  do_utfpeakcharback(common);
14029
  }
14030
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
14031
#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
14032
if (common->utfreadchar_invalid != NULL)
14033
  {
14034
  set_jumps(common->utfreadchar_invalid, LABEL());
14035
  do_utfreadchar_invalid(common);
14036
  }
14037
if (common->utfreadnewline_invalid != NULL)
14038
  {
14039
  set_jumps(common->utfreadnewline_invalid, LABEL());
14040
  do_utfreadnewline_invalid(common);
14041
  }
14042
if (common->utfmoveback_invalid)
14043
  {
14044
  set_jumps(common->utfmoveback_invalid, LABEL());
14045
  do_utfmoveback_invalid(common);
14046
  }
14047
if (common->utfpeakcharback_invalid)
14048
  {
14049
  set_jumps(common->utfpeakcharback_invalid, LABEL());
14050
  do_utfpeakcharback_invalid(common);
14051
  }
14052
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16 */
14053
if (common->getucd != NULL)
14054
  {
14055
  set_jumps(common->getucd, LABEL());
14056
  do_getucd(common);
14057
  }
14058
if (common->getucdtype != NULL)
14059
  {
14060
  set_jumps(common->getucdtype, LABEL());
14061
  do_getucdtype(common);
14062
  }
14063
#endif /* SUPPORT_UNICODE */
14064
14065
SLJIT_FREE(common->optimized_cbracket, allocator_data);
14066
SLJIT_FREE(common->private_data_ptrs, allocator_data);
14067
14068
executable_func = sljit_generate_code(compiler);
14069
executable_size = sljit_get_generated_code_size(compiler);
14070
sljit_free_compiler(compiler);
14071
14072
if (executable_func == NULL)
14073
  {
14074
  PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14075
  return PCRE2_ERROR_NOMEMORY;
14076
  }
14077
14078
/* Reuse the function descriptor if possible. */
14079
if (re->executable_jit != NULL)
14080
  functions = (executable_functions *)re->executable_jit;
14081
else
14082
  {
14083
  functions = SLJIT_MALLOC(sizeof(executable_functions), allocator_data);
14084
  if (functions == NULL)
14085
    {
14086
    /* This case is highly unlikely since we just recently
14087
    freed a lot of memory. Not impossible though. */
14088
    sljit_free_code(executable_func, NULL);
14089
    PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14090
    return PCRE2_ERROR_NOMEMORY;
14091
    }
14092
  memset(functions, 0, sizeof(executable_functions));
14093
  functions->top_bracket = re->top_bracket + 1;
14094
  functions->limit_match = re->limit_match;
14095
  re->executable_jit = functions;
14096
  }
14097
14098
/* Turn mode into an index. */
14099
if (mode == PCRE2_JIT_COMPLETE)
14100
  mode = 0;
14101
else
14102
  mode = (mode == PCRE2_JIT_PARTIAL_SOFT) ? 1 : 2;
14103
14104
SLJIT_ASSERT(mode < JIT_NUMBER_OF_COMPILE_MODES);
14105
functions->executable_funcs[mode] = executable_func;
14106
functions->read_only_data_heads[mode] = common->read_only_data_head;
14107
functions->executable_sizes[mode] = executable_size;
14108
return 0;
14109
}
14110
14111
#endif
14112
14113
/*************************************************
14114
*        JIT compile a Regular Expression        *
14115
*************************************************/
14116
14117
/* This function uses JIT to convert a previously-compiled pattern into machine
14118
code.
14119
14120
Arguments:
14121
  code          a compiled pattern
14122
  options       JIT option bits
14123
14124
Returns:        0: success or (*NOJIT) was used
14125
               <0: an error code
14126
*/
14127
14128
/* Mask of every option bit that pcre2_jit_compile() accepts; any bit outside
this mask is rejected with PCRE2_ERROR_JIT_BADOPTION. */

#define PUBLIC_JIT_COMPILE_OPTIONS \
  (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | \
   PCRE2_JIT_PARTIAL_HARD | PCRE2_JIT_INVALID_UTF)
14130
14131
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
14132
pcre2_jit_compile(pcre2_code *code, uint32_t options)
14133
644k
{
14134
644k
pcre2_real_code *re = (pcre2_real_code *)code;
14135
#ifdef SUPPORT_JIT
14136
executable_functions *functions;
14137
static int executable_allocator_is_working = 0;
14138
#endif
14139
14140
644k
if (code == NULL)
14141
0
  return PCRE2_ERROR_NULL;
14142
14143
644k
if ((options & ~PUBLIC_JIT_COMPILE_OPTIONS) != 0)
14144
0
  return PCRE2_ERROR_JIT_BADOPTION;
14145
14146
/* Support for invalid UTF was first introduced in JIT, with the option
14147
PCRE2_JIT_INVALID_UTF. Later, support was added to the interpreter, and the
14148
compile-time option PCRE2_MATCH_INVALID_UTF was created. This is now the
14149
preferred feature, with the earlier option deprecated. However, for backward
14150
compatibility, if the earlier option is set, it forces the new option so that
14151
if JIT matching falls back to the interpreter, there is still support for
14152
invalid UTF. However, if this function has already been successfully called
14153
without PCRE2_JIT_INVALID_UTF and without PCRE2_MATCH_INVALID_UTF (meaning that
14154
non-invalid-supporting JIT code was compiled), give an error.
14155
14156
If in the future support for PCRE2_JIT_INVALID_UTF is withdrawn, the following
14157
actions are needed:
14158
14159
  1. Remove the definition from pcre2.h.in and from the list in
14160
     PUBLIC_JIT_COMPILE_OPTIONS above.
14161
14162
  2. Replace PCRE2_JIT_INVALID_UTF with a local flag in this module.
14163
14164
  3. Replace PCRE2_JIT_INVALID_UTF in pcre2_jit_test.c.
14165
14166
  4. Delete the following short block of code. The setting of "re" and
14167
     "functions" can be moved into the JIT-only block below, but if that is
14168
     done, (void)re and (void)functions will be needed in the non-JIT case, to
14169
     avoid compiler warnings.
14170
*/
14171
14172
#ifdef SUPPORT_JIT
14173
functions = (executable_functions *)re->executable_jit;
14174
#endif
14175
14176
644k
if ((options & PCRE2_JIT_INVALID_UTF) != 0)
14177
0
  {
14178
0
  if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) == 0)
14179
0
    {
14180
#ifdef SUPPORT_JIT
14181
    if (functions != NULL) return PCRE2_ERROR_JIT_BADOPTION;
14182
#endif
14183
0
    re->overall_options |= PCRE2_MATCH_INVALID_UTF;
14184
0
    }
14185
0
  }
14186
14187
/* The above tests are run with and without JIT support. This means that
14188
PCRE2_JIT_INVALID_UTF propagates back into the regex options (ensuring
14189
interpreter support) even in the absence of JIT. But now, if there is no JIT
14190
support, give an error return. */
14191
14192
644k
#ifndef SUPPORT_JIT
14193
644k
return PCRE2_ERROR_JIT_BADOPTION;
14194
#else  /* SUPPORT_JIT */
14195
14196
/* There is JIT support. Do the necessary. */
14197
14198
if ((re->flags & PCRE2_NOJIT) != 0) return 0;
14199
14200
if (executable_allocator_is_working == 0)
14201
  {
14202
  /* Checks whether the executable allocator is working. This check
14203
     might run multiple times in multi-threaded environments, but the
14204
     result should not be affected by it. */
14205
  void *ptr = SLJIT_MALLOC_EXEC(32, NULL);
14206
14207
  executable_allocator_is_working = -1;
14208
14209
  if (ptr != NULL)
14210
    {
14211
    SLJIT_FREE_EXEC(((sljit_u8*)(ptr)) + SLJIT_EXEC_OFFSET(ptr), NULL);
14212
    executable_allocator_is_working = 1;
14213
    }
14214
  }
14215
14216
if (executable_allocator_is_working < 0)
14217
  return PCRE2_ERROR_NOMEMORY;
14218
14219
if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0)
14220
  options |= PCRE2_JIT_INVALID_UTF;
14221
14222
if ((options & PCRE2_JIT_COMPLETE) != 0 && (functions == NULL
14223
    || functions->executable_funcs[0] == NULL)) {
14224
  uint32_t excluded_options = (PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD);
14225
  int result = jit_compile(code, options & ~excluded_options);
14226
  if (result != 0)
14227
    return result;
14228
  }
14229
14230
if ((options & PCRE2_JIT_PARTIAL_SOFT) != 0 && (functions == NULL
14231
    || functions->executable_funcs[1] == NULL)) {
14232
  uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_HARD);
14233
  int result = jit_compile(code, options & ~excluded_options);
14234
  if (result != 0)
14235
    return result;
14236
  }
14237
14238
if ((options & PCRE2_JIT_PARTIAL_HARD) != 0 && (functions == NULL
14239
    || functions->executable_funcs[2] == NULL)) {
14240
  uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT);
14241
  int result = jit_compile(code, options & ~excluded_options);
14242
  if (result != 0)
14243
    return result;
14244
  }
14245
14246
return 0;
14247
14248
#endif  /* SUPPORT_JIT */
14249
644k
}
14250
14251
/* JIT compiler uses an all-in-one approach. This improves security,
14252
   since the code generator functions are not exported. */
14253
14254
#define INCLUDED_FROM_PCRE2_JIT_COMPILE
14255
14256
#include "pcre2_jit_match.c"
14257
#include "pcre2_jit_misc.c"
14258
14259
/* End of pcre2_jit_compile.c */