Coverage Report

Created: 2025-06-13 06:43

/src/php-src/ext/pcre/pcre2lib/pcre2_jit_compile.c
Line
Count
Source (jump to first uncovered line)
1
/*************************************************
2
*      Perl-Compatible Regular Expressions       *
3
*************************************************/
4
5
/* PCRE is a library of functions to support regular expressions whose syntax
6
and semantics are as close as possible to those of the Perl 5 language.
7
8
                       Written by Philip Hazel
9
                    This module by Zoltan Herczeg
10
     Original API code Copyright (c) 1997-2012 University of Cambridge
11
          New API code Copyright (c) 2016-2024 University of Cambridge
12
13
-----------------------------------------------------------------------------
14
Redistribution and use in source and binary forms, with or without
15
modification, are permitted provided that the following conditions are met:
16
17
    * Redistributions of source code must retain the above copyright notice,
18
      this list of conditions and the following disclaimer.
19
20
    * Redistributions in binary form must reproduce the above copyright
21
      notice, this list of conditions and the following disclaimer in the
22
      documentation and/or other materials provided with the distribution.
23
24
    * Neither the name of the University of Cambridge nor the names of its
25
      contributors may be used to endorse or promote products derived from
26
      this software without specific prior written permission.
27
28
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
-----------------------------------------------------------------------------
40
*/
41
42
#ifdef HAVE_CONFIG_H
43
#include "config.h"
44
#endif
45
46
#if defined(__has_feature)
47
#if __has_feature(memory_sanitizer)
48
#include <sanitizer/msan_interface.h>
49
#endif /* __has_feature(memory_sanitizer) */
50
#endif /* defined(__has_feature) */
51
52
#include "pcre2_internal.h"
53
54
#ifdef SUPPORT_JIT
55
56
/* All-in-one: Since we use the JIT compiler only from here,
57
we just include it. This way we don't need to touch the build
58
system files. */
59
60
#define SLJIT_CONFIG_AUTO 1
61
#define SLJIT_CONFIG_STATIC 1
62
#define SLJIT_VERBOSE 0
63
64
#ifdef PCRE2_DEBUG
65
#define SLJIT_DEBUG 1
66
#else
67
#define SLJIT_DEBUG 0
68
#endif
69
70
#define SLJIT_MALLOC(size, allocator_data) pcre2_jit_malloc(size, allocator_data)
71
#define SLJIT_FREE(ptr, allocator_data) pcre2_jit_free(ptr, allocator_data)
72
73
static void * pcre2_jit_malloc(size_t size, void *allocator_data)
74
{
75
pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
76
return allocator->malloc(size, allocator->memory_data);
77
}
78
79
static void pcre2_jit_free(void *ptr, void *allocator_data)
80
{
81
pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
82
allocator->free(ptr, allocator->memory_data);
83
}
84
85
#include "sljit/sljitLir.c"
86
87
#if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
88
#error Unsupported architecture
89
#endif
90
91
/* Defines for debugging purposes. */
92
93
/* 1 - Use unoptimized capturing brackets.
94
   2 - Enable capture_last_ptr (includes option 1). */
95
/* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
96
97
/* 1 - Always have a control head. */
98
/* #define DEBUG_FORCE_CONTROL_HEAD 1 */
99
100
/* Allocate memory for the regex stack on the real machine stack.
101
Fast, but limited size. */
102
#define MACHINE_STACK_SIZE 32768
103
104
/* Growth rate for stack allocated by the OS. Should be a multiple
105
of the page size. */
106
#define STACK_GROWTH_RATE 8192
107
108
/* Enable to check that the allocation could destroy temporaries. */
109
#if defined SLJIT_DEBUG && SLJIT_DEBUG
110
#define DESTROY_REGISTERS 1
111
#endif
112
113
/*
114
Short summary about the backtracking mechanism employed by the jit code generator:
115
116
The code generator follows the recursive nature of the PERL compatible regular
117
expressions. The basic blocks of regular expressions are condition checkers
118
which execute different commands depending on the result of the condition check.
119
The relationship between the operators can be horizontal (concatenation) and
120
vertical (sub-expression) (See struct backtrack_common for more details).
121
122
  'ab' - 'a' and 'b' regexps are concatenated
123
  'a+' - 'a' is the sub-expression of the '+' operator
124
125
The condition checkers are boolean (true/false) checkers. Machine code is generated
126
for the checker itself and for the actions depending on the result of the checker.
127
The 'true' case is called as the matching path (expected path), and the other is called as
128
the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
129
branches on the matching path.
130
131
 Greedy star operator (*) :
132
   Matching path: match happens.
133
   Backtrack path: match failed.
134
 Non-greedy star operator (*?) :
135
   Matching path: no need to perform a match.
136
   Backtrack path: match is required.
137
138
The following example shows how the code is generated for a capturing bracket
139
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
140
we have the following regular expression:
141
142
   A(B|C)D
143
144
The generated code will be the following:
145
146
 A matching path
147
 '(' matching path (pushing arguments to the stack)
148
 B matching path
149
 ')' matching path (pushing arguments to the stack)
150
 D matching path
151
 return with successful match
152
153
 D backtrack path
154
 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
155
 B backtrack path
156
 C expected path
157
 jump to D matching path
158
 C backtrack path
159
 A backtrack path
160
161
 Note that the order of the backtrack code paths is the opposite of the fast
162
 code paths. In this way the topmost value on the stack always belongs
163
 to the current backtrack code path. The backtrack path must check
164
 whether there is a next alternative. If so, it needs to jump back to
165
 the matching path eventually. Otherwise it needs to clear out its own stack
166
 frame and continue the execution on the backtrack code paths.
167
*/
168
169
/*
170
Saved stack frames:
171
172
Atomic blocks and asserts require reloading the values of private data
173
when the backtrack mechanism is performed. Because of OP_RECURSE, the data
174
are not necessarily known at compile time, thus we need a dynamic restore
175
mechanism.
176
177
The stack frames are stored in a chain list, and have the following format:
178
([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
179
180
Thus we can restore the private data to a particular point in the stack.
181
*/
182
183
/* Argument block passed by pointer to a jit_function (see the jit_function
typedef). Pointer-sized members are grouped first, followed by the integer
members. */
typedef struct jit_arguments {
184
  /* Pointers first. */
185
  struct sljit_stack *stack;
186
  PCRE2_SPTR str;
187
  PCRE2_SPTR begin;
188
  PCRE2_SPTR end;
189
  pcre2_match_data *match_data;
190
  PCRE2_SPTR startchar_ptr;
191
  PCRE2_UCHAR *mark_ptr;
192
  int (*callout)(pcre2_callout_block *, void *);
193
  void *callout_data;
194
  /* Everything else after. */
195
  sljit_uw offset_limit;
196
  sljit_u32 limit_match;
197
  sljit_u32 oveccount;
198
  sljit_u32 options;
199
} jit_arguments;
200
201
#define JIT_NUMBER_OF_COMPILE_MODES 3
202
203
/* Per-compile-mode tables (JIT_NUMBER_OF_COMPILE_MODES entries each) holding
an executable function pointer, a read-only data list head and an executable
size, plus the top_bracket and limit_match values. */
typedef struct executable_functions {
204
  void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
205
  void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
206
  sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
207
  sljit_u32 top_bracket;
208
  sljit_u32 limit_match;
209
} executable_functions;
210
211
/* Node of a singly linked list of sljit jumps. */
typedef struct jump_list {
212
  struct sljit_jump *jump;
213
  struct jump_list *next;
214
} jump_list;
215
216
/* Node of a singly linked list of stubs; each node pairs a 'start' jump with
a 'quit' label. */
typedef struct stub_list {
217
  struct sljit_jump *start;
218
  struct sljit_label *quit;
219
  struct stub_list *next;
220
} stub_list;
221
222
/* Negative sentinel values. Presumably used in place of a frame size (the
'framesize' members in this file are documented as less than 0 when no frame
is needed) - TODO confirm against the users of these constants. */
enum frame_types {
223
  no_frame = -1,
224
  no_stack = -2
225
};
226
227
/* Type tags for the items of the control verb chain (the comment on
compiler_common.control_head_ptr refers to this enum). */
enum control_types {
228
  type_mark = 0,
229
  type_then_trap = 1
230
};
231
232
/* Kinds of early fail handling. The users of these constants are outside
this view; presumably related to the early_fail_start_ptr / early_fail_end_ptr
locals of compiler_common - TODO confirm. */
enum  early_fail_types {
233
  type_skip = 0,
234
  type_fail = 1,
235
  type_fail_range = 2
236
};
237
238
/* Pointer to a function declared with SLJIT_FUNC that takes a jit_arguments
pointer and returns int. Presumably the entry point of the generated code -
TODO confirm at the call site. */
typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args);
239
240
/* The following structure is the key data type for the recursive
241
code generator. It is allocated by compile_matchingpath, and contains
242
the arguments for compile_backtrackingpath. Must be the first member
243
of its descendants (the *_backtrack structures in this file all begin
with a 'backtrack_common common' member). */
244
typedef struct backtrack_common {
245
  /* Backtracking path of an opcode, which falls back
246
     to our opcode, if it cannot resume matching. */
247
  struct backtrack_common *prev;
248
  /* Backtracks for opcodes without backtracking path.
249
     These opcodes are between 'prev' and the current
250
     opcode, and they never resume the match. */
251
  jump_list *simple_backtracks;
252
  /* Internal backtracking list for block constructs
253
     which contains other opcodes, such as brackets,
254
     asserts, conditionals, etc. */
255
  struct backtrack_common *top;
256
  /* Backtracks used internally by the opcode. For component
257
     opcodes, this list is also used by those opcodes without
258
     backtracking path which follows the 'top' backtrack. */
259
  jump_list *own_backtracks;
260
  /* Opcode pointer. */
261
  PCRE2_SPTR cc;
262
} backtrack_common;
263
264
/* Backtrack descriptor with assertion-specific members; extends
backtrack_common (first member). */
typedef struct assert_backtrack {
265
  backtrack_common common;
266
  jump_list *condfailed;
267
  /* Less than 0 if a frame is not needed. */
268
  int framesize;
269
  /* Points to our private memory word on the stack. */
270
  int private_data_ptr;
271
  /* For iterators. */
272
  struct sljit_label *matchingpath;
273
} assert_backtrack;
274
275
/* Backtrack descriptor with bracket-specific members; extends
backtrack_common (first member). */
typedef struct bracket_backtrack {
276
  backtrack_common common;
277
  /* Where to continue if an alternative is successfully matched. */
278
  struct sljit_label *alternative_matchingpath;
279
  /* For rmin and rmax iterators. */
280
  struct sljit_label *recursive_matchingpath;
281
  /* For greedy ? operator. */
282
  struct sljit_label *zero_matchingpath;
283
  /* Contains the branches of a failed condition. */
284
  union {
285
    /* Both for OP_COND, OP_SCOND, OP_ASSERT_SCS. */
286
    jump_list *no_capture;
287
    assert_backtrack *assert;
288
    /* For OP_ONCE. Less than 0 if not needed. */
289
    int framesize;
290
  } u;
291
  /* For brackets with >3 alternatives. */
292
  struct sljit_jump *matching_mov_addr;
293
  /* Points to our private memory word on the stack. */
294
  int private_data_ptr;
295
} bracket_backtrack;
296
297
/* Backtrack descriptor carrying a private data offset, a frame size and a
stack size; extends backtrack_common (first member). Judging by the name it
serves the *POS bracket opcodes - TODO confirm. */
typedef struct bracketpos_backtrack {
298
  backtrack_common common;
299
  /* Points to our private memory word on the stack. */
300
  int private_data_ptr;
301
  /* Reverting stack is needed. */
302
  int framesize;
303
  /* Allocated stack size. */
304
  int stacksize;
305
} bracketpos_backtrack;
306
307
/* Backtrack descriptor that adds a single matching path label to
backtrack_common (first member). Judging by the name it serves
OP_BRAMINZERO - TODO confirm. */
typedef struct braminzero_backtrack {
308
  backtrack_common common;
309
  struct sljit_label *matchingpath;
310
} braminzero_backtrack;
311
312
/* Backtrack descriptor with a next-iteration label and the 'charpos' data
described below; extends backtrack_common (first member). */
typedef struct char_iterator_backtrack {
313
  backtrack_common common;
314
  /* Next iteration. */
315
  struct sljit_label *matchingpath;
316
  /* Creating a range based on the next character. */
317
  struct {
318
    unsigned int othercasebit;
319
    PCRE2_UCHAR chr;
320
    BOOL charpos_enabled;
321
  } charpos;
322
} char_iterator_backtrack;
323
324
/* Backtrack descriptor that adds a next-iteration label to backtrack_common
(first member). */
typedef struct ref_iterator_backtrack {
325
  backtrack_common common;
326
  /* Next iteration. */
327
  struct sljit_label *matchingpath;
328
} ref_iterator_backtrack;
329
330
/* Node of a singly linked list describing one recursion target: its entry
and backtrack labels, the calls still waiting for those labels, and the
starting opcode. */
typedef struct recurse_entry {
331
  struct recurse_entry *next;
332
  /* Contains the function entry label. */
333
  struct sljit_label *entry_label;
334
  /* Contains the function backtrack label. */
335
  struct sljit_label *backtrack_label;
336
  /* Collects the entry calls until the function is not created. */
337
  jump_list *entry_calls;
338
  /* Collects the backtrack calls until the function is not created. */
339
  jump_list *backtrack_calls;
340
  /* Points to the starting opcode. */
341
  sljit_sw start;
342
} recurse_entry;
343
344
/* Backtrack descriptor that references a recurse_entry and records whether
the pattern was inlined; extends backtrack_common (first member). */
typedef struct recurse_backtrack {
345
  backtrack_common common;
346
  /* Return to the matching path. */
347
  struct sljit_label *matchingpath;
348
  /* Recursive pattern. */
349
  recurse_entry *entry;
350
  /* Pattern is inlined. */
351
  BOOL inlined_pattern;
352
} recurse_backtrack;
353
354
/* Backtrack descriptor that adds a matching path label to backtrack_common
(first member). Judging by the name it serves OP_VREVERSE - TODO confirm. */
typedef struct vreverse_backtrack {
355
  backtrack_common common;
356
  /* Return to the matching path. */
357
  struct sljit_label *matchingpath;
358
} vreverse_backtrack;
359
360
#define OP_THEN_TRAP OP_TABLE_LENGTH
361
362
/* Backtrack descriptor for a "then trap"; extends backtrack_common (first
member). compiler_common.then_trap points to a structure of this type and is
documented there as the current position where a THEN must jump. */
typedef struct then_trap_backtrack {
363
  backtrack_common common;
364
  /* If then_trap is not NULL, this structure contains the real
365
  then_trap for the backtracking path. */
366
  struct then_trap_backtrack *then_trap;
367
  /* Points to the starting opcode. */
368
  sljit_sw start;
369
  /* Exit point for the then opcodes of this alternative. */
370
  jump_list *quit;
371
  /* Frame size of the current alternative. */
372
  int framesize;
373
} then_trap_backtrack;
374
375
#define MAX_N_CHARS 12
376
#define MAX_DIFF_CHARS 5
377
378
/* Set of candidate characters for one position; holds at most
MAX_DIFF_CHARS distinct characters. */
typedef struct fast_forward_char_data {
379
  /* Number of characters in the chars array, 255 for any character. */
380
  sljit_u8 count;
381
  /* Number of last UTF-8 characters in the chars array. */
382
  sljit_u8 last_count;
383
  /* Available characters in the current position. */
384
  PCRE2_UCHAR chars[MAX_DIFF_CHARS];
385
} fast_forward_char_data;
386
387
#define MAX_CLASS_RANGE_SIZE 4
388
#define MAX_CLASS_CHARS_SIZE 3
389
390
/* Shared state of one JIT compilation: the sljit compiler and the compiled
pattern, offsets of the private data / local slots, pattern feature flags,
newline settings, and the labels and jump lists collected during code
generation. */
typedef struct compiler_common {
391
  /* The sljit generic compiler. */
392
  struct sljit_compiler *compiler;
393
  /* Compiled regular expression. */
394
  pcre2_real_code *re;
395
  /* First byte code. */
396
  PCRE2_SPTR start;
397
  /* Maps private data offset to each opcode. */
398
  sljit_s32 *private_data_ptrs;
399
  /* Chain list of read-only data ptrs. */
400
  void *read_only_data_head;
401
  /* Tells whether the capturing bracket is optimized. */
402
  sljit_u8 *optimized_cbracket;
403
  /* Tells whether the starting offset is a target of then. */
404
  sljit_u8 *then_offsets;
405
  /* Current position where a THEN must jump. */
406
  then_trap_backtrack *then_trap;
407
  /* Starting offset of private data for capturing brackets. */
408
  sljit_s32 cbra_ptr;
409
#if defined SLJIT_DEBUG && SLJIT_DEBUG
410
  /* End offset of locals for assertions. */
411
  sljit_s32 locals_size;
412
#endif
413
  /* Output vector starting point. Must be divisible by 2. */
414
  sljit_s32 ovector_start;
415
  /* Points to the starting character of the current match. */
416
  sljit_s32 start_ptr;
417
  /* Last known position of the requested byte. */
418
  sljit_s32 req_char_ptr;
419
  /* Head of the last recursion. */
420
  sljit_s32 recursive_head_ptr;
421
  /* First inspected character for partial matching.
422
     (Needed for avoiding zero length partial matches.) */
423
  sljit_s32 start_used_ptr;
424
  /* Starting pointer for partial soft matches. */
425
  sljit_s32 hit_start;
426
  /* Pointer of the match end position. */
427
  sljit_s32 match_end_ptr;
428
  /* Points to the marked string. */
429
  sljit_s32 mark_ptr;
430
  /* Head of the recursive control verb management chain.
431
     Each item must have a previous offset and type
432
     (see control_types) values. See do_search_mark. */
433
  sljit_s32 control_head_ptr;
434
  /* The offset of the saved STR_END in the outermost
435
     scan substring block. Since scan substring restores
436
     STR_END after a match, it is enough to restore
437
     STR_END inside a scan substring block. */
438
  sljit_s32 restore_end_ptr;
439
  /* Points to the last matched capture block index. */
440
  sljit_s32 capture_last_ptr;
441
  /* Fast forward skipping byte code pointer. */
442
  PCRE2_SPTR fast_forward_bc_ptr;
443
  /* Locals used by fast fail optimization. */
444
  sljit_s32 early_fail_start_ptr;
445
  sljit_s32 early_fail_end_ptr;
446
  /* Variables used by recursive call generator. */
447
  sljit_s32 recurse_bitset_size;
448
  uint8_t *recurse_bitset;
449
450
  /* Flipped and lower case tables. */
451
  const sljit_u8 *fcc;
452
  sljit_sw lcc;
453
  /* Mode can be PCRE2_JIT_COMPLETE and others. */
454
  int mode;
455
  /* TRUE, when empty match is accepted for partial matching. */
456
  BOOL allow_empty_partial;
457
  /* TRUE, when minlength is greater than 0.
     NOTE(review): the member name suggests the opposite sense (the pattern
     may match an empty string) - confirm against the code that sets it. */
458
  BOOL might_be_empty;
459
  /* \K is found in the pattern. */
460
  BOOL has_set_som;
461
  /* (*SKIP:arg) is found in the pattern. */
462
  BOOL has_skip_arg;
463
  /* (*THEN) is found in the pattern. */
464
  BOOL has_then;
465
  /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
466
  BOOL has_skip_in_assert_back;
467
  /* Quit is redirected by recurse, negative assertion, or positive assertion in conditional block. */
468
  BOOL local_quit_available;
469
  /* Currently in a positive assertion. */
470
  BOOL in_positive_assertion;
471
  /* Newline control. */
472
  int nltype;
473
  sljit_u32 nlmax;
474
  sljit_u32 nlmin;
475
  int newline;
476
  int bsr_nltype;
477
  sljit_u32 bsr_nlmax;
478
  sljit_u32 bsr_nlmin;
479
  /* Dollar endonly. */
480
  int endonly;
481
  /* Tables. */
482
  sljit_sw ctypes;
483
  /* Named capturing brackets. */
484
  PCRE2_SPTR name_table;
485
  sljit_sw name_count;
486
  sljit_sw name_entry_size;
487
488
  /* Labels and jump lists. */
489
  struct sljit_label *partialmatchlabel;
490
  struct sljit_label *quit_label;
491
  struct sljit_label *abort_label;
492
  struct sljit_label *accept_label;
493
  struct sljit_label *ff_newline_shortcut;
494
  stub_list *stubs;
495
  recurse_entry *entries;
496
  recurse_entry *currententry;
497
  jump_list *partialmatch;
498
  jump_list *quit;
499
  jump_list *positive_assertion_quit;
500
  jump_list *abort;
501
  jump_list *failed_match;
502
  jump_list *accept;
503
  jump_list *calllimit;
504
  jump_list *stackalloc;
505
  jump_list *revertframes;
506
  jump_list *wordboundary;
507
  jump_list *ucp_wordboundary;
508
  jump_list *anynewline;
509
  jump_list *hspace;
510
  jump_list *vspace;
511
  jump_list *casefulcmp;
512
  jump_list *caselesscmp;
513
  jump_list *reset_match;
514
  /* Same as reset_match, but resets the STR_PTR as well. */
515
  jump_list *restart_match;
516
  BOOL unset_backref;
517
  BOOL alt_circumflex;
518
#ifdef SUPPORT_UNICODE
519
  BOOL utf;
520
  BOOL invalid_utf;
521
  BOOL ucp;
522
  /* Points to saving area for iref.
     NOTE(review): this comment does not appear to describe the jump lists
     that follow; it may be stale - confirm. */
523
  jump_list *getucd;
524
  jump_list *getucdtype;
525
#if PCRE2_CODE_UNIT_WIDTH == 8
526
  jump_list *utfreadchar;
527
  jump_list *utfreadtype8;
528
  jump_list *utfpeakcharback;
529
#endif
530
#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
531
  jump_list *utfreadchar_invalid;
532
  jump_list *utfreadnewline_invalid;
533
  jump_list *utfmoveback_invalid;
534
  jump_list *utfpeakcharback_invalid;
535
#endif
536
#endif /* SUPPORT_UNICODE */
537
} compiler_common;
538
539
/* For byte_sequence_compare. */
540
541
/* Comparison state. 'length' and 'sourcereg' are always present. When
SLJIT_UNALIGNED is enabled the structure also has 'ucharptr' and two unions,
'c' and 'oc', which overlay an array of code units (sized by
PCRE2_CODE_UNIT_WIDTH) with 8/16/32 bit integer views. */
typedef struct compare_context {
542
  int length;
543
  int sourcereg;
544
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
545
  int ucharptr;
546
  union {
547
    sljit_s32 asint;
548
    sljit_u16 asushort;
549
#if PCRE2_CODE_UNIT_WIDTH == 8
550
    sljit_u8 asbyte;
551
    sljit_u8 asuchars[4];
552
#elif PCRE2_CODE_UNIT_WIDTH == 16
553
    sljit_u16 asuchars[2];
554
#elif PCRE2_CODE_UNIT_WIDTH == 32
555
    sljit_u32 asuchars[1];
556
#endif
557
  } c;
558
  union {
559
    sljit_s32 asint;
560
    sljit_u16 asushort;
561
#if PCRE2_CODE_UNIT_WIDTH == 8
562
    sljit_u8 asbyte;
563
    sljit_u8 asuchars[4];
564
#elif PCRE2_CODE_UNIT_WIDTH == 16
565
    sljit_u16 asuchars[2];
566
#elif PCRE2_CODE_UNIT_WIDTH == 32
567
    sljit_u32 asuchars[1];
568
#endif
569
  } oc;
570
#endif
571
} compare_context;
572
573
/* Undefine sljit macros. */
574
#undef CMP
575
576
/* Used for accessing the elements of the stack. */
577
#define STACK(i)      ((i) * SSIZE_OF(sw))
578
579
#ifdef SLJIT_PREF_SHIFT_REG
580
#if SLJIT_PREF_SHIFT_REG == SLJIT_R2
581
/* Nothing. */
582
#elif SLJIT_PREF_SHIFT_REG == SLJIT_R3
583
#define SHIFT_REG_IS_R3
584
#else
585
#error "Unsupported shift register"
586
#endif
587
#endif
588
589
#define TMP1          SLJIT_R0
590
#ifdef SHIFT_REG_IS_R3
591
#define TMP2          SLJIT_R3
592
#define TMP3          SLJIT_R2
593
#else
594
#define TMP2          SLJIT_R2
595
#define TMP3          SLJIT_R3
596
#endif
597
#define STR_PTR       SLJIT_R1
598
#define STR_END       SLJIT_S0
599
#define STACK_TOP     SLJIT_S1
600
#define STACK_LIMIT   SLJIT_S2
601
#define COUNT_MATCH   SLJIT_S3
602
#define ARGUMENTS     SLJIT_S4
603
#define RETURN_ADDR   SLJIT_R4
604
605
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
606
#define HAS_VIRTUAL_REGISTERS 1
607
#else
608
#define HAS_VIRTUAL_REGISTERS 0
609
#endif
610
611
/* Local space layout. */
612
/* Max limit of recursions. */
613
#define LIMIT_MATCH      (0 * sizeof(sljit_sw))
614
/* Local variables. Their number is computed by check_opcode_types. */
615
#define LOCAL0           (1 * sizeof(sljit_sw))
616
#define LOCAL1           (2 * sizeof(sljit_sw))
617
#define LOCAL2           (3 * sizeof(sljit_sw))
618
#define LOCAL3           (4 * sizeof(sljit_sw))
619
#define LOCAL4           (5 * sizeof(sljit_sw))
620
/* The output vector is stored on the stack, and contains pointers
621
to characters. The vector data is divided into two groups: the first
622
group contains the start / end character pointers, and the second is
623
the start pointers when the end of the capturing group has not yet been reached. */
624
#define OVECTOR_START    (common->ovector_start)
625
#define OVECTOR(i)       (OVECTOR_START + (i) * SSIZE_OF(sw))
626
#define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * SSIZE_OF(sw))
627
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
628
629
#if PCRE2_CODE_UNIT_WIDTH == 8
630
#define MOV_UCHAR  SLJIT_MOV_U8
631
#define IN_UCHARS(x) (x)
632
#elif PCRE2_CODE_UNIT_WIDTH == 16
633
#define MOV_UCHAR  SLJIT_MOV_U16
634
#define UCHAR_SHIFT (1)
635
#define IN_UCHARS(x) ((x) * 2)
636
#elif PCRE2_CODE_UNIT_WIDTH == 32
637
#define MOV_UCHAR  SLJIT_MOV_U32
638
#define UCHAR_SHIFT (2)
639
#define IN_UCHARS(x) ((x) * 4)
640
#else
641
#error Unsupported compiling mode
642
#endif
643
644
/* Shortcuts. */
645
#define DEFINE_COMPILER \
646
  struct sljit_compiler *compiler = common->compiler
647
#define OP1(op, dst, dstw, src, srcw) \
648
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
649
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
650
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
651
#define OP2U(op, src1, src1w, src2, src2w) \
652
  sljit_emit_op2u(compiler, (op), (src1), (src1w), (src2), (src2w))
653
#define OP_SRC(op, src, srcw) \
654
  sljit_emit_op_src(compiler, (op), (src), (srcw))
655
#define LABEL() \
656
  sljit_emit_label(compiler)
657
#define JUMP(type) \
658
  sljit_emit_jump(compiler, (type))
659
#define JUMPTO(type, label) \
660
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
661
#define JUMPHERE(jump) \
662
  sljit_set_label((jump), sljit_emit_label(compiler))
663
#define SET_LABEL(jump, label) \
664
  sljit_set_label((jump), (label))
665
#define CMP(type, src1, src1w, src2, src2w) \
666
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
667
#define CMPTO(type, src1, src1w, src2, src2w, label) \
668
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
669
#define OP_FLAGS(op, dst, dstw, type) \
670
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))
671
#define SELECT(type, dst_reg, src1, src1w, src2_reg) \
672
  sljit_emit_select(compiler, (type), (dst_reg), (src1), (src1w), (src2_reg))
673
#define GET_LOCAL_BASE(dst, dstw, offset) \
674
  sljit_get_local_base(compiler, (dst), (dstw), (offset))
675
676
#define READ_CHAR_MAX ((sljit_u32)0xffffffff)
677
678
#define INVALID_UTF_CHAR -1
679
#define UNASSIGNED_UTF_CHAR 888
680
681
#if defined SUPPORT_UNICODE
682
#if PCRE2_CODE_UNIT_WIDTH == 8
683
684
/* 8-bit variant. Decodes the UTF-8 character starting at ptr into c and
advances ptr past it. Accepts 1 to 4 byte sequences and rejects bad
continuation bytes, lead bytes outside 0xc2-0xdf / 0xe0-0xef / 0xf0-0xf4,
3-byte values below 0x800 or in the surrogate range, and 4-byte values
below 0x10000 or at/above 0x110000. ptr[0] is read unconditionally; each
following byte is read only after its position was compared against end.
invalid_action is executed for rejected input. Note that for the 3-byte and
4-byte range failures ptr has already been advanced, while in the other
failure branches ptr is unchanged and c may hold a partial value. The macro
arguments are evaluated several times. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
685
  { \
686
  if (ptr[0] <= 0x7f) \
687
    c = *ptr++; \
688
  else if (ptr + 1 < end && ptr[1] >= 0x80 && ptr[1] < 0xc0) \
689
    { \
690
    c = ptr[1] - 0x80; \
691
    \
692
    if (ptr[0] >= 0xc2 && ptr[0] <= 0xdf) \
693
      { \
694
      c |= (ptr[0] - 0xc0) << 6; \
695
      ptr += 2; \
696
      } \
697
    else if (ptr + 2 < end && ptr[2] >= 0x80 && ptr[2] < 0xc0) \
698
      { \
699
      c = c << 6 | (ptr[2] - 0x80); \
700
      \
701
      if (ptr[0] >= 0xe0 && ptr[0] <= 0xef) \
702
        { \
703
        c |= (ptr[0] - 0xe0) << 12; \
704
        ptr += 3; \
705
        \
706
        if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
707
          { \
708
          invalid_action; \
709
          } \
710
        } \
711
      else if (ptr + 3 < end && ptr[3] >= 0x80 && ptr[3] < 0xc0) \
712
        { \
713
        c = c << 6 | (ptr[3] - 0x80); \
714
        \
715
        if (ptr[0] >= 0xf0 && ptr[0] <= 0xf4) \
716
          { \
717
          c |= (ptr[0] - 0xf0) << 18; \
718
          ptr += 4; \
719
          \
720
          if (c >= 0x110000 || c < 0x10000) \
721
            { \
722
            invalid_action; \
723
            } \
724
          } \
725
        else \
726
          { \
727
          invalid_action; \
728
          } \
729
        } \
730
      else \
731
        { \
732
        invalid_action; \
733
        } \
734
      } \
735
    else \
736
      { \
737
      invalid_action; \
738
      } \
739
    } \
740
  else \
741
    { \
742
    invalid_action; \
743
    } \
744
  }
745
746
/* 8-bit variant. Decodes the UTF-8 character that ends just before ptr into
c and moves ptr back to its first byte. It walks backwards over up to three
continuation bytes and applies the same lead byte and value range checks as
the forward macro. ptr[-1] is read unconditionally; each earlier byte is
read only after its position was compared against start. invalid_action is
executed for rejected input. Note that for the 3-byte and 4-byte range
failures ptr has already been moved back, while in the other failure
branches ptr is unchanged and c may hold a partial value. The macro
arguments are evaluated several times. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
747
  { \
748
  c = ptr[-1]; \
749
  if (c <= 0x7f) \
750
    ptr--; \
751
  else if (ptr - 1 > start && ptr[-1] >= 0x80 && ptr[-1] < 0xc0) \
752
    { \
753
    c -= 0x80; \
754
    \
755
    if (ptr[-2] >= 0xc2 && ptr[-2] <= 0xdf) \
756
      { \
757
      c |= (ptr[-2] - 0xc0) << 6; \
758
      ptr -= 2; \
759
      } \
760
    else if (ptr - 2 > start && ptr[-2] >= 0x80 && ptr[-2] < 0xc0) \
761
      { \
762
      c = c << 6 | (ptr[-2] - 0x80); \
763
      \
764
      if (ptr[-3] >= 0xe0 && ptr[-3] <= 0xef) \
765
        { \
766
        c |= (ptr[-3] - 0xe0) << 12; \
767
        ptr -= 3; \
768
        \
769
        if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
770
          { \
771
          invalid_action; \
772
          } \
773
        } \
774
      else if (ptr - 3 > start && ptr[-3] >= 0x80 && ptr[-3] < 0xc0) \
775
        { \
776
        c = c << 6 | (ptr[-3] - 0x80); \
777
        \
778
        if (ptr[-4] >= 0xf0 && ptr[-4] <= 0xf4) \
779
          { \
780
          c |= (ptr[-4] - 0xf0) << 18; \
781
          ptr -= 4; \
782
          \
783
          if (c >= 0x110000 || c < 0x10000) \
784
            { \
785
            invalid_action; \
786
            } \
787
          } \
788
        else \
789
          { \
790
          invalid_action; \
791
          } \
792
        } \
793
      else \
794
        { \
795
        invalid_action; \
796
        } \
797
      } \
798
    else \
799
      { \
800
      invalid_action; \
801
      } \
802
    } \
803
  else \
804
    { \
805
    invalid_action; \
806
    } \
807
  }
808
809
#elif PCRE2_CODE_UNIT_WIDTH == 16
810
811
/* 16-bit variant. A code unit outside 0xd800-0xdfff is stored in c as is
and ptr advances by one. A unit in 0xd800-0xdbff that is followed, before
end, by a unit in 0xdc00-0xdfff is combined into a value of 0x10000 or above
and ptr advances by two. Anything else executes invalid_action with ptr
unchanged. ptr[0] is read unconditionally. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
812
  { \
813
  if (ptr[0] < 0xd800 || ptr[0] >= 0xe000) \
814
    c = *ptr++; \
815
  else if (ptr[0] < 0xdc00 && ptr + 1 < end && ptr[1] >= 0xdc00 && ptr[1] < 0xe000) \
816
    { \
817
    c = (((ptr[0] - 0xd800) << 10) | (ptr[1] - 0xdc00)) + 0x10000; \
818
    ptr += 2; \
819
    } \
820
  else \
821
    { \
822
    invalid_action; \
823
    } \
824
  }
825
826
/* 16-bit variant. Loads ptr[-1] into c unconditionally. If it is outside
0xd800-0xdfff, ptr moves back by one. If it is in 0xdc00-0xdfff and is
preceded, after start, by a unit in 0xd800-0xdbff, the pair is combined into
a value of 0x10000 or above and ptr moves back by two. Anything else
executes invalid_action with ptr unchanged (c then holds the raw ptr[-1]
unit). */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
827
  { \
828
  c = ptr[-1]; \
829
  if (c < 0xd800 || c >= 0xe000) \
830
    ptr--; \
831
  else if (c >= 0xdc00 && ptr - 1 > start && ptr[-2] >= 0xd800 && ptr[-2] < 0xdc00) \
832
    { \
833
    c = (((ptr[-2] - 0xd800) << 10) | (c - 0xdc00)) + 0x10000; \
834
    ptr -= 2; \
835
    } \
836
  else \
837
    { \
838
    invalid_action; \
839
    } \
840
  }
841
842
843
#elif PCRE2_CODE_UNIT_WIDTH == 32
844
845
/* 32-bit variant. Accepts ptr[0] when it is below 0xd800 or in the range
0xe000 to 0x10ffff: it is stored in c and ptr advances by one. Otherwise
invalid_action is executed, leaving c and ptr unchanged. The 'end' argument
is not used by this variant. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
846
  { \
847
  if (ptr[0] < 0xd800 || (ptr[0] >= 0xe000 && ptr[0] < 0x110000)) \
848
    c = *ptr++; \
849
  else \
850
    { \
851
    invalid_action; \
852
    } \
853
  }
854
855
/* 32-bit variant. Always loads ptr[-1] into c. ptr moves back by one only
when that value is below 0xd800 or in the range 0xe000 to 0x10ffff;
otherwise invalid_action is executed with ptr unchanged. The 'start'
argument is not used by this variant. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
856
  { \
857
  c = ptr[-1]; \
858
  if (ptr[-1] < 0xd800 || (ptr[-1] >= 0xe000 && ptr[-1] < 0x110000)) \
859
    ptr--; \
860
  else \
861
    { \
862
    invalid_action; \
863
    } \
864
  }
865
866
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
867
#endif /* SUPPORT_UNICODE */
868
869
/* Returns the address just past the closing KET of the group that starts
at cc. cc must point to an opcode in the OP_ASSERT..OP_ASSERT_SCS or
OP_ONCE..OP_SCOND range. The link stored at offset 1 is followed from the
opening opcode and from every OP_ALT until the closing opcode is reached. */
static PCRE2_SPTR bracketend(PCRE2_SPTR cc)
{
SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERT_SCS) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

/* Skip the first alternative, then each further one. */
cc += GET(cc, 1);
while (*cc == OP_ALT)
  cc += GET(cc, 1);

SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
/* Step over the closing opcode and its link field. */
return cc + 1 + LINK_SIZE;
}
877
878
/* Counts the alternatives of the group that starts at cc: one for the
opening opcode plus one for each OP_ALT found by following the links stored
at offset 1. cc must point to an opcode in the OP_ASSERT..OP_ASSERT_SCS or
OP_ONCE..OP_SCOND range. */
static int no_alternatives(PCRE2_SPTR cc)
{
int alternatives = 1;

SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERT_SCS) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

for (cc += GET(cc, 1); *cc == OP_ALT; cc += GET(cc, 1))
  alternatives++;

SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
return alternatives;
}
891
892
/* Returns TRUE when at least one alternative of the lookbehind starting at
cc begins with OP_VREVERSE, FALSE otherwise. cc must point to
OP_ASSERTBACK, OP_ASSERTBACK_NOT or OP_ASSERTBACK_NA. */
static BOOL find_vreverse(PCRE2_SPTR cc)
{
  SLJIT_ASSERT(*cc == OP_ASSERTBACK || *cc == OP_ASSERTBACK_NOT ||  *cc == OP_ASSERTBACK_NA);

  for (;;)
    {
    /* The first opcode of an alternative follows the opcode and its link. */
    if (cc[1 + LINK_SIZE] == OP_VREVERSE)
      return TRUE;

    /* Move to the next alternative; stop at anything other than OP_ALT. */
    cc += GET(cc, 1);
    if (*cc != OP_ALT)
      return FALSE;
    }
}
906
907
/* Functions which might need modification for all new supported opcodes:
908
 next_opcode
909
 check_opcode_types
910
 set_private_data_ptrs
911
 get_framesize
912
 init_frame
913
 get_recurse_data_length
914
 copy_recurse_data
915
 compile_matchingpath
916
 compile_backtrackingpath
917
*/
918
919
/* Returns the address of the opcode which follows the opcode at cc.

NULL is returned for OP_ANYBYTE in UTF mode, and for any opcode which is
not listed below (the latter case is also marked as unreachable). */
static PCRE2_SPTR next_opcode(compiler_common *common, PCRE2_SPTR cc)
{
SLJIT_UNUSED_ARG(common);

switch(*cc)
  {
  /* The length is simply given by the OP_lengths table. */
  case OP_SOD: case OP_SOM: case OP_SET_SOM:
  case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY:
  case OP_NOT_DIGIT: case OP_DIGIT:
  case OP_NOT_WHITESPACE: case OP_WHITESPACE:
  case OP_NOT_WORDCHAR: case OP_WORDCHAR:
  case OP_ANY: case OP_ALLANY:
  case OP_NOTPROP: case OP_PROP:
  case OP_ANYNL:
  case OP_NOT_HSPACE: case OP_HSPACE:
  case OP_NOT_VSPACE: case OP_VSPACE:
  case OP_EXTUNI:
  case OP_EODN: case OP_EOD:
  case OP_CIRC: case OP_CIRCM:
  case OP_DOLL: case OP_DOLLM:
  case OP_CRSTAR: case OP_CRMINSTAR:
  case OP_CRPLUS: case OP_CRMINPLUS:
  case OP_CRQUERY: case OP_CRMINQUERY:
  case OP_CRRANGE: case OP_CRMINRANGE:
  case OP_CRPOSSTAR: case OP_CRPOSPLUS:
  case OP_CRPOSQUERY: case OP_CRPOSRANGE:
  case OP_CLASS: case OP_NCLASS:
  case OP_REF: case OP_REFI:
  case OP_DNREF: case OP_DNREFI:
  case OP_RECURSE:
  case OP_CALLOUT:
  case OP_ALT:
  case OP_KET: case OP_KETRMAX: case OP_KETRMIN: case OP_KETRPOS:
  case OP_REVERSE: case OP_VREVERSE:
  case OP_ASSERT: case OP_ASSERT_NOT:
  case OP_ASSERTBACK: case OP_ASSERTBACK_NOT:
  case OP_ASSERT_NA: case OP_ASSERTBACK_NA:
  case OP_ASSERT_SCS:
  case OP_ONCE:
  case OP_SCRIPT_RUN:
  case OP_BRA: case OP_BRAPOS:
  case OP_CBRA: case OP_CBRAPOS:
  case OP_COND:
  case OP_SBRA: case OP_SBRAPOS:
  case OP_SCBRA: case OP_SCBRAPOS:
  case OP_SCOND:
  case OP_CREF: case OP_DNCREF:
  case OP_RREF: case OP_DNRREF:
  case OP_FALSE: case OP_TRUE:
  case OP_BRAZERO: case OP_BRAMINZERO: case OP_BRAPOSZERO:
  case OP_PRUNE: case OP_SKIP: case OP_THEN: case OP_COMMIT:
  case OP_FAIL: case OP_ACCEPT: case OP_ASSERT_ACCEPT:
  case OP_CLOSE:
  case OP_SKIPZERO:
  case OP_NOT_UCP_WORD_BOUNDARY: case OP_UCP_WORD_BOUNDARY:
  return cc + PRIV(OP_lengths)[*cc];

  /* Table length, plus the extra code units announced by the last
     code unit of the opcode when UTF is enabled. */
  case OP_CHAR: case OP_CHARI:
  case OP_NOT: case OP_NOTI:
  case OP_STAR: case OP_MINSTAR:
  case OP_PLUS: case OP_MINPLUS:
  case OP_QUERY: case OP_MINQUERY:
  case OP_UPTO: case OP_MINUPTO:
  case OP_EXACT:
  case OP_POSSTAR: case OP_POSPLUS:
  case OP_POSQUERY: case OP_POSUPTO:
  case OP_STARI: case OP_MINSTARI:
  case OP_PLUSI: case OP_MINPLUSI:
  case OP_QUERYI: case OP_MINQUERYI:
  case OP_UPTOI: case OP_MINUPTOI:
  case OP_EXACTI:
  case OP_POSSTARI: case OP_POSPLUSI:
  case OP_POSQUERYI: case OP_POSUPTOI:
  case OP_NOTSTAR: case OP_NOTMINSTAR:
  case OP_NOTPLUS: case OP_NOTMINPLUS:
  case OP_NOTQUERY: case OP_NOTMINQUERY:
  case OP_NOTUPTO: case OP_NOTMINUPTO:
  case OP_NOTEXACT:
  case OP_NOTPOSSTAR: case OP_NOTPOSPLUS:
  case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:
  case OP_NOTSTARI: case OP_NOTMINSTARI:
  case OP_NOTPLUSI: case OP_NOTMINPLUSI:
  case OP_NOTQUERYI: case OP_NOTMINQUERYI:
  case OP_NOTUPTOI: case OP_NOTMINUPTOI:
  case OP_NOTEXACTI:
  case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI:
  case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:
  cc += PRIV(OP_lengths)[*cc];
#ifdef SUPPORT_UNICODE
  if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
  return cc;

  /* Special cases: the returned address is one code unit before the
     end given by the OP_lengths table. */
  case OP_TYPESTAR: case OP_TYPEMINSTAR:
  case OP_TYPEPLUS: case OP_TYPEMINPLUS:
  case OP_TYPEQUERY: case OP_TYPEMINQUERY:
  case OP_TYPEUPTO: case OP_TYPEMINUPTO:
  case OP_TYPEEXACT:
  case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS:
  case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:
  return cc + PRIV(OP_lengths)[*cc] - 1;

  /* Rejected (NULL) in UTF mode. */
  case OP_ANYBYTE:
#ifdef SUPPORT_UNICODE
  if (common->utf) return NULL;
#endif
  return cc + 1;

  /* The length is stored inside the opcode. */
  case OP_CALLOUT_STR:
  return cc + GET(cc, 1 + 2*LINK_SIZE);

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
  case OP_ECLASS:
  case OP_XCLASS:
  SLJIT_COMPILE_ASSERT(OP_XCLASS + 1 == OP_ECLASS && OP_CLASS + 1 == OP_NCLASS && OP_NCLASS < OP_XCLASS, class_byte_code_order);
  return cc + GET(cc, 1);
#endif

  /* Opcode, two more code units, and cc[1] further code units. */
  case OP_MARK:
  case OP_COMMIT_ARG:
  case OP_PRUNE_ARG:
  case OP_SKIP_ARG:
  case OP_THEN_ARG:
  return cc + 1 + 2 + cc[1];

  default:
  SLJIT_UNREACHABLE();
  return NULL;
  }
}
1125
1126
/* Computes the size of the local area required by the back reference at cc,
and returns the larger of this value and current_locals_size. */
static sljit_s32 ref_update_local_size(compiler_common *common, PCRE2_SPTR cc, sljit_s32 current_locals_size)
{
/* Depends on do_casefulcmp(), do_caselesscmp(), and compile_ref_matchingpath() */
int required = 2 * SSIZE_OF(sw);
SLJIT_UNUSED_ARG(common);

#ifdef SUPPORT_UNICODE
/* Caseless references need one more word when utf or ucp is enabled. */
if ((common->utf || common->ucp) && (*cc == OP_REFI || *cc == OP_DNREFI))
  required = 3 * SSIZE_OF(sw);
#endif

/* Inspect the opcode which follows the reference. */
cc += PRIV(OP_lengths)[*cc];

/* Although do_casefulcmp() uses only one local, the allocate_stack()
calls made during a repeat destroy the LOCAL1 variables. */
if (*cc >= OP_CRSTAR && *cc <= OP_CRPOSRANGE)
  required += 2 * SSIZE_OF(sw);

if (current_locals_size >= required)
  return current_locals_size;
return required;
}
1145
1146
/* Scans the byte code between cc and ccend. Collects the features used by
the pattern into common, computes the size of the local area, and reserves
the optional mark / recursive head / capture last slots by advancing
common->ovector_start. Returns FALSE when an unsupported construct is found. */
static BOOL check_opcode_types(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend)
{
const sljit_s32 ext_locals_size = 3 * SSIZE_OF(sw);
sljit_s32 locals_size = 2 * SSIZE_OF(sw);
/* Ends of the outermost enclosing assertions seen so far. */
PCRE2_SPTR assert_back_end = cc - 1;
PCRE2_SPTR assert_na_end = cc - 1;
PCRE2_SPTR slot;
int count;
BOOL set_mark = FALSE;
BOOL set_recursive_head = FALSE;
BOOL set_capture_last = FALSE;

/* Calculate important variables (like stack size) and check whether all opcodes are supported. */
while (cc < ccend)
  {
  switch(*cc)
    {
    case OP_SET_SOM:
    common->has_set_som = TRUE;
    common->might_be_empty = TRUE;
    cc++;
    break;

    /* Type iterators: processing continues at the type opcode. */
    case OP_TYPEUPTO:
    case OP_TYPEEXACT:
    if (cc[1 + IMM2_SIZE] == OP_EXTUNI && locals_size < ext_locals_size)
      locals_size = ext_locals_size;
    cc += (2 + IMM2_SIZE) - 1;
    break;

    case OP_TYPEPOSSTAR:
    case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY:
    if (cc[1] == OP_EXTUNI && locals_size < ext_locals_size)
      locals_size = ext_locals_size;
    cc += 2 - 1;
    break;

    case OP_TYPEPOSUPTO:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf && locals_size < ext_locals_size)
      locals_size = ext_locals_size;
#endif
    if (cc[1 + IMM2_SIZE] == OP_EXTUNI && locals_size < ext_locals_size)
      locals_size = ext_locals_size;
    cc += (2 + IMM2_SIZE) - 1;
    break;

    /* Referenced capturing brackets are removed from the optimized set. */
    case OP_REFI:
    case OP_REF:
    locals_size = ref_update_local_size(common, cc, locals_size);
    common->optimized_cbracket[GET2(cc, 1)] = 0;
    cc += PRIV(OP_lengths)[*cc];
    break;

    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ASSERT_SCS:
    slot = bracketend(cc);
    if (assert_na_end < slot)
      assert_na_end = slot;
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    case OP_SCOND:
    /* Only AUTO_CALLOUT can insert a callout here. We do
       not intend to support this case. */
    switch (cc[1 + LINK_SIZE])
      {
      case OP_CALLOUT:
      case OP_CALLOUT_STR:
      return FALSE;
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_CREF:
    common->optimized_cbracket[GET2(cc, 1)] = 0;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_DNREFI:
    case OP_DNREF:
    locals_size = ref_update_local_size(common, cc, locals_size);
    /* Fall through */
    case OP_DNCREF:
    /* Clear every bracket listed by the name table entries. */
    slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
    for (count = GET2(cc, 1 + IMM2_SIZE); count > 0; count--)
      {
      common->optimized_cbracket[GET2(slot, 0)] = 0;
      slot += common->name_entry_size;
      }
    cc += PRIV(OP_lengths)[*cc];
    break;

    case OP_RECURSE:
    /* The slot is reserved only once, after the scan. */
    set_recursive_head = TRUE;
    cc += 1 + LINK_SIZE;
    break;

    case OP_CALLOUT:
    set_capture_last = TRUE;
    cc += PRIV(OP_lengths)[OP_CALLOUT];
    break;

    case OP_CALLOUT_STR:
    set_capture_last = TRUE;
    cc += GET(cc, 1 + 2*LINK_SIZE);
    break;

    case OP_ASSERTBACK:
    slot = bracketend(cc);
    if (assert_back_end < slot)
      assert_back_end = slot;
    cc += 1 + LINK_SIZE;
    break;

    case OP_THEN_ARG:
    common->has_then = TRUE;
    common->control_head_ptr = 1;
    /* Fall through. */

    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    case OP_MARK:
    set_mark = TRUE;
    cc += 1 + 2 + cc[1];
    break;

    case OP_THEN:
    common->has_then = TRUE;
    common->control_head_ptr = 1;
    cc++;
    break;

    case OP_SKIP:
    if (cc < assert_back_end)
      common->has_skip_in_assert_back = TRUE;
    cc++;
    break;

    case OP_SKIP_ARG:
    common->control_head_ptr = 1;
    common->has_skip_arg = TRUE;
    if (cc < assert_back_end)
      common->has_skip_in_assert_back = TRUE;
    cc += 1 + 2 + cc[1];
    break;

    case OP_ASSERT_ACCEPT:
    /* Not supported inside the assertions tracked by assert_na_end. */
    if (cc < assert_na_end)
      return FALSE;
    cc++;
    break;

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    case OP_CRPOSRANGE:
    /* The second value can be 0 for infinite repeats. */
    if (common->utf && GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE) && locals_size < ext_locals_size)
      locals_size = ext_locals_size;
    cc += 1 + 2 * IMM2_SIZE;
    break;

    case OP_POSUPTO:
    case OP_POSUPTOI:
    case OP_NOTPOSUPTO:
    case OP_NOTPOSUPTOI:
    if (common->utf && locals_size < ext_locals_size)
      locals_size = ext_locals_size;
#endif
    /* Fall through */
    default:
    cc = next_opcode(common, cc);
    if (cc == NULL)
      return FALSE;
    break;
    }
  }

SLJIT_ASSERT((locals_size & (SSIZE_OF(sw) - 1)) == 0);
#if defined SLJIT_DEBUG && SLJIT_DEBUG
common->locals_size = locals_size;
#endif

/* The local area is placed before the optional slots below. */
if (locals_size > 0)
  common->ovector_start += locals_size;

if (set_mark)
  {
  SLJIT_ASSERT(common->mark_ptr == 0);
  common->mark_ptr = common->ovector_start;
  common->ovector_start += sizeof(sljit_sw);
  }

if (set_recursive_head)
  {
  SLJIT_ASSERT(common->recursive_head_ptr == 0);
  common->recursive_head_ptr = common->ovector_start;
  common->ovector_start += sizeof(sljit_sw);
  }

if (set_capture_last)
  {
  SLJIT_ASSERT(common->capture_last_ptr == 0);
  common->capture_last_ptr = common->ovector_start;
  common->ovector_start += sizeof(sljit_sw);
  }

return TRUE;
}
1356
1357
#define EARLY_FAIL_ENHANCE_MAX (3 + 3)
1358
1359
/*
  Start represents the number of allowed early fail enhancements.

  The 0-3 values have a special meaning:
    0 - skip is allowed for all iterators
    1 - fail is allowed for all iterators
    2 - fail is allowed for greedy iterators
    3 - only ranged early fail is allowed
  >3 - (start - 3) number of remaining ranged early fails allowed

return: the updated value of start
*/
1371
/* Walks every alternative of the bracket at cc, and assigns skip / fail /
ranged fail private data to the iterators found there. The count variable
follows the same encoding as start. Returns the largest count reached by
the alternatives, or EARLY_FAIL_ENHANCE_MAX when no more enhancements
are possible. */
static int detect_early_fail(compiler_common *common, PCRE2_SPTR cc,
   int *private_data_start, sljit_s32 depth, int start)
{
PCRE2_SPTR begin = cc;
PCRE2_SPTR next_alt;
PCRE2_SPTR bracket_end;
PCRE2_SPTR iterator;
int result = 0;
int count, inner_count;
BOOL ranged;

SLJIT_ASSERT(*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA);
SLJIT_ASSERT(*cc != OP_CBRA || common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] != 0);
SLJIT_ASSERT(start < EARLY_FAIL_ENHANCE_MAX);

next_alt = cc + GET(cc, 1);
/* Skip (0) is never used when the bracket has several alternatives. */
if (start < 1 && *next_alt == OP_ALT)
  start = 1;

do
  {
  count = start;
  cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);

  for (;;)
    {
    /* Set by those cases which found an iterator to enhance. */
    iterator = NULL;

    switch(*cc)
      {
      /* Zero width assertions. */
      case OP_SOD: case OP_SOM: case OP_SET_SOM:
      case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY:
      case OP_EODN: case OP_EOD:
      case OP_CIRC: case OP_CIRCM:
      case OP_DOLL: case OP_DOLLM:
      case OP_NOT_UCP_WORD_BOUNDARY: case OP_UCP_WORD_BOUNDARY:
      cc++;
      continue;

      /* Single code unit long type opcodes. */
      case OP_NOT_DIGIT: case OP_DIGIT:
      case OP_NOT_WHITESPACE: case OP_WHITESPACE:
      case OP_NOT_WORDCHAR: case OP_WORDCHAR:
      case OP_ANY: case OP_ALLANY: case OP_ANYBYTE:
      case OP_NOT_HSPACE: case OP_HSPACE:
      case OP_NOT_VSPACE: case OP_VSPACE:
      if (count < 1) count = 1;
      cc++;
      continue;

      case OP_ANYNL:
      case OP_EXTUNI:
      if (count < 3) count = 3;
      cc++;
      continue;

      case OP_NOTPROP:
      case OP_PROP:
      if (count < 1) count = 1;
      cc += 1 + 2;
      continue;

      case OP_CHAR: case OP_CHARI:
      case OP_NOT: case OP_NOTI:
      if (count < 1) count = 1;
      cc += 2;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      continue;

      /* Non-greedy type iterators: "greedy only" becomes "ranged only". */
      case OP_TYPEMINSTAR:
      case OP_TYPEMINPLUS:
      if (count == 2) count = 3;
      /* Fall through */

      case OP_TYPESTAR: case OP_TYPEPLUS:
      case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS:
      /* The type or prop opcode is skipped in the next iteration. */
      cc++;

      if (cc[0] != OP_ANYNL && cc[0] != OP_EXTUNI)
        {
        iterator = cc - 1;
        break;
        }

      if (count < 3) count = 3;
      continue;

      case OP_TYPEEXACT:
      if (count < 1) count = 1;
      cc += 1 + IMM2_SIZE;
      continue;

      case OP_TYPEUPTO: case OP_TYPEMINUPTO: case OP_TYPEPOSUPTO:
      cc += IMM2_SIZE;
      /* Fall through */

      case OP_TYPEQUERY: case OP_TYPEMINQUERY: case OP_TYPEPOSQUERY:
      /* The type or prop opcode is skipped in the next iteration. */
      if (count < 3) count = 3;
      cc++;
      continue;

      /* Non-greedy character iterators. */
      case OP_MINSTAR: case OP_MINPLUS:
      case OP_MINSTARI: case OP_MINPLUSI:
      case OP_NOTMINSTAR: case OP_NOTMINPLUS:
      case OP_NOTMINSTARI: case OP_NOTMINPLUSI:
      if (count == 2) count = 3;
      /* Fall through */

      case OP_STAR: case OP_PLUS:
      case OP_POSSTAR: case OP_POSPLUS:
      case OP_STARI: case OP_PLUSI:
      case OP_POSSTARI: case OP_POSPLUSI:
      case OP_NOTSTAR: case OP_NOTPLUS:
      case OP_NOTPOSSTAR: case OP_NOTPOSPLUS:
      case OP_NOTSTARI: case OP_NOTPLUSI:
      case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI:
      iterator = cc;
      cc += 2;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      break;

      case OP_EXACT:
      if (count < 1) count = 1;
      cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      continue;

      case OP_UPTO: case OP_MINUPTO: case OP_POSUPTO:
      case OP_UPTOI: case OP_MINUPTOI:
      case OP_EXACTI:
      case OP_POSUPTOI:
      case OP_NOTUPTO: case OP_NOTMINUPTO:
      case OP_NOTEXACT:
      case OP_NOTPOSUPTO:
      case OP_NOTUPTOI: case OP_NOTMINUPTOI:
      case OP_NOTEXACTI:
      case OP_NOTPOSUPTOI:
      cc += IMM2_SIZE;
      /* Fall through */

      case OP_QUERY: case OP_MINQUERY: case OP_POSQUERY:
      case OP_QUERYI: case OP_MINQUERYI: case OP_POSQUERYI:
      case OP_NOTQUERY: case OP_NOTMINQUERY: case OP_NOTPOSQUERY:
      case OP_NOTQUERYI: case OP_NOTMINQUERYI: case OP_NOTPOSQUERYI:
      if (count < 3) count = 3;
      cc += 2;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      continue;

      case OP_CLASS:
      case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
      case OP_XCLASS:
      case OP_ECLASS:
#endif
      iterator = cc;
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
      cc += (*cc >= OP_XCLASS) ? GET(cc, 1) : (unsigned int)(1 + (32 / sizeof(PCRE2_UCHAR)));
#else
      cc += (1 + (32 / sizeof(PCRE2_UCHAR)));
#endif

      /* The repeat opcode (if any) follows the class. */
      switch (*cc)
        {
        case OP_CRMINSTAR:
        case OP_CRMINPLUS:
        if (count == 2) count = 3;
        /* Fall through */

        case OP_CRSTAR: case OP_CRPLUS:
        case OP_CRPOSSTAR: case OP_CRPOSPLUS:
        cc++;
        break;

        case OP_CRRANGE: case OP_CRMINRANGE: case OP_CRPOSRANGE:
        if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
          {
          /* Exact repeat. */
          cc += 1 + 2 * IMM2_SIZE;
          if (count < 1) count = 1;
          continue;
          }

        cc += 2 * IMM2_SIZE;
        /* Fall through */
        case OP_CRQUERY: case OP_CRMINQUERY: case OP_CRPOSQUERY:
        cc++;
        if (count < 3) count = 3;
        continue;

        default:
        /* No repeat. */
        if (count < 1) count = 1;
        continue;
        }
      /* Only the star / plus repeats get here: the class is enhanced. */
      break;

      case OP_BRA:
      case OP_CBRA:
      inner_count = count;
      if (count < 1) count = 1;

      /* Limit the depth of the recursion. */
      if (depth >= 4)
        break;

      if (count < 3 && cc[GET(cc, 1)] == OP_ALT)
        count = 3;

      bracket_end = bracketend(cc);
      if (bracket_end[-1 - LINK_SIZE] != OP_KET || (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0))
        break;

      inner_count = detect_early_fail(common, cc, private_data_start, depth + 1, inner_count);

      if (count < inner_count)
        count = inner_count;

      if (PRIVATE_DATA(cc) != 0)
        common->private_data_ptrs[begin - common->start] = 1;

      if (count < EARLY_FAIL_ENHANCE_MAX)
        {
        cc = bracket_end;
        continue;
        }
      break;

      case OP_KET:
      SLJIT_ASSERT(PRIVATE_DATA(cc) == 0);
      /* Stop at the end of the current alternative. */
      if (cc >= next_alt)
        break;
      cc += 1 + LINK_SIZE;
      continue;
      }

    if (iterator == NULL)
      break;

    if (count == 0)
      {
      common->fast_forward_bc_ptr = iterator;
      common->private_data_ptrs[(iterator + 1) - common->start] = ((*private_data_start) << 3) | type_skip;
      *private_data_start += sizeof(sljit_sw);
      count = 4;
      }
    else
      {
      /* Ranged early fail uses two words, plain early fail uses one. */
      ranged = (count >= 3);

      common->private_data_ptrs[(iterator + 1) - common->start] = ((*private_data_start) << 3) | (ranged ? type_fail_range : type_fail);

      if (common->early_fail_start_ptr == 0)
        common->early_fail_start_ptr = *private_data_start;

      *private_data_start += (ranged ? 2 : 1) * sizeof(sljit_sw);
      common->early_fail_end_ptr = *private_data_start;

      if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
        return EARLY_FAIL_ENHANCE_MAX;

      count = ranged ? count + 1 : 4;
      }

    /* Cannot be part of a repeat. */
    common->private_data_ptrs[begin - common->start] = 1;

    if (count >= EARLY_FAIL_ENHANCE_MAX)
      break;
    }

  /* The scan was stopped before the end of the alternative. */
  if (*cc != OP_ALT && *cc != OP_KET)
    result = EARLY_FAIL_ENHANCE_MAX;
  else if (result < count)
    result = count;

  cc = next_alt;
  next_alt = cc + GET(cc, 1);
  }
while (*cc == OP_ALT);

return result;
}
1746
1747
/* Returns a value between 0 and 2 for the class repeat opcode at cc
(set_private_data_ptrs uses it as the number of private data words).
Zero is returned when cc is not a class repeat handled here, or when
the range is an exact repeat. */
static int get_class_iterator_size(PCRE2_SPTR cc)
{
sljit_u32 lower, upper, span;
BOOL greedy;

switch(*cc)
  {
  case OP_CRSTAR:
  case OP_CRPLUS:
  return 2;

  case OP_CRMINSTAR:
  case OP_CRMINPLUS:
  case OP_CRQUERY:
  case OP_CRMINQUERY:
  return 1;

  case OP_CRRANGE:
  case OP_CRMINRANGE:
  break;

  default:
  return 0;
  }

greedy = (*cc == OP_CRRANGE);
lower = GET2(cc, 1);
upper = GET2(cc, 1 + IMM2_SIZE);

/* A zero upper limit is handled like the star / plus cases above. */
if (upper == 0)
  return greedy ? 2 : 1;

span = upper - lower;
if (span > (sljit_u32)(greedy ? 0 : 1))
  return 2;
return span;
}
1778
1779
/* Checks whether the bracket at begin is followed by identical copies of
itself (optionally followed by a chain of zero-prefixed copies). When such
a repeat is found, its description is stored in three private_data_ptrs
entries after a KET: the length covered by the copies, the repeat type
(OP_EXACT, OP_UPTO or OP_MINUPTO) and the repeat count. Returns TRUE
when the bracket is (part of) a detected repeat. */
static BOOL detect_repeat(compiler_common *common, PCRE2_SPTR begin)
{
PCRE2_SPTR end = bracketend(begin);
PCRE2_SPTR next = end;
PCRE2_SPTR next_end;
PCRE2_SPTR max_end;
PCRE2_UCHAR type;
sljit_sw length = end - begin;
sljit_s32 min = 1;
sljit_s32 max = 0;
sljit_s32 i;

/* Detect fixed iterations first. */
if (end[-(1 + LINK_SIZE)] != OP_KET || PRIVATE_DATA(begin) != 0)
  return FALSE;

/* /(?:AB){4,6}/ is currently converted to /(?:AB){3}(?:AB){1,3}/
 * Skip the check of the second part. */
if (PRIVATE_DATA(end - LINK_SIZE) != 0)
  return TRUE;

/* Count the identical copies which directly follow the bracket. */
while (*next == *begin)
  {
  next_end = bracketend(next);
  if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
    break;
  next = next_end;
  min++;
  }

if (min == 2)
  return FALSE;

max_end = next;
if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
  {
  type = *next;

  /* Nested form: zero opcode, OP_BRA, then a copy of the bracket. */
  while (next[0] == type && next[1] == OP_BRA && next[2 + LINK_SIZE] == *begin)
    {
    next_end = bracketend(next + 2 + LINK_SIZE);
    if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
      break;
    next = next_end;
    max++;
    }

  /* The innermost copy has no OP_BRA wrapper. */
  if (next[0] == type && next[1] == *begin && max >= 1)
    {
    next_end = bracketend(next + 1);
    if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
      {
      /* Every wrapper must be closed by a plain OP_KET. */
      i = 0;
      while (i < max && *next_end == OP_KET)
        {
        i++;
        next_end += 1 + LINK_SIZE;
        }

      if (i == max)
        {
        common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
        common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
        /* +2 the original and the last. */
        common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
        if (min == 1)
          return TRUE;
        min--;
        max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
        }
      }
    }
  }

if (min < 3)
  return FALSE;

common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
return TRUE;
}
1864
1865
/* Case label lists for the iterator opcodes. set_private_data_ptrs()
reserves one private data word for the _1 lists and two words for the
_2A / _2B lists (the _2B opcodes carry an IMM2_SIZE long argument). */

/* Character iterators, one word. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: case OP_MINPLUS: \
    case OP_QUERY: case OP_MINQUERY: \
    case OP_MINSTARI: case OP_MINPLUSI: \
    case OP_QUERYI: case OP_MINQUERYI: \
    case OP_NOTMINSTAR: case OP_NOTMINPLUS: \
    case OP_NOTQUERY: case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: case OP_NOTMINQUERYI:

/* Character iterators, two words. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: case OP_PLUS: \
    case OP_STARI: case OP_PLUSI: \
    case OP_NOTSTAR: case OP_NOTPLUS: \
    case OP_NOTSTARI: case OP_NOTPLUSI:

/* Character iterators with an upper limit, two words. */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: case OP_MINUPTO: \
    case OP_UPTOI: case OP_MINUPTOI: \
    case OP_NOTUPTO: case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: case OP_NOTMINUPTOI:

/* Type iterators, one word. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: case OP_TYPEMINQUERY:

/* Type iterators, two words. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: case OP_TYPEPLUS:

/* Type iterators with an upper limit, two words. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: case OP_TYPEMINUPTO:
1916
1917
/* Assigns private data slots to the opcodes between common->start and
ccend. The first free offset is read from *private_data_start, and the
updated offset is written back on return. The scan stops early when the
offset exceeds SLJIT_MAX_LOCAL_SIZE. */
static void set_private_data_ptrs(compiler_common *common, int *private_data_start, PCRE2_SPTR ccend)
{
PCRE2_SPTR cc = common->start;
PCRE2_SPTR alternative;
/* End of the enclosing bracket in which iterators get no private slot. */
PCRE2_SPTR end = NULL;
int private_data_ptr = *private_data_start;
/* space: number of words for an iterator; size: code units to skip
(negative: a character follows); bracketlen: length of a bracket header. */
int space, size, bracketlen;
BOOL repeat_check = TRUE;

while (cc < ccend)
  {
  if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
    break;

  space = 0;
  size = 0;
  bracketlen = 0;

  /* When the bracket is prefixed by a zero iteration, skip the repeat check (at this point).
     Brackets converted to repeats do not allow global based single
     character repeats inside them. */
  if (repeat_check
      && (*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)
      && detect_repeat(common, cc)
      && cc >= end)
    end = bracketend(cc);
  repeat_check = TRUE;

  switch(*cc)
    {
    case OP_KET:
    /* A non-zero value after the KET was stored by detect_repeat(). */
    if (common->private_data_ptrs[cc + 1 - common->start] != 0)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      /* Skip the copies which belong to the repeat. */
      cc += common->private_data_ptrs[cc + 1 - common->start];
      }
    cc += 1 + LINK_SIZE;
    break;

    /* Brackets which always get one word. */
    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_ASSERTBACK_NA:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);

    /* One more word when any alternative starts with OP_VREVERSE. */
    if (find_vreverse(cc))
      {
      common->private_data_ptrs[cc + 1 - common->start] = 1;
      private_data_ptr += sizeof(sljit_sw);
      }

    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_ASSERT_SCS:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += 2 * sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    common->private_data_ptrs[cc - common->start] = 0;
    alternative = cc + GET(cc, 1);
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      }
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_BRA:
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_BRAZERO:
    case OP_BRAMINZERO:
    case OP_BRAPOSZERO:
    size = 1;
    repeat_check = FALSE;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    size = -2;
    space = 1;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    size = -2;
    space = 2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    size = -(2 + IMM2_SIZE);
    space = 2;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    size = 1;
    space = 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    size = 1;
    /* No private slot for OP_EXTUNI repeats. */
    if (cc[1] != OP_EXTUNI)
      space = 2;
    break;

    case OP_TYPEUPTO:
    size = 1 + IMM2_SIZE;
    if (cc[1 + IMM2_SIZE] != OP_EXTUNI)
      space = 2;
    break;

    case OP_TYPEMINUPTO:
    size = 1 + IMM2_SIZE;
    space = 2;
    break;

    case OP_CLASS:
    case OP_NCLASS:
    size = 1 + 32 / sizeof(PCRE2_UCHAR);
    space = get_class_iterator_size(cc + size);
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    case OP_ECLASS:
    size = GET(cc, 1);
    space = get_class_iterator_size(cc + size);
    break;
#endif

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

  /* Character iterators, which are not inside a repeated bracket,
     get a private slot instead of allocating it on the stack. */
  if (space > 0 && cc >= end)
    {
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw) * space;
    }

  if (size < 0)
    {
    cc += -size;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    }
  else if (size > 0)
    cc += size;

  if (bracketlen > 0)
    {
    if (cc >= end)
      {
      /* Only brackets which are not closed by a plain OP_KET are tracked. */
      end = bracketend(cc);
      if (end[-1 - LINK_SIZE] == OP_KET)
        end = NULL;
      }
    cc += bracketlen;
    }
  }

*private_data_start = private_data_ptr;
}
2120
2121
/* Returns with a frame_types (always < 0) if no need for frame. */
2122
static int get_framesize(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, BOOL recursive, BOOL *needs_control_head)
2123
{
2124
int length = 0;
2125
int possessive = 0;
2126
BOOL stack_restore = FALSE;
2127
BOOL setsom_found = recursive;
2128
BOOL setmark_found = recursive;
2129
/* The last capture is a local variable even for recursions. */
2130
BOOL capture_last_found = FALSE;
2131
2132
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2133
SLJIT_ASSERT(common->control_head_ptr != 0);
2134
*needs_control_head = TRUE;
2135
#else
2136
*needs_control_head = FALSE;
2137
#endif
2138
2139
if (ccend == NULL)
2140
  {
2141
  ccend = bracketend(cc) - (1 + LINK_SIZE);
2142
  if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
2143
    {
2144
    possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
2145
    /* This is correct regardless of common->capture_last_ptr. */
2146
    capture_last_found = TRUE;
2147
    }
2148
  cc = next_opcode(common, cc);
2149
  }
2150
2151
SLJIT_ASSERT(cc != NULL);
2152
while (cc < ccend)
2153
  switch(*cc)
2154
    {
2155
    case OP_SET_SOM:
2156
    SLJIT_ASSERT(common->has_set_som);
2157
    stack_restore = TRUE;
2158
    if (!setsom_found)
2159
      {
2160
      length += 2;
2161
      setsom_found = TRUE;
2162
      }
2163
    cc += 1;
2164
    break;
2165
2166
    case OP_MARK:
2167
    case OP_COMMIT_ARG:
2168
    case OP_PRUNE_ARG:
2169
    case OP_THEN_ARG:
2170
    SLJIT_ASSERT(common->mark_ptr != 0);
2171
    stack_restore = TRUE;
2172
    if (!setmark_found)
2173
      {
2174
      length += 2;
2175
      setmark_found = TRUE;
2176
      }
2177
    if (common->control_head_ptr != 0)
2178
      *needs_control_head = TRUE;
2179
    cc += 1 + 2 + cc[1];
2180
    break;
2181
2182
    case OP_RECURSE:
2183
    stack_restore = TRUE;
2184
    if (common->has_set_som && !setsom_found)
2185
      {
2186
      length += 2;
2187
      setsom_found = TRUE;
2188
      }
2189
    if (common->mark_ptr != 0 && !setmark_found)
2190
      {
2191
      length += 2;
2192
      setmark_found = TRUE;
2193
      }
2194
    if (common->capture_last_ptr != 0 && !capture_last_found)
2195
      {
2196
      length += 2;
2197
      capture_last_found = TRUE;
2198
      }
2199
    cc += 1 + LINK_SIZE;
2200
    break;
2201
2202
    case OP_CBRA:
2203
    case OP_CBRAPOS:
2204
    case OP_SCBRA:
2205
    case OP_SCBRAPOS:
2206
    stack_restore = TRUE;
2207
    if (common->capture_last_ptr != 0 && !capture_last_found)
2208
      {
2209
      length += 2;
2210
      capture_last_found = TRUE;
2211
      }
2212
    length += 3;
2213
    cc += 1 + LINK_SIZE + IMM2_SIZE;
2214
    break;
2215
2216
    case OP_THEN:
2217
    stack_restore = TRUE;
2218
    if (common->control_head_ptr != 0)
2219
      *needs_control_head = TRUE;
2220
    cc ++;
2221
    break;
2222
2223
    default:
2224
    stack_restore = TRUE;
2225
    /* Fall through. */
2226
2227
    case OP_NOT_WORD_BOUNDARY:
2228
    case OP_WORD_BOUNDARY:
2229
    case OP_NOT_DIGIT:
2230
    case OP_DIGIT:
2231
    case OP_NOT_WHITESPACE:
2232
    case OP_WHITESPACE:
2233
    case OP_NOT_WORDCHAR:
2234
    case OP_WORDCHAR:
2235
    case OP_ANY:
2236
    case OP_ALLANY:
2237
    case OP_ANYBYTE:
2238
    case OP_NOTPROP:
2239
    case OP_PROP:
2240
    case OP_ANYNL:
2241
    case OP_NOT_HSPACE:
2242
    case OP_HSPACE:
2243
    case OP_NOT_VSPACE:
2244
    case OP_VSPACE:
2245
    case OP_EXTUNI:
2246
    case OP_EODN:
2247
    case OP_EOD:
2248
    case OP_CIRC:
2249
    case OP_CIRCM:
2250
    case OP_DOLL:
2251
    case OP_DOLLM:
2252
    case OP_CHAR:
2253
    case OP_CHARI:
2254
    case OP_NOT:
2255
    case OP_NOTI:
2256
2257
    case OP_EXACT:
2258
    case OP_POSSTAR:
2259
    case OP_POSPLUS:
2260
    case OP_POSQUERY:
2261
    case OP_POSUPTO:
2262
2263
    case OP_EXACTI:
2264
    case OP_POSSTARI:
2265
    case OP_POSPLUSI:
2266
    case OP_POSQUERYI:
2267
    case OP_POSUPTOI:
2268
2269
    case OP_NOTEXACT:
2270
    case OP_NOTPOSSTAR:
2271
    case OP_NOTPOSPLUS:
2272
    case OP_NOTPOSQUERY:
2273
    case OP_NOTPOSUPTO:
2274
2275
    case OP_NOTEXACTI:
2276
    case OP_NOTPOSSTARI:
2277
    case OP_NOTPOSPLUSI:
2278
    case OP_NOTPOSQUERYI:
2279
    case OP_NOTPOSUPTOI:
2280
2281
    case OP_TYPEEXACT:
2282
    case OP_TYPEPOSSTAR:
2283
    case OP_TYPEPOSPLUS:
2284
    case OP_TYPEPOSQUERY:
2285
    case OP_TYPEPOSUPTO:
2286
2287
    case OP_CLASS:
2288
    case OP_NCLASS:
2289
    case OP_XCLASS:
2290
    case OP_ECLASS:
2291
2292
    case OP_CALLOUT:
2293
    case OP_CALLOUT_STR:
2294
2295
    case OP_NOT_UCP_WORD_BOUNDARY:
2296
    case OP_UCP_WORD_BOUNDARY:
2297
2298
    cc = next_opcode(common, cc);
2299
    SLJIT_ASSERT(cc != NULL);
2300
    break;
2301
    }
2302
2303
/* Possessive quantifiers can use a special case. */
2304
if (SLJIT_UNLIKELY(possessive == length))
2305
  return stack_restore ? no_frame : no_stack;
2306
2307
if (length > 0)
2308
  return length + 1;
2309
return stack_restore ? no_frame : no_stack;
2310
}
2311
2312
static void init_frame(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, int stackpos, int stacktop)
2313
{
2314
DEFINE_COMPILER;
2315
BOOL setsom_found = FALSE;
2316
BOOL setmark_found = FALSE;
2317
/* The last capture is a local variable even for recursions. */
2318
BOOL capture_last_found = FALSE;
2319
int offset;
2320
2321
/* >= 1 + shortest item size (2) */
2322
SLJIT_UNUSED_ARG(stacktop);
2323
SLJIT_ASSERT(stackpos >= stacktop + 2);
2324
2325
stackpos = STACK(stackpos);
2326
if (ccend == NULL)
2327
  {
2328
  ccend = bracketend(cc) - (1 + LINK_SIZE);
2329
  if (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS)
2330
    cc = next_opcode(common, cc);
2331
  }
2332
2333
/* The data is restored by do_revertframes(). */
2334
SLJIT_ASSERT(cc != NULL);
2335
while (cc < ccend)
2336
  switch(*cc)
2337
    {
2338
    case OP_SET_SOM:
2339
    SLJIT_ASSERT(common->has_set_som);
2340
    if (!setsom_found)
2341
      {
2342
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
2343
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
2344
      stackpos -= SSIZE_OF(sw);
2345
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2346
      stackpos -= SSIZE_OF(sw);
2347
      setsom_found = TRUE;
2348
      }
2349
    cc += 1;
2350
    break;
2351
2352
    case OP_MARK:
2353
    case OP_COMMIT_ARG:
2354
    case OP_PRUNE_ARG:
2355
    case OP_THEN_ARG:
2356
    SLJIT_ASSERT(common->mark_ptr != 0);
2357
    if (!setmark_found)
2358
      {
2359
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2360
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
2361
      stackpos -= SSIZE_OF(sw);
2362
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2363
      stackpos -= SSIZE_OF(sw);
2364
      setmark_found = TRUE;
2365
      }
2366
    cc += 1 + 2 + cc[1];
2367
    break;
2368
2369
    case OP_RECURSE:
2370
    if (common->has_set_som && !setsom_found)
2371
      {
2372
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
2373
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
2374
      stackpos -= SSIZE_OF(sw);
2375
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2376
      stackpos -= SSIZE_OF(sw);
2377
      setsom_found = TRUE;
2378
      }
2379
    if (common->mark_ptr != 0 && !setmark_found)
2380
      {
2381
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2382
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
2383
      stackpos -= SSIZE_OF(sw);
2384
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2385
      stackpos -= SSIZE_OF(sw);
2386
      setmark_found = TRUE;
2387
      }
2388
    if (common->capture_last_ptr != 0 && !capture_last_found)
2389
      {
2390
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
2391
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
2392
      stackpos -= SSIZE_OF(sw);
2393
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2394
      stackpos -= SSIZE_OF(sw);
2395
      capture_last_found = TRUE;
2396
      }
2397
    cc += 1 + LINK_SIZE;
2398
    break;
2399
2400
    case OP_CBRA:
2401
    case OP_CBRAPOS:
2402
    case OP_SCBRA:
2403
    case OP_SCBRAPOS:
2404
    if (common->capture_last_ptr != 0 && !capture_last_found)
2405
      {
2406
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
2407
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
2408
      stackpos -= SSIZE_OF(sw);
2409
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2410
      stackpos -= SSIZE_OF(sw);
2411
      capture_last_found = TRUE;
2412
      }
2413
    offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
2414
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
2415
    stackpos -= SSIZE_OF(sw);
2416
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
2417
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
2418
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2419
    stackpos -= SSIZE_OF(sw);
2420
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
2421
    stackpos -= SSIZE_OF(sw);
2422
2423
    cc += 1 + LINK_SIZE + IMM2_SIZE;
2424
    break;
2425
2426
    default:
2427
    cc = next_opcode(common, cc);
2428
    SLJIT_ASSERT(cc != NULL);
2429
    break;
2430
    }
2431
2432
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
2433
SLJIT_ASSERT(stackpos == STACK(stacktop));
2434
}
2435
2436
/* Number of temporary register slots cycled through by the delayed memory
   copy helpers below. */
#define RECURSE_TMP_REG_COUNT 3
2437
2438
/* State of a sequence of delayed memory-to-memory copies. Each copy loads a
   word into a temporary register at once, while the matching store is only
   emitted when the same slot is reused or when the sequence is finished. */
typedef struct delayed_mem_copy_status {
2439
  /* Compiler used for emitting the instructions (taken from compiler_common). */
  struct sljit_compiler *compiler;
2440
  /* Base register of the pending store of each slot, or -1 if the slot has
     no pending store. */
  int store_bases[RECURSE_TMP_REG_COUNT];
2441
  /* Offset of the pending store of each slot (valid when store_bases is not -1). */
  int store_offsets[RECURSE_TMP_REG_COUNT];
2442
  /* Register which holds the loaded value of each slot. Set by the caller
     before delayed_mem_copy_init() is called. */
  int tmp_regs[RECURSE_TMP_REG_COUNT];
2443
  /* When sljit_get_register_index() is negative for this register, the
     original value of tmp_regs[i] is kept here during the copy sequence;
     otherwise it must be the same as tmp_regs[i]. Set by the caller. */
  int saved_tmp_regs[RECURSE_TMP_REG_COUNT];
2444
  /* Index of the slot used by the next copy (advances round-robin). */
  int next_tmp_reg;
2445
} delayed_mem_copy_status;
2446
2447
/* Prepares status for a new sequence of delayed copies: binds it to the
compiler of common, selects slot 0 as the first slot and marks every slot as
having no pending store. The tmp_regs and saved_tmp_regs arrays must have been
filled in by the caller; they are only checked here. */
static void delayed_mem_copy_init(delayed_mem_copy_status *status, compiler_common *common)
{
int slot;

status->compiler = common->compiler;
status->next_tmp_reg = 0;

for (slot = 0; slot < RECURSE_TMP_REG_COUNT; slot++)
  {
  SLJIT_ASSERT(status->tmp_regs[slot] >= 0);
  /* Unless the save register has a negative register index, it must be the temporary itself. */
  SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, status->saved_tmp_regs[slot]) < 0 || status->tmp_regs[slot] == status->saved_tmp_regs[slot]);

  /* -1 means: nothing to store for this slot. */
  status->store_bases[slot] = -1;
  }
}
2461
2462
/* Queues the copy of one machine word from load_base + load_offset to
store_base + store_offset. The load is emitted immediately into the temporary
register of the current slot; the store is recorded and emitted later, either
when this slot is reused or by delayed_mem_copy_finish(). */
static void delayed_mem_copy_move(delayed_mem_copy_status *status, int load_base, sljit_sw load_offset,
  int store_base, sljit_sw store_offset)
{
struct sljit_compiler *compiler = status->compiler;
const int slot = status->next_tmp_reg;
const int reg = status->tmp_regs[slot];
const int pending_base = status->store_bases[slot];

SLJIT_ASSERT(load_base > 0 && store_base > 0);

if (pending_base != -1)
  {
  /* The slot is occupied: write out its pending store before reusing the register. */
  OP1(SLJIT_MOV, SLJIT_MEM1(pending_base), status->store_offsets[slot], reg, 0);
  }
else if (sljit_get_register_index(SLJIT_GP_REGISTER, status->saved_tmp_regs[slot]) < 0)
  {
  /* First use of the slot: preserve virtual registers. */
  OP1(SLJIT_MOV, status->saved_tmp_regs[slot], 0, reg, 0);
  }

OP1(SLJIT_MOV, reg, 0, SLJIT_MEM1(load_base), load_offset);

/* Remember where the loaded value has to be stored. */
status->store_bases[slot] = store_base;
status->store_offsets[slot] = store_offset;

/* Advance to the next slot (round-robin). */
status->next_tmp_reg = (slot + 1) % RECURSE_TMP_REG_COUNT;
}
2486
2487
/* Completes a sequence of delayed copies: emits the pending store of every
occupied slot, visiting the slots in round-robin order starting at the slot
which would be used next. After the store of a slot, the original value of
its temporary register is restored if it was preserved elsewhere. */
static void delayed_mem_copy_finish(delayed_mem_copy_status *status)
{
struct sljit_compiler *compiler = status->compiler;
int slot = status->next_tmp_reg;
int reg, backup, remaining;

for (remaining = RECURSE_TMP_REG_COUNT; remaining > 0; remaining--, slot = (slot + 1) % RECURSE_TMP_REG_COUNT)
  {
  /* Slots which have never been used have nothing to write out. */
  if (status->store_bases[slot] == -1)
    continue;

  reg = status->tmp_regs[slot];
  backup = status->saved_tmp_regs[slot];

  OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[slot]), status->store_offsets[slot], reg, 0);

  /* Restore virtual registers. */
  if (sljit_get_register_index(SLJIT_GP_REGISTER, backup) < 0)
    OP1(SLJIT_MOV, reg, 0, backup, 0);
  }
}

#undef RECURSE_TMP_REG_COUNT
2512
2513
/* Test-and-set operation on common->recurse_bitset. bit_index is an offset
which must be a multiple of sizeof(sljit_sw); it is converted to a word index
by shifting it right by SLJIT_WORD_SHIFT, and the word index selects the bit.

Returns TRUE if the bit was clear (it is set by this call), and FALSE if the
bit was already set. */
static BOOL recurse_check_bit(compiler_common *common, sljit_sw bit_index)
{
uint8_t *cell;
uint8_t bit;

SLJIT_ASSERT((bit_index & (sizeof(sljit_sw) - 1)) == 0);
bit_index >>= SLJIT_WORD_SHIFT;
SLJIT_ASSERT((bit_index >> 3) < common->recurse_bitset_size);

/* Eight bits per byte: the upper part selects the byte, the low three bits the bit. */
cell = common->recurse_bitset + (bit_index >> 3);
bit = 1 << (bit_index & 0x7);

if ((*cell & bit) == 0)
  {
  *cell |= bit;
  return TRUE;
  }

return FALSE;
}
2533
2534
/* Bit flags collected by get_recurse_data_length() while scanning the
   opcodes of a recursion, and returned through its result_flags argument. */
enum get_recurse_flags {
2535
  /* A (*PRUNE), (*SKIP), (*COMMIT) or (*THEN) verb was found (with or
     without an argument). */
  recurse_flag_quit_found = (1 << 0),
2536
  /* OP_ACCEPT or OP_ASSERT_ACCEPT was found. */
  recurse_flag_accept_found = (1 << 1),
2537
  /* OP_SET_SOM was found, or OP_RECURSE was found while has_set_som is set. */
  recurse_flag_setsom_found = (1 << 2),
2538
  /* A verb with a mark argument was found, or OP_RECURSE was found while
     mark_ptr is non-zero. */
  recurse_flag_setmark_found = (1 << 3),
2539
  /* OP_THEN was found, or a verb with a mark argument was found while
     control_head_ptr is non-zero (always set when DEBUG_FORCE_CONTROL_HEAD
     is enabled). */
  recurse_flag_control_head_found = (1 << 4),
2540
};
2541
2542
/* Computes how many machine words are affected by the opcodes in [cc, ccend),
which must be the exact range of a recursion (the scan has to stop at ccend).
Each slot is counted only once: common->recurse_bitset is cleared first, and
recurse_check_bit() tells whether a slot has been seen before.

The count starts at one (presumably the recursive head slot, which
copy_recurse_data() always transfers first -- to be confirmed there). One more
word is added when a control head is needed, and one each for the start of
match and the mark when a quit-type verb is present.

The recurse_flag_* bits collected during the scan are returned in
*result_flags. */
static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, uint32_t *result_flags)
{
int word_count = 1;
int class_len, slot, capture;
PCRE2_SPTR alt;
uint32_t flags = 0;

memset(common->recurse_bitset, 0, common->recurse_bitset_size);

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
SLJIT_ASSERT(common->control_head_ptr != 0);
flags |= recurse_flag_control_head_found;
#endif

/* Calculate the sum of the private machine words. */
while (cc < ccend)
  {
  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    flags |= recurse_flag_setsom_found;
    cc++;
    break;

    case OP_RECURSE:
    if (common->has_set_som)
      flags |= recurse_flag_setsom_found;
    if (common->mark_ptr != 0)
      flags |= recurse_flag_setmark_found;
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      word_count++;
    cc += 1 + LINK_SIZE;
    break;

    case OP_KET:
    slot = PRIVATE_DATA(cc);
    if (slot != 0)
      {
      if (recurse_check_bit(common, slot))
        word_count++;
      /* The next entry holds the number of extra units to skip. */
      SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
      cc += PRIVATE_DATA(cc + 1);
      }
    cc += 1 + LINK_SIZE;
    break;

    /* Brackets which always own one private word. */
    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
    if (recurse_check_bit(common, PRIVATE_DATA(cc)))
      word_count++;
    cc += 1 + LINK_SIZE;
    break;

    /* This bracket owns two private words. */
    case OP_ASSERT_SCS:
    SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
    if (recurse_check_bit(common, PRIVATE_DATA(cc)))
      word_count += 2;
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    capture = GET2(cc, 1 + LINK_SIZE);
    /* The two ovector words of the capture. */
    if (recurse_check_bit(common, OVECTOR(capture << 1)))
      {
      SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((capture << 1) + 1)));
      word_count += 2;
      }
    /* The private ovector word, unless the bracket is optimized. */
    if (common->optimized_cbracket[capture] == 0 && recurse_check_bit(common, OVECTOR_PRIV(capture)))
      word_count++;
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      word_count++;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    capture = GET2(cc, 1 + LINK_SIZE);
    /* The two ovector words of the capture. */
    if (recurse_check_bit(common, OVECTOR(capture << 1)))
      {
      SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((capture << 1) + 1)));
      word_count += 2;
      }
    /* The private ovector word and the private data word of the bracket. */
    if (recurse_check_bit(common, OVECTOR_PRIV(capture)))
      word_count++;
    if (recurse_check_bit(common, PRIVATE_DATA(cc)))
      word_count++;
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      word_count++;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    alt = cc + GET(cc, 1);
    if ((*alt == OP_KETRMAX || *alt == OP_KETRMIN) && recurse_check_bit(common, PRIVATE_DATA(cc)))
      word_count++;
    cc += 1 + LINK_SIZE;
    break;

    /* Character iterators: a zero private data offset means no private slot. */
    CASE_ITERATOR_PRIVATE_DATA_1
    slot = PRIVATE_DATA(cc);
    if (slot != 0 && recurse_check_bit(common, slot))
      word_count++;
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    slot = PRIVATE_DATA(cc);
    if (slot != 0 && recurse_check_bit(common, slot))
      {
      SLJIT_ASSERT(recurse_check_bit(common, slot + sizeof(sljit_sw)));
      word_count += 2;
      }
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    slot = PRIVATE_DATA(cc);
    if (slot != 0 && recurse_check_bit(common, slot))
      {
      SLJIT_ASSERT(recurse_check_bit(common, slot + sizeof(sljit_sw)));
      word_count += 2;
      }
    cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    /* Character type iterators. */
    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    slot = PRIVATE_DATA(cc);
    if (slot != 0 && recurse_check_bit(common, slot))
      word_count++;
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    slot = PRIVATE_DATA(cc);
    if (slot != 0 && recurse_check_bit(common, slot))
      {
      SLJIT_ASSERT(recurse_check_bit(common, slot + sizeof(sljit_sw)));
      word_count += 2;
      }
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
    slot = PRIVATE_DATA(cc);
    if (slot != 0 && recurse_check_bit(common, slot))
      {
      SLJIT_ASSERT(recurse_check_bit(common, slot + sizeof(sljit_sw)));
      word_count += 2;
      }
    cc += 1 + IMM2_SIZE;
    break;

    /* Character classes: the slot count depends on the iterator which follows the class. */
    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    case OP_ECLASS:
    class_len = (*cc >= OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#else
    class_len = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#endif

    slot = PRIVATE_DATA(cc);
    if (slot != 0 && recurse_check_bit(common, slot))
      word_count += get_class_iterator_size(cc + class_len);
    cc += class_len;
    break;

    case OP_MARK:
    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    flags |= recurse_flag_setmark_found;
    if (common->control_head_ptr != 0)
      flags |= recurse_flag_control_head_found;
    /* Every verb except a plain mark is a quit-type verb. */
    if (*cc != OP_MARK)
      flags |= recurse_flag_quit_found;

    cc += 1 + 2 + cc[1];
    break;

    case OP_PRUNE:
    case OP_SKIP:
    case OP_COMMIT:
    flags |= recurse_flag_quit_found;
    cc++;
    break;

    case OP_SKIP_ARG:
    flags |= recurse_flag_quit_found;
    cc += 1 + 2 + cc[1];
    break;

    case OP_THEN:
    SLJIT_ASSERT(common->control_head_ptr != 0);
    flags |= recurse_flag_quit_found | recurse_flag_control_head_found;
    cc++;
    break;

    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    flags |= recurse_flag_accept_found;
    cc++;
    break;

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }
  }
SLJIT_ASSERT(cc == ccend);

/* One word for the control head. */
if ((flags & recurse_flag_control_head_found) != 0)
  word_count++;

/* The start of match and the mark only need a word when a quit-type verb is present. */
if ((flags & recurse_flag_quit_found) != 0)
  {
  if ((flags & recurse_flag_setsom_found) != 0)
    word_count++;
  if ((flags & recurse_flag_setmark_found) != 0)
    word_count++;
  }

*result_flags = flags;
return word_count;
}
2793
2794
/* Operation selector passed as the type argument of copy_recurse_data().
   NOTE(review): the per-value notes below are derived only from the setup
   code at the start of copy_recurse_data(); confirm against its full body. */
enum copy_recurse_data_types {
2795
  /* Copy direction is "from SLJIT_SP" (from_sp is TRUE), addressed
     relative to STACK_TOP. */
  recurse_copy_from_global,
2796
  /* Copies in the opposite direction (from_sp is FALSE), addressed relative
     to STACK_TOP; the recursive head slot is transferred as well. */
  recurse_copy_private_to_global,
2797
  /* Same direction and base as above, but the recursive head slot is not
     transferred; presumably handles the "shared" slots only. */
  recurse_copy_shared_to_global,
2798
  /* Same direction and base as above, the recursive head slot is not
     transferred; presumably handles the "kept shared" slots only. */
  recurse_copy_kept_shared_to_global,
2799
  /* Exchanges the slots in both directions, addressed relative to TMP2. */
  recurse_swap_global
2800
};
2801
2802
static void copy_recurse_data(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
2803
  int type, int stackptr, int stacktop, uint32_t recurse_flags)
2804
{
2805
delayed_mem_copy_status status;
2806
PCRE2_SPTR alternative;
2807
sljit_sw private_srcw[2];
2808
sljit_sw shared_srcw[3];
2809
sljit_sw kept_shared_srcw[2];
2810
int private_count, shared_count, kept_shared_count;
2811
int from_sp, base_reg, offset, i;
2812
2813
memset(common->recurse_bitset, 0, common->recurse_bitset_size);
2814
2815
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2816
SLJIT_ASSERT(common->control_head_ptr != 0);
2817
recurse_check_bit(common, common->control_head_ptr);
2818
#endif
2819
2820
switch (type)
2821
  {
2822
  case recurse_copy_from_global:
2823
  from_sp = TRUE;
2824
  base_reg = STACK_TOP;
2825
  break;
2826
2827
  case recurse_copy_private_to_global:
2828
  case recurse_copy_shared_to_global:
2829
  case recurse_copy_kept_shared_to_global:
2830
  from_sp = FALSE;
2831
  base_reg = STACK_TOP;
2832
  break;
2833
2834
  default:
2835
  SLJIT_ASSERT(type == recurse_swap_global);
2836
  from_sp = FALSE;
2837
  base_reg = TMP2;
2838
  break;
2839
  }
2840
2841
stackptr = STACK(stackptr);
2842
stacktop = STACK(stacktop);
2843
2844
status.tmp_regs[0] = TMP1;
2845
status.saved_tmp_regs[0] = TMP1;
2846
2847
if (base_reg != TMP2)
2848
  {
2849
  status.tmp_regs[1] = TMP2;
2850
  status.saved_tmp_regs[1] = TMP2;
2851
  }
2852
else
2853
  {
2854
  status.saved_tmp_regs[1] = RETURN_ADDR;
2855
  if (HAS_VIRTUAL_REGISTERS)
2856
    status.tmp_regs[1] = STR_PTR;
2857
  else
2858
    status.tmp_regs[1] = RETURN_ADDR;
2859
  }
2860
2861
status.saved_tmp_regs[2] = TMP3;
2862
if (HAS_VIRTUAL_REGISTERS)
2863
  status.tmp_regs[2] = STR_END;
2864
else
2865
  status.tmp_regs[2] = TMP3;
2866
2867
delayed_mem_copy_init(&status, common);
2868
2869
if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
2870
  {
2871
  SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
2872
2873
  if (!from_sp)
2874
    delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->recursive_head_ptr);
2875
2876
  if (from_sp || type == recurse_swap_global)
2877
    delayed_mem_copy_move(&status, SLJIT_SP, common->recursive_head_ptr, base_reg, stackptr);
2878
  }
2879
2880
stackptr += sizeof(sljit_sw);
2881
2882
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2883
if (type != recurse_copy_shared_to_global)
2884
  {
2885
  if (!from_sp)
2886
    delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->control_head_ptr);
2887
2888
  if (from_sp || type == recurse_swap_global)
2889
    delayed_mem_copy_move(&status, SLJIT_SP, common->control_head_ptr, base_reg, stackptr);
2890
  }
2891
2892
stackptr += sizeof(sljit_sw);
2893
#endif
2894
2895
while (cc < ccend)
2896
  {
2897
  private_count = 0;
2898
  shared_count = 0;
2899
  kept_shared_count = 0;
2900
2901
  switch(*cc)
2902
    {
2903
    case OP_SET_SOM:
2904
    SLJIT_ASSERT(common->has_set_som);
2905
    if ((recurse_flags & recurse_flag_quit_found) && recurse_check_bit(common, OVECTOR(0)))
2906
      {
2907
      kept_shared_srcw[0] = OVECTOR(0);
2908
      kept_shared_count = 1;
2909
      }
2910
    cc += 1;
2911
    break;
2912
2913
    case OP_RECURSE:
2914
    if (recurse_flags & recurse_flag_quit_found)
2915
      {
2916
      if (common->has_set_som && recurse_check_bit(common, OVECTOR(0)))
2917
        {
2918
        kept_shared_srcw[0] = OVECTOR(0);
2919
        kept_shared_count = 1;
2920
        }
2921
      if (common->mark_ptr != 0 && recurse_check_bit(common, common->mark_ptr))
2922
        {
2923
        kept_shared_srcw[kept_shared_count] = common->mark_ptr;
2924
        kept_shared_count++;
2925
        }
2926
      }
2927
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2928
      {
2929
      shared_srcw[0] = common->capture_last_ptr;
2930
      shared_count = 1;
2931
      }
2932
    cc += 1 + LINK_SIZE;
2933
    break;
2934
2935
    case OP_KET:
2936
    private_srcw[0] = PRIVATE_DATA(cc);
2937
    if (private_srcw[0] != 0)
2938
      {
2939
      if (recurse_check_bit(common, private_srcw[0]))
2940
        private_count = 1;
2941
      SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
2942
      cc += PRIVATE_DATA(cc + 1);
2943
      }
2944
    cc += 1 + LINK_SIZE;
2945
    break;
2946
2947
    case OP_ASSERT:
2948
    case OP_ASSERT_NOT:
2949
    case OP_ASSERTBACK:
2950
    case OP_ASSERTBACK_NOT:
2951
    case OP_ASSERT_NA:
2952
    case OP_ASSERTBACK_NA:
2953
    case OP_ONCE:
2954
    case OP_SCRIPT_RUN:
2955
    case OP_BRAPOS:
2956
    case OP_SBRA:
2957
    case OP_SBRAPOS:
2958
    case OP_SCOND:
2959
    private_srcw[0] = PRIVATE_DATA(cc);
2960
    if (recurse_check_bit(common, private_srcw[0]))
2961
      private_count = 1;
2962
    cc += 1 + LINK_SIZE;
2963
    break;
2964
2965
    case OP_ASSERT_SCS:
2966
    private_srcw[0] = PRIVATE_DATA(cc);
2967
    private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2968
    if (recurse_check_bit(common, private_srcw[0]))
2969
      private_count = 2;
2970
    cc += 1 + LINK_SIZE;
2971
    break;
2972
2973
    case OP_CBRA:
2974
    case OP_SCBRA:
2975
    offset = GET2(cc, 1 + LINK_SIZE);
2976
    shared_srcw[0] = OVECTOR(offset << 1);
2977
    if (recurse_check_bit(common, shared_srcw[0]))
2978
      {
2979
      shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw);
2980
      SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1]));
2981
      shared_count = 2;
2982
      }
2983
2984
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2985
      {
2986
      shared_srcw[shared_count] = common->capture_last_ptr;
2987
      shared_count++;
2988
      }
2989
2990
    if (common->optimized_cbracket[offset] == 0)
2991
      {
2992
      private_srcw[0] = OVECTOR_PRIV(offset);
2993
      if (recurse_check_bit(common, private_srcw[0]))
2994
        private_count = 1;
2995
      }
2996
2997
    cc += 1 + LINK_SIZE + IMM2_SIZE;
2998
    break;
2999
3000
    case OP_CBRAPOS:
3001
    case OP_SCBRAPOS:
3002
    offset = GET2(cc, 1 + LINK_SIZE);
3003
    shared_srcw[0] = OVECTOR(offset << 1);
3004
    if (recurse_check_bit(common, shared_srcw[0]))
3005
      {
3006
      shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw);
3007
      SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1]));
3008
      shared_count = 2;
3009
      }
3010
3011
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
3012
      {
3013
      shared_srcw[shared_count] = common->capture_last_ptr;
3014
      shared_count++;
3015
      }
3016
3017
    private_srcw[0] = PRIVATE_DATA(cc);
3018
    if (recurse_check_bit(common, private_srcw[0]))
3019
      private_count = 1;
3020
3021
    offset = OVECTOR_PRIV(offset);
3022
    if (recurse_check_bit(common, offset))
3023
      {
3024
      private_srcw[private_count] = offset;
3025
      private_count++;
3026
      }
3027
    cc += 1 + LINK_SIZE + IMM2_SIZE;
3028
    break;
3029
3030
    case OP_COND:
3031
    /* Might be a hidden SCOND. */
3032
    alternative = cc + GET(cc, 1);
3033
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
3034
      {
3035
      private_srcw[0] = PRIVATE_DATA(cc);
3036
      if (recurse_check_bit(common, private_srcw[0]))
3037
        private_count = 1;
3038
      }
3039
    cc += 1 + LINK_SIZE;
3040
    break;
3041
3042
    CASE_ITERATOR_PRIVATE_DATA_1
3043
    private_srcw[0] = PRIVATE_DATA(cc);
3044
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
3045
      private_count = 1;
3046
    cc += 2;
3047
#ifdef SUPPORT_UNICODE
3048
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3049
#endif
3050
    break;
3051
3052
    CASE_ITERATOR_PRIVATE_DATA_2A
3053
    private_srcw[0] = PRIVATE_DATA(cc);
3054
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
3055
      {
3056
      private_count = 2;
3057
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
3058
      SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
3059
      }
3060
    cc += 2;
3061
#ifdef SUPPORT_UNICODE
3062
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3063
#endif
3064
    break;
3065
3066
    CASE_ITERATOR_PRIVATE_DATA_2B
3067
    private_srcw[0] = PRIVATE_DATA(cc);
3068
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
3069
      {
3070
      private_count = 2;
3071
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
3072
      SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
3073
      }
3074
    cc += 2 + IMM2_SIZE;
3075
#ifdef SUPPORT_UNICODE
3076
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3077
#endif
3078
    break;
3079
3080
    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
3081
    private_srcw[0] = PRIVATE_DATA(cc);
3082
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
3083
      private_count = 1;
3084
    cc += 1;
3085
    break;
3086
3087
    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
3088
    private_srcw[0] = PRIVATE_DATA(cc);
3089
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
3090
      {
3091
      private_count = 2;
3092
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
3093
      SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
3094
      }
3095
    cc += 1;
3096
    break;
3097
3098
    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
3099
    private_srcw[0] = PRIVATE_DATA(cc);
3100
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
3101
      {
3102
      private_count = 2;
3103
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
3104
      SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
3105
      }
3106
    cc += 1 + IMM2_SIZE;
3107
    break;
3108
3109
    case OP_CLASS:
3110
    case OP_NCLASS:
3111
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
3112
    case OP_XCLASS:
3113
    case OP_ECLASS:
3114
    i = (*cc >= OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
3115
#else
3116
    i = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
3117
#endif
3118
    if (PRIVATE_DATA(cc) != 0)
3119
      {
3120
      private_count = 1;
3121
      private_srcw[0] = PRIVATE_DATA(cc);
3122
      switch(get_class_iterator_size(cc + i))
3123
        {
3124
        case 1:
3125
        break;
3126
3127
        case 2:
3128
        if (recurse_check_bit(common, private_srcw[0]))
3129
          {
3130
          private_count = 2;
3131
          private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
3132
          SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
3133
          }
3134
        break;
3135
3136
        default:
3137
        SLJIT_UNREACHABLE();
3138
        break;
3139
        }
3140
      }
3141
    cc += i;
3142
    break;
3143
3144
    case OP_MARK:
3145
    case OP_COMMIT_ARG:
3146
    case OP_PRUNE_ARG:
3147
    case OP_THEN_ARG:
3148
    SLJIT_ASSERT(common->mark_ptr != 0);
3149
    if ((recurse_flags & recurse_flag_quit_found) && recurse_check_bit(common, common->mark_ptr))
3150
      {
3151
      kept_shared_srcw[0] = common->mark_ptr;
3152
      kept_shared_count = 1;
3153
      }
3154
    if (common->control_head_ptr != 0 && recurse_check_bit(common, common->control_head_ptr))
3155
      {
3156
      private_srcw[0] = common->control_head_ptr;
3157
      private_count = 1;
3158
      }
3159
    cc += 1 + 2 + cc[1];
3160
    break;
3161
3162
    case OP_THEN:
3163
    SLJIT_ASSERT(common->control_head_ptr != 0);
3164
    if (recurse_check_bit(common, common->control_head_ptr))
3165
      {
3166
      private_srcw[0] = common->control_head_ptr;
3167
      private_count = 1;
3168
      }
3169
    cc++;
3170
    break;
3171
3172
    default:
3173
    cc = next_opcode(common, cc);
3174
    SLJIT_ASSERT(cc != NULL);
3175
    continue;
3176
    }
3177
3178
  if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
3179
    {
3180
    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
3181
3182
    for (i = 0; i < private_count; i++)
3183
      {
3184
      SLJIT_ASSERT(private_srcw[i] != 0);
3185
3186
      if (!from_sp)
3187
        delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, private_srcw[i]);
3188
3189
      if (from_sp || type == recurse_swap_global)
3190
        delayed_mem_copy_move(&status, SLJIT_SP, private_srcw[i], base_reg, stackptr);
3191
3192
      stackptr += sizeof(sljit_sw);
3193
      }
3194
    }
3195
  else
3196
    stackptr += sizeof(sljit_sw) * private_count;
3197
3198
  if (type != recurse_copy_private_to_global && type != recurse_copy_kept_shared_to_global)
3199
    {
3200
    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_swap_global);
3201
3202
    for (i = 0; i < shared_count; i++)
3203
      {
3204
      SLJIT_ASSERT(shared_srcw[i] != 0);
3205
3206
      if (!from_sp)
3207
        delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, shared_srcw[i]);
3208
3209
      if (from_sp || type == recurse_swap_global)
3210
        delayed_mem_copy_move(&status, SLJIT_SP, shared_srcw[i], base_reg, stackptr);
3211
3212
      stackptr += sizeof(sljit_sw);
3213
      }
3214
    }
3215
  else
3216
    stackptr += sizeof(sljit_sw) * shared_count;
3217
3218
  if (type != recurse_copy_private_to_global && type != recurse_swap_global)
3219
    {
3220
    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_copy_kept_shared_to_global);
3221
3222
    for (i = 0; i < kept_shared_count; i++)
3223
      {
3224
      SLJIT_ASSERT(kept_shared_srcw[i] != 0);
3225
3226
      if (!from_sp)
3227
        delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, kept_shared_srcw[i]);
3228
3229
      if (from_sp || type == recurse_swap_global)
3230
        delayed_mem_copy_move(&status, SLJIT_SP, kept_shared_srcw[i], base_reg, stackptr);
3231
3232
      stackptr += sizeof(sljit_sw);
3233
      }
3234
    }
3235
  else
3236
    stackptr += sizeof(sljit_sw) * kept_shared_count;
3237
  }
3238
3239
SLJIT_ASSERT(cc == ccend && stackptr == stacktop);
3240
3241
delayed_mem_copy_finish(&status);
3242
}
3243
3244
/* Walks the bracket starting at cc and flags the alternatives that contain
a (*THEN) verb. While a slot is being tracked, meeting an opcode in the
OP_THEN..OP_THEN_ARG range stores 1 into that slot. The tracked slot is the
common->then_offsets entry that corresponds to the first opcode of the
current alternative. Opcodes in the OP_ASSERT..OP_SCOND range are processed
recursively and receive the slot that is currently tracked.

Arguments:
  common          compiler state (then_offsets, start, private data)
  cc              first opcode of the bracket to scan
  current_offset  slot inherited from the caller, may be NULL

Returns:          bracketend(cc); when the closing opcode is OP_KET and has
                  non-zero private data, advanced by PRIVATE_DATA(ket + 1) */
static SLJIT_INLINE PCRE2_SPTR set_then_offsets(compiler_common *common, PCRE2_SPTR cc, sljit_u8 *current_offset)
{
PCRE2_SPTR end = bracketend(cc);
PCRE2_SPTR ket;
BOOL has_alternatives = (cc[GET(cc, 1)] == OP_ALT);

if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
  {
  /* An assertion captures the *THEN verb even if it has no alternatives. */
  current_offset = NULL;
  }
else if (*cc >= OP_ASSERT_NA && *cc <= OP_ASSERT_SCS)
  {
  has_alternatives = TRUE;
  }
else if (*cc == OP_COND || *cc == OP_SCOND)
  {
  /* A conditional block never captures. */
  has_alternatives = FALSE;
  }

cc = next_opcode(common, cc);

if (has_alternatives)
  {
  /* Step over a leading item that carries one or two IMM2 operands. */
  if (*cc == OP_REVERSE || *cc == OP_CREF)
    cc += 1 + IMM2_SIZE;
  else if (*cc == OP_VREVERSE || *cc == OP_DNCREF)
    cc += 1 + 2 * IMM2_SIZE;

  current_offset = common->then_offsets + (cc - common->start);
  }

while (cc < end)
  {
  if (*cc >= OP_ASSERT && *cc <= OP_SCOND)
    {
    /* Nested bracket: the recursive call returns where to continue. */
    cc = set_then_offsets(common, cc, current_offset);
    }
  else if (has_alternatives && *cc == OP_ALT)
    {
    /* A new alternative starts: track the slot of its first opcode. */
    cc += 1 + LINK_SIZE;

    if (*cc == OP_REVERSE)
      cc += 1 + IMM2_SIZE;
    else if (*cc == OP_VREVERSE)
      cc += 1 + 2 * IMM2_SIZE;

    current_offset = common->then_offsets + (cc - common->start);
    }
  else
    {
    if (current_offset != NULL && *cc >= OP_THEN && *cc <= OP_THEN_ARG)
      *current_offset = 1;
    cc = next_opcode(common, cc);
    }
  }

/* Ignore repeats. */
ket = end - 1 - LINK_SIZE;
if (*ket == OP_KET && PRIVATE_DATA(ket) != 0)
  end += PRIVATE_DATA(ket + 1);

return end;
}
3311
3312
#undef CASE_ITERATOR_PRIVATE_DATA_1
3313
#undef CASE_ITERATOR_PRIVATE_DATA_2A
3314
#undef CASE_ITERATOR_PRIVATE_DATA_2B
3315
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
3316
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
3317
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
3318
3319
/* Reports whether value has at most one bit set. Clearing the lowest set
bit with value & (value - 1) leaves zero exactly in that case. Note that an
input of 0 also gives a true result. */
static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
{
unsigned int lowest_bit_cleared = value & (value - 1);

return lowest_bit_cleared == 0;
}
3323
3324
/* Binds every jump stored in list to label.

Arguments:
  list   head of the jump list, may be NULL (nothing happens then)
  label  target label passed to SET_LABEL for each entry */
static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
{
jump_list *item;

for (item = list; item != NULL; item = item->next)
  {
  /* sljit_set_label is clever enough to do nothing
  if either the jump or the label is NULL. */
  SET_LABEL(item->jump, label);
  }
}
3334
3335
/* Pushes jump onto the front of *list. The list node is obtained from
sljit_alloc_memory; when that returns NULL the list is left unchanged and
the jump is not recorded.

Arguments:
  compiler  compiler whose allocator provides the node
  list      address of the list head to update
  jump      jump to store in the new node */
static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
{
jump_list *node = sljit_alloc_memory(compiler, sizeof(jump_list));

if (node == NULL)
  return;

node->jump = jump;
node->next = *list;
*list = node;
}
3345
3346
/* Records a stub on common->stubs. The entry remembers the jump that
leads into the stub (start) and a label emitted at the current position
(quit). Nothing is recorded, and no label is emitted, when the node cannot
be allocated.

Arguments:
  common  compiler state owning the stub list
  start   jump that will enter the stub */
static void add_stub(compiler_common *common, struct sljit_jump *start)
{
DEFINE_COMPILER;
stub_list *stub = sljit_alloc_memory(compiler, sizeof(stub_list));

if (stub == NULL)
  return;

stub->start = start;
stub->quit = LABEL();
stub->next = common->stubs;
common->stubs = stub;
}
3359
3360
/* Emits the code of every stub recorded on common->stubs and then empties
the list. For each stub the start jump is bound to the current position, a
fast call is emitted and registered in common->stackalloc, and finally an
unconditional jump returns to the stub's quit label. */
static void flush_stubs(compiler_common *common)
{
DEFINE_COMPILER;
stub_list *stub;

for (stub = common->stubs; stub != NULL; stub = stub->next)
  {
  JUMPHERE(stub->start);
  add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
  JUMPTO(SLJIT_JUMP, stub->quit);
  }

common->stubs = NULL;
}
3374
3375
/* Emits code that subtracts one from COUNT_MATCH and, when the result is
zero, takes a jump that is registered in common->calllimit. */
static SLJIT_INLINE void count_match(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *limit_reached;

OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
limit_reached = JUMP(SLJIT_ZERO);
add_jump(compiler, &common->calllimit, limit_reached);
}
3382
3383
/* Emits code that reserves size machine words by lowering STACK_TOP. A
stub is registered (see add_stub) that is entered when the new STACK_TOP is
below STACK_LIMIT.

May destroy all locals and registers except TMP2.

Arguments:
  common  compiler state
  size    number of words to reserve, must be positive */
static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
{
DEFINE_COMPILER;
struct sljit_jump *limit_exceeded;

SLJIT_ASSERT(size > 0);
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * SSIZE_OF(sw));

#ifdef DESTROY_REGISTERS
/* Overwrite the registers with a recognizable constant. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
#if defined SLJIT_DEBUG && SLJIT_DEBUG
SLJIT_ASSERT(common->locals_size >= 2 * SSIZE_OF(sw));
/* These two are also used by the stackalloc calls. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL0, TMP1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL1, TMP1, 0);
#endif
#endif

limit_exceeded = CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0);
add_stub(common, limit_exceeded);
}
3403
3404
/* Emits code that releases size machine words by raising STACK_TOP; the
counterpart of allocate_stack.

Arguments:
  common  compiler state
  size    number of words to release, must be positive */
static SLJIT_INLINE void free_stack(compiler_common *common, int size)
{
DEFINE_COMPILER;
sljit_sw released_bytes;

SLJIT_ASSERT(size > 0);
released_bytes = size * SSIZE_OF(sw);
OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, released_bytes);
}
3411
3412
/* Allocates a chunk of read-only data and links it into the list headed by
common->read_only_data_head. One extra sljit_uw is allocated in front of the
returned area; it stores the previous list head.

Arguments:
  common  compiler state owning the list
  size    number of usable bytes requested

Returns:  pointer to the usable area (just after the link word), or NULL
          when the compiler is already in an error state, when the request
          is too large to be represented, or when the allocation fails. In
          the last two cases the compiler memory error is set. */
static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
{
DEFINE_COMPILER;
sljit_uw *result;

if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  return NULL;

/* size + sizeof(sljit_uw) must not wrap around, otherwise the block would
be smaller than what the caller is going to use. */
if (SLJIT_UNLIKELY(size > (~(sljit_uw)0) - sizeof(sljit_uw)))
  {
  sljit_set_compiler_memory_error(compiler);
  return NULL;
  }

result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
if (SLJIT_UNLIKELY(result == NULL))
  {
  sljit_set_compiler_memory_error(compiler);
  return NULL;
  }

/* The first word links to the previously allocated chunk. */
*(void**)result = common->read_only_data_head;
common->read_only_data_head = (void *)result;
return result + 1;
}
3431
3432
/* Emits code that fills OVECTOR(1) .. OVECTOR(length - 1) with the value
"begin - 1 code unit", computed from the begin field of jit_arguments. The
value is left in SLJIT_R0 (the original comment refers to it as TMP1). Short
vectors are written with unrolled stores, longer ones with a loop that uses
a pre-update store when sljit supports it.

Arguments:
  common  compiler state
  length  number of ovector slots, must be greater than 1 */
static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
{
DEFINE_COMPILER;
struct sljit_label *loop;
BOOL use_pre_update;
sljit_s32 i;

/* At this point we can freely use all temporary registers. */
SLJIT_ASSERT(length > 1);
OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));

if (length < 8)
  {
  for (i = 1; i < length; i++)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
  return;
  }

use_pre_update = sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS;

/* The pre-update form starts one word earlier, because the address is
incremented before each store. */
GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START + (use_pre_update ? 0 : sizeof(sljit_sw)));
OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
loop = LABEL();

if (use_pre_update)
  sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
else
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0);
  OP2(SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw));
  }

OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, loop);
}
3470
3471
/* Emits code that stores zero into every word of the local area between
common->early_fail_start_ptr and common->early_fail_end_ptr. One word is
cleared by a single store, up to six words by unrolled stores, and larger
areas by a loop clearing three words per iteration followed by at most two
trailing stores. When TMP3 maps to a real register and the CPU has no zero
register, TMP3 is loaded with zero and used as the store source. The loop
variant uses TMP1 and TMP2. */
static SLJIT_INLINE void reset_early_fail(compiler_common *common)
{
DEFINE_COMPILER;
sljit_u32 size = (sljit_u32)(common->early_fail_end_ptr - common->early_fail_start_ptr);
sljit_u32 tail_words;
sljit_u32 uncleared_size;
sljit_s32 zero_src = SLJIT_IMM;
sljit_s32 slot;
struct sljit_label *loop;

SLJIT_ASSERT(common->early_fail_start_ptr < common->early_fail_end_ptr);

if (size == sizeof(sljit_sw))
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->early_fail_start_ptr, SLJIT_IMM, 0);
  return;
  }

if (sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) >= 0 && !sljit_has_cpu_feature(SLJIT_HAS_ZERO_REGISTER))
  {
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
  zero_src = TMP3;
  }

if (size <= 6 * sizeof(sljit_sw))
  {
  slot = common->early_fail_start_ptr;
  while (slot < common->early_fail_end_ptr)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), slot, zero_src, 0);
    slot += sizeof(sljit_sw);
    }
  return;
  }

GET_LOCAL_BASE(TMP1, 0, common->early_fail_start_ptr);

/* Words left over after the three-words-per-iteration loop (0, 1 or 2). */
tail_words = (size / sizeof(sljit_sw)) % 3;
uncleared_size = tail_words * sizeof(sljit_sw);

/* TMP2 marks the end of the part handled by the loop. */
OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, size - uncleared_size);

loop = LABEL();
OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, zero_src, 0);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -2 * SSIZE_OF(sw), zero_src, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -1 * SSIZE_OF(sw), zero_src, 0);
CMPTO(SLJIT_LESS, TMP1, 0, TMP2, 0, loop);

if (tail_words >= 1)
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, zero_src, 0);

if (tail_words >= 2)
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), sizeof(sljit_sw), zero_src, 0);
}
3520
3521
/* Emits code that prepares the local state for a new match attempt:
OVECTOR(2) .. OVECTOR(length - 1) are overwritten with the value found in
OVECTOR(1), the mark and control head locals (when present) are cleared,
TMP1 is loaded from the start_ptr local and STACK_TOP is reloaded from the
end field of the sljit_stack referenced by jit_arguments. STACK_TOP and TMP2
serve as scratch registers in the loop variant.

Arguments:
  common  compiler state
  length  number of ovector slots, must be greater than 1 */
static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
{
DEFINE_COMPILER;
struct sljit_label *loop;
BOOL use_pre_update;
int i;

SLJIT_ASSERT(length > 1);
/* OVECTOR(1) contains the "string begin - 1" constant. */
if (length > 2)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));

if (length < 8)
  {
  for (i = 2; i < length; i++)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
  }
else
  {
  use_pre_update = sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS;

  /* The pre-update form starts one word earlier, because the address is
  incremented before each store. */
  GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + (use_pre_update ? 1 : 2) * sizeof(sljit_sw));
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
  loop = LABEL();

  if (use_pre_update)
    sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
  else
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(sljit_sw));
    }

  OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
  JUMPTO(SLJIT_NOT_ZERO, loop);
  }

/* With virtual registers the stack pointer is fetched in two steps, the
second one comes after the locals have been cleared. */
if (HAS_VIRTUAL_REGISTERS)
  OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
else
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, stack));

if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
if (common->control_head_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
if (HAS_VIRTUAL_REGISTERS)
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));

OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, end));
}
3574
3575
/* Follows a chain of records looking for a mark whose name equals
skip_arg. Each record is an array of sljit_sw: [0] links to the next record
(0 ends the chain), [1] is the record type; for type_mark records [2] is
the mark name and [3] the value to return.

Arguments:
  current   first record of the chain, may be NULL
  skip_arg  name to search for

Returns:    current[3] of the first matching mark record, 0 if none */
static sljit_sw SLJIT_FUNC do_search_mark(sljit_sw *current, PCRE2_SPTR skip_arg)
{
for (; current != NULL; current = (sljit_sw*)current[0])
  {
  if (current[1] == type_mark)
    {
    if (PRIV(strcmp)(skip_arg, (PCRE2_SPTR)current[2]) == 0)
      return current[3];
    }
  else if (current[1] != type_then_trap)
    {
    /* Only the two record types above are expected here. */
    SLJIT_UNREACHABLE();
    }

  SLJIT_ASSERT(current[0] == 0 || current < (sljit_sw*)current[0]);
  }

return 0;
}
3598
3599
/* Emits the code that publishes a successful match. The generated code
  - keeps the old OVECTOR(1) value in SLJIT_S2 and stores STR_PTR there,
  - copies the start_ptr local into the startchar_ptr field and, when
    marks are used, the mark local into the mark_ptr field of
    jit_arguments,
  - converts oveccount local ovector words into offsets relative to the
    begin field (in code units) and writes them to the ovector of the
    match data,
  - computes the return value in SLJIT_RETURN_REG.

Arguments:
  common      compiler state
  topbracket  value that selects how the return value is computed; 1 is
              returned directly when it is not greater than 1 */
static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
{
DEFINE_COMPILER;
struct sljit_label *loop;
sljit_s32 store_opcode;
BOOL has_pre;

/* At this point we can freely use all registers. */
OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);

/* Both variants leave oveccount in SLJIT_R1 and the address one PCRE2_SIZE
before the output ovector in SLJIT_R2. */
if (!HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, match_data));
  if (common->mark_ptr != 0)
    OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
  OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, oveccount));
  OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
  if (common->mark_ptr != 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R0, 0);
  OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
  }
else
  {
  OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
  if (common->mark_ptr != 0)
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
  OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, oveccount));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
  if (common->mark_ptr != 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
  OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, match_data),
    SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
  }

has_pre = sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;

GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0));
OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? SLJIT_R0 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));

/* Copy loop: one local ovector word per iteration, SLJIT_R1 counts down. */
loop = LABEL();

if (has_pre)
  sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));
else
  {
  OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0);
  OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
  }

OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, sizeof(PCRE2_SIZE));
OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_R0, 0);
/* Copy the integer value to the output buffer */
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif

SLJIT_ASSERT(sizeof(PCRE2_SIZE) == 4 || sizeof(PCRE2_SIZE) == 8);
store_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
OP1(store_opcode, SLJIT_MEM1(SLJIT_R2), 0, SLJIT_S1, 0);

OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, loop);

/* Calculate the return value, which is the maximum ovector value. */
if (topbracket <= 1)
  {
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
  return;
  }

has_pre = sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * SSIZE_OF(sw))) == SLJIT_SUCCESS;

/* The pre-update form starts one pair higher, because the address is
decremented before each load. */
GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + (has_pre ? topbracket : topbracket - 1) * 2 * sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);

/* OVECTOR(0) is never equal to SLJIT_S2. */
loop = LABEL();

if (has_pre)
  sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * SSIZE_OF(sw)));
else
  {
  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0);
  OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
  }

OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
}
3695
3696
/* Emits the code that reports a partial match. The generated code loads
PCRE2_ERROR_PARTIAL into SLJIT_RETURN_REG, stores the match start pointer
(the hit_start local in soft partial mode, the start_ptr local otherwise)
into the startchar_ptr field of jit_arguments, writes the start and the
subject end (STR_END) as offsets relative to the begin field into the first
two ovector entries of the match data, and finally jumps to quit.

Arguments:
  common  compiler state
  quit    label the generated code jumps to at the end */
static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
{
DEFINE_COMPILER;
sljit_s32 mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
sljit_s32 arguments_reg = HAS_VIRTUAL_REGISTERS ? SLJIT_R1 : ARGUMENTS;
sljit_sw start_local = (common->mode == PCRE2_JIT_PARTIAL_SOFT) ? common->hit_start : common->start_ptr;

SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S0, str_end_must_be_saved_reg0);
SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
  && (common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start != 0 : common->hit_start == 0));

if (arguments_reg != ARGUMENTS)
  OP1(SLJIT_MOV, arguments_reg, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), start_local);
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_PARTIAL);

/* Store match begin and end. */
OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_R2, 0);
/* Last use of arguments_reg: it may be SLJIT_R1, which is overwritten here. */
OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, match_data));

/* ovector[0]: start offset. */
OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S1, 0);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector), SLJIT_R2, 0);

/* ovector[1]: end offset. */
OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_S1, 0);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
OP2(SLJIT_ASHR, STR_END, 0, STR_END, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector) + sizeof(PCRE2_SIZE), STR_END, 0);

JUMPTO(SLJIT_JUMP, quit);
}
3733
3734
/* Emits code for the partial matching modes that lowers the
start_used_ptr local to STR_PTR when STR_PTR is the smaller of the two.
Nothing is emitted in the other modes. May destroy TMP1. */
static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *keep_current;

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  /* The value of -1 must be kept for start_used_ptr! */
  OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
  /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
  is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
  keep_current = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
  }
else if (common->mode == PCRE2_JIT_PARTIAL_HARD)
  keep_current = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
else
  return;

OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
JUMPHERE(keep_current);
}
3757
3758
/* Detects if the character at cc has an othercase, i.e. whether its case
mapping differs from the character itself. In UTF or UCP mode characters
above 127 are looked up with UCD_OTHERCASE, all other lookups go through
the common->fcc table. Without UTF/UCP a character that does not satisfy
MAX_255 is reported as having no othercase.

Arguments:
  common  compiler state (utf, ucp, fcc)
  cc      pointer to the character in the pattern

Returns:  non-zero if the othercase differs from the character */
static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, PCRE2_SPTR cc)
{
unsigned int c;

#ifdef SUPPORT_UNICODE
if (common->utf || common->ucp)
  {
  if (common->utf)
    {
    GETCHAR(c, cc);
    }
  else
    c = *cc;

  return (c > 127) ? (c != UCD_OTHERCASE(c)) : (common->fcc[c] != c);
  }
#endif

c = *cc;
if (!MAX_255(c))
  return FALSE;

return common->fcc[c] != c;
}
3783
3784
/* Returns the othercase of c. Characters above 127 are mapped with
UCD_OTHERCASE when Unicode support is compiled in and UTF or UCP mode is
active; everything else goes through TABLE_GET on common->fcc, with c
itself as the default argument.

Arguments:
  common  compiler state (utf, ucp, fcc)
  c       character to map

Returns:  the mapped character */
static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
{
#ifdef SUPPORT_UNICODE
if (c > 127 && (common->utf || common->ucp))
  return UCD_OTHERCASE(c);
#endif

return TABLE_GET(c, common->fcc, c);
}
3793
3794
/* Detects if the character at cc and its othercase differ in exactly one
bit. The caller must only pass characters whose othercase is different
(this is asserted).

Arguments:
  common  compiler state (utf, ucp, fcc)
  cc      pointer to the character in the pattern

Returns:  0 if more than one bit differs. Otherwise a value whose low
          eight bits hold the differing bit, shifted so that it fits into
          eight bits, and whose bits from 8 upwards hold an index computed
          from the position of that bit (presumably the position of the
          affected unit inside the encoded character - confirm against
          the callers) */
static unsigned int char_get_othercase_bit(compiler_common *common, PCRE2_SPTR cc)
{
unsigned int c, oc, bit;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
int unit_index;
#endif

c = *cc;
#ifdef SUPPORT_UNICODE
if (common->utf || common->ucp)
  {
  if (common->utf)
    {
    /* Re-reads the character, decoding a multi-unit sequence if needed. */
    GETCHAR(c, cc);
    }

  oc = (c <= 127) ? common->fcc[c] : UCD_OTHERCASE(c);
  }
else
#endif
  oc = TABLE_GET(c, common->fcc, c);

SLJIT_ASSERT(c != oc);

bit = c ^ oc;
/* Optimized for English alphabet. */
if (c <= 127 && bit == 0x20)
  return 0x20;

/* Since c != oc, they must have at least 1 bit difference. */
if (!is_powerof2(bit))
  return 0;

#if PCRE2_CODE_UNIT_WIDTH == 8

#ifdef SUPPORT_UNICODE
if (common->utf && c > 127)
  {
  /* Every six-bit group skipped lowers the index by one. */
  unit_index = GET_EXTRALEN(*cc);
  for (; (bit & 0x3f) == 0; bit >>= 6)
    unit_index--;
  return (unit_index << 8) | bit;
  }
#endif /* SUPPORT_UNICODE */
return bit;

#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32

#ifdef SUPPORT_UNICODE
if (common->utf && c > 65535)
  {
  if (bit < (1u << 10))
    return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));

  /* The difference is in the upper part; fall through with it shifted. */
  bit >>= 10;
  }
#endif /* SUPPORT_UNICODE */
return (bit < 256) ? ((0u << 8) | bit) : ((1u << 8) | (bit >> 8));

#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
}
3869
3870
/* Emits the check for whether a partial match has occurred. Nothing is
emitted in complete matching mode. In soft partial mode the hit_start local
is set to zero; otherwise control is transferred to the partial match
handler (common->partialmatchlabel when it exists, the common->partialmatch
jump list if not). Unless forced, the action is skipped when the
start_used_ptr local is greater than or equal to STR_PTR; when forced or
when empty partial matches are allowed, soft mode skips it only if
start_used_ptr is -1. Does not modify registers.

Arguments:
  common  compiler state
  force   TRUE to omit the start_used_ptr >= STR_PTR test; only valid in
          the partial matching modes */
static void check_partial(compiler_common *common, BOOL force)
{
DEFINE_COMPILER;
struct sljit_jump *skip = NULL;

SLJIT_ASSERT(!force || common->mode != PCRE2_JIT_COMPLETE);

if (common->mode == PCRE2_JIT_COMPLETE)
  return;

if (!force && !common->allow_empty_partial)
  skip = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
else if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  skip = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));

if (skip != NULL)
  JUMPHERE(skip);
}
3899
3900
/* Emits an end-of-subject check. In complete mode a single jump, taken
when STR_PTR >= STR_END, is added to end_reached. In the partial modes the
code following the check is reached only when STR_PTR < STR_END; at the end
of the subject, end_reached is taken if the start_used_ptr local is greater
than or equal to STR_PTR, otherwise soft mode clears the hit_start local
and takes end_reached, while hard mode goes to the partial match handler.
Does not affect registers. Usually used in a tight spot.

Arguments:
  common       compiler state
  end_reached  jump list receiving the "end of subject" jumps */
static void check_str_end(compiler_common *common, jump_list **end_reached)
{
DEFINE_COMPILER;
struct sljit_jump *not_at_end;

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

/* Common to both partial modes. */
add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));

JUMPHERE(not_at_end);
}
3929
3930
/* Emits an end-of-subject check whose failure path is a backtrack. In
complete mode a single jump, taken when STR_PTR >= STR_END, is added to
backtracks. In the partial modes the code following the check is reached
only when STR_PTR < STR_END. At the end of the subject a backtrack is taken
when the start_used_ptr local is greater than or equal to STR_PTR (empty
partial matches not allowed) or equals -1 (soft mode with empty partial
matches allowed); otherwise soft mode clears the hit_start local and
backtracks, while hard mode goes to the partial match handler.

Arguments:
  common      compiler state
  backtracks  jump list receiving the backtrack jumps */
static void detect_partial_match(compiler_common *common, jump_list **backtracks)
{
DEFINE_COMPILER;
struct sljit_jump *not_at_end;

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

/* Partial matching mode. */
not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

if (!common->allow_empty_partial)
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
else if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1));

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));

JUMPHERE(not_at_end);
}
3962
3963
/* Emits the partial match action for a position that is already known to
be at the end of the subject. Soft mode clears the hit_start local unless
the start_used_ptr local is greater than or equal to STR_PTR. Hard mode
goes to the partial match handler (label or jump list) when start_used_ptr
is less than STR_PTR. Nothing is emitted in any other mode. */
static void process_partial_match(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *skip;

/* Partial matching mode. */
if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  skip = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  JUMPHERE(skip);
  return;
  }

if (common->mode != PCRE2_JIT_PARTIAL_HARD)
  return;

if (common->partialmatchlabel != NULL)
  CMPTO(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, CMP(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
}
3983
3984
static void detect_partial_match_to(compiler_common *common, struct sljit_label *label)
3985
{
3986
/* Emits a jump to label while STR_PTR < STR_END. The code that follows the
jump (reached when the end of the subject was hit) is the partial match
bookkeeping emitted by process_partial_match(). */
DEFINE_COMPILER;
3987
3988
/* Input remains: continue at label. */
CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, label);
3989
process_partial_match(common);
3990
}
3991
3992
static void peek_char(compiler_common *common, sljit_u32 max, sljit_s32 dst, sljit_sw dstw, jump_list **backtracks)
{
/* Loads the character at STR_PTR into TMP1 and leaves STR_PTR unchanged.
STR_END is not checked. TMP2, dst and RETURN_ADDR are destroyed. The
(dst, dstw) operand is used to save STR_PTR around the decoder call. When
max is below the first value that needs multi-unit decoding, only the
single code unit is loaded. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *single_unit;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */

SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(dst);
SLJIT_UNUSED_ARG(dstw);
SLJIT_UNUSED_ARG(backtracks);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (!common->utf || max < 128)
  return;

single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
/* The decoder helpers move STR_PTR, so it is parked in dst meanwhile. */
OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
if (common->invalid_utf)
  add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
else
  add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
if (common->invalid_utf && backtracks != NULL)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
JUMPHERE(single_unit);
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (!common->utf || max < 0xd800)
  return;

OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

if (!common->invalid_utf)
  {
  /* TMP2 contains the high surrogate. */
  single_unit = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  }
else
  {
  /* Any surrogate is handed to the validating decoder. */
  single_unit = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
  OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
  if (backtracks != NULL)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  }

JUMPHERE(single_unit);
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (!common->invalid_utf || max < 0xd800)
  return;

/* TMP2 = character - 0xd800, used for the surrogate range test. */
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

if (backtracks != NULL)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
  return;
  }

/* No backtrack list: replace an invalid value by INVALID_UTF_CHAR instead. */
OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000);
SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
4075
4076
static void peek_char_back(compiler_common *common, sljit_u32 max, jump_list **backtracks)
{
/* Loads the character that ends just before STR_PTR into TMP1; STR_PTR is
not moved. On entry TMP2 must contain the start of the subject buffer.
TMP1, TMP2 and RETURN_ADDR are affected. */
DEFINE_COMPILER;

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *single_unit;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */

SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(backtracks);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (!common->utf || max < 128)
  return;

single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
if (!common->invalid_utf)
  add_jump(compiler, &common->utfpeakcharback, JUMP(SLJIT_FAST_CALL));
else
  {
  add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
  if (backtracks != NULL)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  }
JUMPHERE(single_unit);
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (!common->utf || max < 0xd800)
  return;

if (!common->invalid_utf)
  {
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
  single_unit = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xdc00);
  /* TMP2 contains the low surrogate. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  }
else
  {
  single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
  add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
  if (backtracks != NULL)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  }
JUMPHERE(single_unit);
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (!common->invalid_utf)
  return;

/* NOTE(review): backtracks is used here without a NULL test, unlike the
other code unit widths - confirm that callers never pass NULL. */
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
4143
4144
/* Option bits for the options argument of read_char(). */

/* read_char() still moves STR_PTR past the whole character when the
character value itself is not fully decoded. */
#define READ_CHAR_UPDATE_STR_PTR 0x1
4145
/* In invalid UTF mode, read_char() decodes through the utfreadnewline_invalid
helper instead of utfreadchar_invalid. */
#define READ_CHAR_UTF8_NEWLINE 0x2
4146
/* Both of the above bits. */
#define READ_CHAR_NEWLINE (READ_CHAR_UPDATE_STR_PTR | READ_CHAR_UTF8_NEWLINE)
4147
/* read_char() does not take its invalid UTF decoding path even when
common->invalid_utf is set. */
#define READ_CHAR_VALID_UTF 0x4
4148
4149
static void read_char(compiler_common *common, sljit_u32 min, sljit_u32 max,
4150
  jump_list **backtracks, sljit_u32 options)
4151
{
4152
/* Reads the precise value of a character into TMP1, if the character is
4153
between min and max (c >= min && c <= max). Otherwise it returns with a value
4154
outside the range. Does not check STR_END. */
4155
DEFINE_COMPILER;
4156
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
4157
struct sljit_jump *jump;
4158
#endif
4159
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
4160
struct sljit_jump *jump2;
4161
#endif
4162
4163
SLJIT_UNUSED_ARG(min);
4164
SLJIT_UNUSED_ARG(max);
4165
SLJIT_UNUSED_ARG(backtracks);
4166
SLJIT_UNUSED_ARG(options);
4167
SLJIT_ASSERT(min <= max);
4168
4169
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4170
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4171
4172
#ifdef SUPPORT_UNICODE
4173
#if PCRE2_CODE_UNIT_WIDTH == 8
4174
if (common->utf)
4175
  {
4176
  if (max < 128 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;
4177
4178
  if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
4179
    {
4180
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
4181
4182
    if (options & READ_CHAR_UTF8_NEWLINE)
4183
      add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
4184
    else
4185
      add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
4186
4187
    if (backtracks != NULL)
4188
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
4189
    JUMPHERE(jump);
4190
    return;
4191
    }
4192
4193
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4194
  if (min >= 0x10000)
4195
    {
4196
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
4197
    if (options & READ_CHAR_UPDATE_STR_PTR)
4198
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4199
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4200
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
4201
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
4202
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
4203
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4204
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4205
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4206
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
4207
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4208
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
4209
    if (!(options & READ_CHAR_UPDATE_STR_PTR))
4210
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
4211
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4212
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
4213
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4214
    JUMPHERE(jump2);
4215
    if (options & READ_CHAR_UPDATE_STR_PTR)
4216
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
4217
    }
4218
  else if (min >= 0x800 && max <= 0xffff)
4219
    {
4220
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
4221
    if (options & READ_CHAR_UPDATE_STR_PTR)
4222
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4223
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4224
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
4225
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
4226
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
4227
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4228
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4229
    if (!(options & READ_CHAR_UPDATE_STR_PTR))
4230
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4231
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4232
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
4233
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4234
    JUMPHERE(jump2);
4235
    if (options & READ_CHAR_UPDATE_STR_PTR)
4236
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
4237
    }
4238
  else if (max >= 0x800)
4239
    {
4240
    add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
4241
    }
4242
  else if (max < 128)
4243
    {
4244
    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4245
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4246
    }
4247
  else
4248
    {
4249
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4250
    if (!(options & READ_CHAR_UPDATE_STR_PTR))
4251
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4252
    else
4253
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4254
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
4255
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4256
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
4257
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4258
    if (options & READ_CHAR_UPDATE_STR_PTR)
4259
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
4260
    }
4261
  JUMPHERE(jump);
4262
  }
4263
#elif PCRE2_CODE_UNIT_WIDTH == 16
4264
if (common->utf)
4265
  {
4266
  if (max < 0xd800 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;
4267
4268
  if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
4269
    {
4270
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4271
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
4272
4273
    if (options & READ_CHAR_UTF8_NEWLINE)
4274
      add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
4275
    else
4276
      add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
4277
4278
    if (backtracks != NULL)
4279
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
4280
    JUMPHERE(jump);
4281
    return;
4282
    }
4283
4284
  if (max >= 0x10000)
4285
    {
4286
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4287
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
4288
    /* TMP2 contains the high surrogate. */
4289
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4290
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
4291
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4292
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
4293
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
4294
    JUMPHERE(jump);
4295
    return;
4296
    }
4297
4298
  /* Skip low surrogate if necessary. */
4299
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4300
4301
  if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
4302
    {
4303
    if (options & READ_CHAR_UPDATE_STR_PTR)
4304
      OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4305
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
4306
    if (options & READ_CHAR_UPDATE_STR_PTR)
4307
      SELECT(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0, STR_PTR);
4308
    if (max >= 0xd800)
4309
      SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, 0x10000, TMP1);
4310
    }
4311
  else
4312
    {
4313
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
4314
    if (options & READ_CHAR_UPDATE_STR_PTR)
4315
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4316
    if (max >= 0xd800)
4317
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
4318
    JUMPHERE(jump);
4319
    }
4320
  }
4321
#elif PCRE2_CODE_UNIT_WIDTH == 32
4322
if (common->invalid_utf)
4323
  {
4324
  if (backtracks != NULL)
4325
    {
4326
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4327
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
4328
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
4329
    }
4330
  else
4331
    {
4332
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4333
    OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000);
4334
    SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
4335
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
4336
    SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
4337
    }
4338
  }
4339
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
4340
#endif /* SUPPORT_UNICODE */
4341
}
4342
4343
static void skip_valid_char(compiler_common *common)
{
/* Advances STR_PTR over one character. In UTF-8 and UTF-16 mode the number
of extra code units is derived from the first code unit only; the remaining
units are not inspected. TMP1 is destroyed in that case. */
DEFINE_COMPILER;
#if (defined SUPPORT_UNICODE) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
struct sljit_jump *single_unit;

if (!common->utf)
  {
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  return;
  }

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if PCRE2_CODE_UNIT_WIDTH == 8
single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
/* utf8_table4 is indexed by (first byte - 0xc0). */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#elif PCRE2_CODE_UNIT_WIDTH == 16
single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
/* TMP1 becomes 1 for a high surrogate (0xd800-0xdbff), otherwise 0, and is
then shifted into a byte offset. */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xd800);
OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
JUMPHERE(single_unit);
#else
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */
}
4373
4374
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
4375
4376
static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
{
/* Returns TRUE when the character codes below 128 are enough to determine a
match, that is when every byte in the upper half (bytes 16..31) of the 32
byte bitset equals 0xff (nclass set) or 0 (nclass clear). */
const sljit_u8 expected = nclass ? 0xff : 0;
int i;

for (i = 16; i < 32; i++)
  {
  if (bitset[i] != expected)
    return FALSE;
  }

return TRUE;
}
4392
4393
static void read_char7_type(compiler_common *common, jump_list **backtracks, BOOL negated)
{
/* Loads the ctypes entry of the character at STR_PTR into TMP1 and advances
STR_PTR. The entry is precise for characters below 128; for anything else
TMP1 is zero. Does not check STR_END. Only valid in UTF mode. When negated
is set, STR_PTR is also moved past the rest of a multi-byte character, and
in invalid UTF mode an invalid sequence jumps to backtracks. */
DEFINE_COMPILER;
struct sljit_jump *ascii;

SLJIT_ASSERT(common->utf);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* All values > 127 are zero in ctypes. */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);

if (!negated)
  return;

ascii = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);

if (!common->invalid_utf)
  {
  /* Skip the remaining bytes using the length table. */
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  }
else
  {
  /* The validating decoder takes the first byte in TMP1. Its result is only
  used to detect an invalid sequence; the type is then reset to zero. */
  OP1(SLJIT_MOV, TMP1, 0, TMP2, 0);
  add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
  }

JUMPHERE(ascii);
}
4428
4429
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
4430
4431
static void read_char8_type(compiler_common *common, jump_list **backtracks, BOOL negated)
{
/* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END.
The type is the ctypes entry for characters below 256 and zero for all other
characters. The negated argument selects the code emitted for multi-unit
characters in the UTF-8 and UTF-16 paths; in invalid UTF mode an invalid
sequence jumps to backtracks. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
struct sljit_jump *jump;
#endif
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_jump *jump2;
#endif

SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(negated);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  /* The result of this read may be unused, but saves an "else" part. */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);

  if (!negated)
    {
    /* Inline decoding of the first two bytes: TMP2 holds the first byte and
    TMP1 receives the second one. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
    /* Only first bytes in the 0xc2-0xdf range are accepted here. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe0 - 0xc2));

    /* TMP2 = ((first byte - 0xc2) << 6) + second byte, which is the character
    value when the second byte is in the 0x80-0xbf range. */
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
    /* The second byte must be a continuation byte: (byte - 0x80) < 0x40.
    The test is done on TMP1; TMP2 is always >= 0x80 for a valid byte. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40));

    /* The ctypes array contains only 256 values. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else if (common->invalid_utf)
    {
    /* NOTE(review): do_utfreadchar_invalid documents its input as the first
    byte in TMP1, but TMP1 holds the ctypes value loaded above at this point
    (read_char7_type copies TMP2 to TMP1 before the same call) - confirm. */
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
    OP1(SLJIT_MOV, TMP2, 0, TMP1, 0);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));

    /* The ctypes array contains only 256 values. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else
    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));

  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf && negated)
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x110000));
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32 */

#if PCRE2_CODE_UNIT_WIDTH != 8
/* The ctypes array contains only 256 values. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf && negated)
  {
  /* Skip low surrogate if necessary. */
  if (!common->invalid_utf)
    {
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);

    if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
      {
      /* Branch free form: select STR_PTR + 1 after a high surrogate. */
      OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
      SELECT(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0, STR_PTR);
      }
    else
      {
      jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      JUMPHERE(jump);
      }
    return;
    }

  /* Invalid UTF mode: a surrogate must be a high surrogate that is followed,
  before STR_END, by a low surrogate; anything else jumps to backtracks. */
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));

  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16 */
}
4549
4550
static void move_back(compiler_common *common, jump_list **backtracks, BOOL must_be_valid)
{
/* Moves STR_PTR one character backwards. Affects STR_PTR and TMP1. TMP2 is
not used when must_be_valid is TRUE; otherwise TMP2 must contain the start of
the subject buffer and it is destroyed. STR_PTR is not modified for invalid
character sequences. */
DEFINE_COMPILER;

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *single_unit;
#endif

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_label *retry;

if (common->utf)
  {
  if (must_be_valid || !common->invalid_utf)
    {
    /* Step back byte by byte while the byte just passed is a continuation
    byte (10xxxxxx). */
    retry = LABEL();
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
    CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, retry);
    return;
    }

  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
  add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
  /* A zero in TMP1 after the helper call takes the backtrack path. */
  if (backtracks != NULL)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
  JUMPHERE(single_unit);
  return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  if (must_be_valid || !common->invalid_utf)
    {
    /* Skip low surrogate if necessary. */
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
    OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xdc00);
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
    return;
    }

  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  single_unit = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0xd800);
  add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
  /* A zero in TMP1 after the helper call takes the backtrack path. */
  if (backtracks != NULL)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
  JUMPHERE(single_unit);
  return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (!must_be_valid && common->invalid_utf)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));

  if (backtracks == NULL)
    {
    /* No backtrack list: step back by one unit only when the previous value
    is below 0x110000. */
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x110000);
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
    OP2(SLJIT_SHL,  TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
    return;
    }

  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  return;
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */

SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(must_be_valid);

/* Every character is a single code unit here. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
4636
4637
static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
{
/* Tests whether the character in TMP1 is a newline of the kind given by
nltype. A jump is added to backtracks for a newline when jumpifmatch is set,
and for a non-newline otherwise. TMP2 may be destroyed. */
DEFINE_COMPILER;
struct sljit_jump *is_cr;

if (nltype == NLTYPE_ANY)
  {
  /* The anynewline helper reports its result in the zero flag. */
  add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  if (jumpifmatch)
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  else
    add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
  return;
  }

if (nltype != NLTYPE_ANYCRLF)
  {
  /* A single fixed newline character. */
  SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
  add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
  return;
  }

/* NLTYPE_ANYCRLF: the character is either CR or NL. */
if (!jumpifmatch)
  {
  is_cr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
  JUMPHERE(is_cr);
  return;
  }

add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
}
4669
4670
#ifdef SUPPORT_UNICODE
4671
4672
#if PCRE2_CODE_UNIT_WIDTH == 8
4673
static void do_utfreadchar(compiler_common *common)
{
/* Emits the shared helper that decodes a UTF-8 character without validating
it. On entry TMP1 contains the first byte of the character (>= 0xc0) and
STR_PTR addresses the byte that follows it. On return TMP1 contains the
character value and STR_PTR has been moved past the remaining bytes. TMP2
and RETURN_ADDR are used as scratch registers. */
DEFINE_COMPILER;
struct sljit_jump *beyond_two_bytes;
struct sljit_jump *beyond_three_bytes;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* Merge the low six bits of the second byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* The length is found by looking for the first zero bit of the first byte;
bit 0x20 of that byte is bit 0x800 after the shift. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
beyond_two_bytes = JUMP(SLJIT_NOT_ZERO);

/* Two byte sequence: clear the shifted 110 prefix. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(beyond_two_bytes);
/* Merge the low six bits of the third byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Bit 0x10 of the first byte is bit 0x10000 after two shifts. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x10000);
beyond_three_bytes = JUMP(SLJIT_NOT_ZERO);

/* Three byte sequence: clear the shifted 1110 prefix. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Four byte sequence: clear the shifted 1111 prefix, then merge the low six
bits of the fourth byte. */
JUMPHERE(beyond_three_bytes);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4717
4718
static void do_utfreadtype8(compiler_common *common)
{
/* Emits the shared helper that reads the type of a UTF-8 character without
validating it. On entry TMP2 contains the first byte of the character
(>= 0xc0) and STR_PTR addresses the byte that follows it. On return TMP1
contains the ctypes entry for characters below 256 and zero for all other
characters; STR_PTR has been moved past the remaining bytes. */
DEFINE_COMPILER;
struct sljit_jump *beyond_two_bytes;
struct sljit_jump *above_255;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* Bit 0x20 is clear in the first byte of a two byte sequence. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0x20);
beyond_two_bytes = JUMP(SLJIT_NOT_ZERO);

/* Two byte sequence. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
/* The upper 5 bits are known at this point; a value above 3 yields a
character of 256 or more. */
above_255 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(above_255);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* We only have types for characters less than 256, so longer sequences are
just skipped using the length table. */
JUMPHERE(beyond_two_bytes);
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4753
4754
static void do_utfreadchar_invalid(compiler_common *common)
4755
{
4756
/* Slow decoding a UTF-8 character. TMP1 contains the first byte
4757
of the character (>= 0xc0). Return char value in TMP1. STR_PTR is
4758
undefined for invalid characters. Checks that branch jump to a shared
exit which loads INVALID_UTF_CHAR into TMP1. When SLJIT_HAS_CMOV is
reported, most range checks instead overwrite TMP1 with SELECT and
leave their exit_invalid[] slot NULL. */
4759
DEFINE_COMPILER;
4760
sljit_s32 i;
4761
sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
4762
struct sljit_jump *jump;
4763
struct sljit_jump *buffer_end_close;
4764
struct sljit_label *three_byte_entry;
4765
struct sljit_label *exit_invalid_label;
4766
struct sljit_jump *exit_invalid[11];
4767
4768
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
4769
4770
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc2);
4771
4772
/* Usually more than 3 characters remained in the subject buffer. */
4773
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
4774
4775
/* Not a valid start of a multi-byte sequence, no more bytes read. */
4776
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xf5 - 0xc2);
4777
4778
/* Fewer than three code units remain: use the carefully checked path. */
buffer_end_close = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
4779
4780
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
4781
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4782
/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
4783
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
4784
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
4785
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4786
4787
/* Bit 0x800 is clear only when the first byte was a two-byte lead. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
4788
jump = JUMP(SLJIT_NOT_ZERO);
4789
4790
/* Two-byte sequence: give back two of the three code units added above. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4791
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4792
4793
JUMPHERE(jump);
4794
4795
/* Three-byte sequence. */
4796
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4797
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4798
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
4799
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4800
if (has_cmov)
4801
  {
4802
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4803
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0x20000, TMP1);
4804
  exit_invalid[2] = NULL;
4805
  }
4806
else
4807
  exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4808
4809
/* Bit 0x10000 is set when the first byte was a four-byte lead. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x10000);
4810
jump = JUMP(SLJIT_NOT_ZERO);
4811
4812
three_byte_entry = LABEL();
4813
4814
/* TMP1 carries a 0x20000 bias here; after subtracting 0x2d800 a value
below 0x800 means the character is in the 0xd800-0xdfff range. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2d800);
4815
if (has_cmov)
4816
  {
4817
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
4818
  SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0xd800, TMP1);
4819
  exit_invalid[3] = NULL;
4820
  }
4821
else
4822
  exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
4823
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4824
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4825
4826
/* A three-byte sequence must not encode a value below 0x800. */
if (has_cmov)
4827
  {
4828
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
4829
  SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
4830
  exit_invalid[4] = NULL;
4831
  }
4832
else
4833
  exit_invalid[4] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
4834
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4835
4836
JUMPHERE(jump);
4837
4838
/* Four-byte sequence. */
4839
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4840
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4841
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
4842
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4843
if (has_cmov)
4844
  {
4845
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4846
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0, TMP1);
4847
  exit_invalid[5] = NULL;
4848
  }
4849
else
4850
  exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4851
4852
/* After removing the 0xc10000 bias the value must be below 0x100000,
so only characters in the 0x10000-0x10ffff range are accepted. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc10000);
4853
if (has_cmov)
4854
  {
4855
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
4856
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000, TMP1);
4857
  exit_invalid[6] = NULL;
4858
  }
4859
else
4860
  exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
4861
4862
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
4863
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4864
4865
/* Close to the end of the subject: STR_PTR is moved back so that it is
one code unit past the first byte, and every read is preceded by a
comparison against STR_END. */
JUMPHERE(buffer_end_close);
4866
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4867
exit_invalid[7] = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
4868
4869
/* Two-byte sequence. */
4870
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4871
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4872
/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
4873
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
4874
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
4875
exit_invalid[8] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4876
4877
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
4878
jump = JUMP(SLJIT_NOT_ZERO);
4879
4880
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4881
4882
/* Three-byte sequence. */
4883
JUMPHERE(jump);
4884
exit_invalid[9] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4885
4886
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4887
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4888
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
4889
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4890
if (has_cmov)
4891
  {
4892
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4893
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
4894
  exit_invalid[10] = NULL;
4895
  }
4896
else
4897
  exit_invalid[10] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4898
4899
/* One will be subtracted from STR_PTR later (at three_byte_entry). */
4900
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4901
4902
/* Four byte sequences are not possible. */
4903
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x30000, three_byte_entry);
4904
4905
exit_invalid_label = LABEL();
4906
/* NOTE(review): slots set to NULL on the has_cmov path are passed here
as well; this relies on sljit_set_label tolerating a NULL jump - confirm. */
for (i = 0; i < 11; i++)
4907
  sljit_set_label(exit_invalid[i], exit_invalid_label);
4908
4909
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
4910
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4911
}
4912
4913
static void do_utfreadnewline_invalid(compiler_common *common)
4914
{
4915
/* Slow decoding a UTF-8 character, specialized for newlines.
4916
TMP1 contains the first byte of the character (>= 0xc0). Return
4917
char value in TMP1. Only 0x85 (after a 0xc2 first byte) and
0x2000 | (third byte - 0x80) (after 0xe2 0x80) are decoded; every
other sequence has its continuation bytes (0x80-0xbf) skipped and
yields INVALID_UTF_CHAR. No code unit at or beyond STR_END is read. */
4918
DEFINE_COMPILER;
4919
struct sljit_label *loop;
4920
struct sljit_label *skip_start;
4921
struct sljit_label *three_byte_exit;
4922
struct sljit_jump *jump[5];
4923
4924
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
4925
4926
if (common->nltype != NLTYPE_ANY)
4927
  {
4928
  SLJIT_ASSERT(common->nltype != NLTYPE_FIXED || common->newline < 128);
4929
4930
  /* All newlines are ascii, just skip intermediate octets. */
4931
  loop = LABEL();
4932
  /* The end of the subject is tested on every iteration (as in the
  NLTYPE_ANY loop below), so a subject that ends with continuation
  bytes never causes a read at or after STR_END. */
  jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4933
  /* The SLJIT_MEM_SUPP call is only a capability probe (presumably it
  emits nothing); the post-increment load is emitted when supported. */
  if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)) == SLJIT_SUCCESS)
4934
    sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4935
  else
4936
    {
4937
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4938
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4939
    }
4940
4941
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
4942
  CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);
4943
  /* The last code unit read was not a continuation byte: step back. */
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4944
4945
  JUMPHERE(jump[0]);
4946
4947
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
4948
  OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4949
  return;
4950
  }
4951
4952
jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4953
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4954
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4955
4956
/* Only the 0xc2 and 0xe2 first bytes can start a multi-byte newline. */
jump[1] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xc2);
4957
jump[2] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xe2);
4958
4959
skip_start = LABEL();
4960
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
4961
jump[3] = CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80);
4962
4963
/* Skip intermediate octets. */
4964
loop = LABEL();
4965
jump[4] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4966
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4967
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4968
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
4969
CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);
4970
4971
JUMPHERE(jump[3]);
4972
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4973
4974
three_byte_exit = LABEL();
4975
JUMPHERE(jump[0]);
4976
JUMPHERE(jump[4]);
4977
4978
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
4979
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4980
4981
/* Two byte long newline: 0x85. */
4982
JUMPHERE(jump[1]);
4983
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x85, skip_start);
4984
4985
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x85);
4986
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4987
4988
/* Three byte long newlines: 0x2028 and 0x2029. */
4989
JUMPHERE(jump[2]);
4990
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, skip_start);
4991
CMPTO(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0, three_byte_exit);
4992
4993
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4994
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4995
4996
OP2(SLJIT_SUB, TMP1, 0, TMP2, 0, SLJIT_IMM, 0x80);
4997
CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40, skip_start);
4998
4999
/* Any valid third byte is accepted here, so the result is in the
0x2000-0x203f range; presumably the caller tests for 0x2028/0x2029. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0x2000);
5000
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
5001
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5002
}
5003
5004
static void do_utfmoveback_invalid(compiler_common *common)
5005
{
5006
/* Goes one character back. TMP2 is the lowest address that may be
read (presumably the start of the subject - confirm with callers).
On success TMP1 is 1 and STR_PTR has been moved back by one, two or
three code units from its entry value; on failure TMP1 is 0 and
STR_PTR is its entry value plus one. */
5007
DEFINE_COMPILER;
5008
sljit_s32 i;
5009
struct sljit_jump *jump;
5010
struct sljit_jump *buffer_start_close;
5011
struct sljit_label *exit_ok_label;
5012
struct sljit_label *exit_invalid_label;
5013
struct sljit_jump *exit_invalid[7];
5014
5015
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
5016
5017
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
5018
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);
5019
5020
/* Two-byte sequence. */
5021
/* Fewer than three code units precede the entry position: use the
path that compares STR_PTR with TMP2 before every read. */
buffer_start_close = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
5022
5023
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
5024
5025
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
5026
jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x20);
5027
5028
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
5029
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5030
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5031
5032
/* Three-byte sequence. */
5033
JUMPHERE(jump);
5034
/* The byte just examined must be a continuation byte (0x80-0xbf),
which is -0x40..-1 after the subtraction of 0xc0 above. */
exit_invalid[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);
5035
5036
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5037
5038
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
5039
jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x10);
5040
5041
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
5042
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5043
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5044
5045
/* Four-byte sequence. */
5046
JUMPHERE(jump);
5047
/* Rebase to 0x80 so that continuation bytes map to 0x00-0x3f. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
5048
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40);
5049
5050
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5051
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0);
5052
/* Only 0xf0-0xf4 are accepted as the first byte of four. */
exit_invalid[3] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x05);
5053
5054
exit_ok_label = LABEL();
5055
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
5056
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5057
5058
/* Two-byte sequence. */
5059
JUMPHERE(buffer_start_close);
5060
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5061
5062
exit_invalid[4] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
5063
5064
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5065
5066
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
5067
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20, exit_ok_label);
5068
5069
/* Three-byte sequence. */
5070
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5071
exit_invalid[5] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);
5072
exit_invalid[6] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
5073
5074
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5075
5076
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
5077
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10, exit_ok_label);
5078
5079
/* Four-byte sequences are not possible. */
5080
5081
/* Invalid exit for the paths where STR_PTR is two below its entry value. */
exit_invalid_label = LABEL();
5082
sljit_set_label(exit_invalid[5], exit_invalid_label);
5083
sljit_set_label(exit_invalid[6], exit_invalid_label);
5084
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
5085
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
5086
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5087
5088
JUMPHERE(exit_invalid[4]);
5089
/* -2 + 4 = 2 */
5090
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5091
5092
/* Invalid exit for the paths where STR_PTR is three below its entry value. */
exit_invalid_label = LABEL();
5093
for (i = 0; i < 4; i++)
5094
  sljit_set_label(exit_invalid[i], exit_invalid_label);
5095
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
5096
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(4));
5097
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5098
}
5099
5100
static void do_utfpeakcharback(compiler_common *common)
5101
{
5102
/* Peek a character back. Does not modify STR_PTR. The bytes at
offsets -2, -3 and -4 from STR_PTR are inspected in turn to find
the first byte of the sequence, then the remaining bytes are merged
six bits at a time. The character is returned in TMP1 and TMP2 is
clobbered. No validation is emitted and the byte at offset -1 is
always used as the last byte of a multi-byte sequence.
NOTE(review): presumably callers only get here for valid UTF-8 after
having seen a continuation byte at offset -1 - confirm. */
5103
DEFINE_COMPILER;
5104
struct sljit_jump *jump[2];
5105
5106
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
5107
5108
/* A byte in 0xc0-0xdf at offset -2 starts a two-byte sequence. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5109
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
5110
jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20);
5111
5112
/* A byte in 0xe0-0xef at offset -3 starts a three-byte sequence. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
5113
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
5114
jump[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10);
5115
5116
/* Otherwise the sequence has four bytes: the byte at -3 is treated as a
continuation byte and merged with the first byte at -4. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
5117
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
5118
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
5119
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
5120
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
5121
5122
/* Merge the byte at offset -2 (three- and four-byte sequences). */
JUMPHERE(jump[1]);
5123
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5124
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
5125
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
5126
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
5127
5128
/* Merge the byte at offset -1 (all sequences). */
JUMPHERE(jump[0]);
5129
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5130
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
5131
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
5132
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
5133
5134
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5135
}
5136
5137
static void do_utfpeakcharback_invalid(compiler_common *common)
5138
{
5139
/* Peek a character back. Does not modify STR_PTR. Returns the
character in TMP1, or INVALID_UTF_CHAR when a check fails. TMP2 is
used as the lowest readable address and is clobbered.
NOTE(review): the code never reads offset -1 and merges TMP1 as the
last byte, so TMP1 presumably already holds the code unit just
before STR_PTR on entry - confirm with callers. */
5140
DEFINE_COMPILER;
5141
sljit_s32 i;
5142
sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
5143
struct sljit_jump *jump[2];
5144
struct sljit_label *two_byte_entry;
5145
struct sljit_label *three_byte_entry;
5146
struct sljit_label *exit_invalid_label;
5147
struct sljit_jump *exit_invalid[8];
5148
5149
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
5150
5151
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
5152
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);
5153
/* Fewer than four code units are available before STR_PTR. */
jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);
5154
5155
/* Two-byte sequence. */
5156
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5157
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
5158
/* 0xc2-0xdf are the accepted first bytes of a two-byte sequence. */
jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x1e);
5159
5160
two_byte_entry = LABEL();
5161
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
5162
/* If TMP1 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
5163
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5164
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5165
5166
JUMPHERE(jump[1]);
5167
/* The byte at -2 must then be a continuation byte; both it and TMP1
are rebased to 0x80 before being merged. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
5168
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
5169
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
5170
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
5171
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
5172
5173
/* Three-byte sequence. */
5174
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
5175
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
5176
jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x10);
5177
5178
three_byte_entry = LABEL();
5179
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
5180
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
5181
5182
/* Reject the 0xd800-0xdfff range. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5183
if (has_cmov)
5184
  {
5185
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
5186
  SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, -0xd800, TMP1);
5187
  exit_invalid[2] = NULL;
5188
  }
5189
else
5190
  exit_invalid[2] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
5191
5192
/* Reject three-byte sequences that encode a value below 0x800. On the
has_cmov path the -0xd800 selected above becomes 0 here and is
rejected by the same test. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5193
if (has_cmov)
5194
  {
5195
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
5196
  SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
5197
  exit_invalid[3] = NULL;
5198
  }
5199
else
5200
  exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
5201
5202
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5203
5204
JUMPHERE(jump[1]);
5205
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0 - 0x80);
5206
exit_invalid[4] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
5207
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
5208
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
5209
5210
/* Four-byte sequence. */
5211
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
5212
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
5213
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
5214
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 18);
5215
/* ADD is used instead of OR because of the SUB 0x10000 above. */
5216
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5217
5218
/* TMP1 is the character minus 0x10000 and must be below 0x100000. */
if (has_cmov)
5219
  {
5220
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
5221
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000, TMP1);
5222
  exit_invalid[5] = NULL;
5223
  }
5224
else
5225
  exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
5226
5227
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
5228
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5229
5230
/* Close to the lowest readable address: only two- and three-byte
sequences are tried here. */
JUMPHERE(jump[0]);
5231
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
5232
jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);
5233
5234
/* Two-byte sequence. */
5235
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5236
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
5237
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);
5238
5239
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
5240
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
5241
exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
5242
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
5243
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
5244
5245
/* Three-byte sequence. */
5246
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
5247
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
5248
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x10, three_byte_entry);
5249
5250
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
5251
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5252
5253
/* Even closer to the lowest readable address: only a two-byte
sequence is tried. */
JUMPHERE(jump[0]);
5254
exit_invalid[7] = CMP(SLJIT_GREATER, TMP2, 0, STR_PTR, 0);
5255
5256
/* Two-byte sequence. */
5257
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5258
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
5259
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);
5260
5261
exit_invalid_label = LABEL();
5262
/* NOTE(review): slots set to NULL on the has_cmov path are passed here
as well; this relies on sljit_set_label tolerating a NULL jump - confirm. */
for (i = 0; i < 8; i++)
5263
  sljit_set_label(exit_invalid[i], exit_invalid_label);
5264
5265
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
5266
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5267
}
5268
5269
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
5270
5271
#if PCRE2_CODE_UNIT_WIDTH == 16
5272
5273
static void do_utfreadchar_invalid(compiler_common *common)
5274
{
5275
/* Slow decoding a UTF-16 character. TMP1 contains the first half
5276
of the character (>= 0xd800). Return char value in TMP1. STR_PTR is
5277
undefined for invalid characters. INVALID_UTF_CHAR is returned when
the first half is 0xdc00 or above, when no code unit is left before
STR_END, or when the second half is outside 0xdc00-0xdfff. */
5278
DEFINE_COMPILER;
5279
struct sljit_jump *exit_invalid[3];
5280
5281
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
5282
5283
/* TMP2 contains the high surrogate. */
5284
/* NOTE(review): TMP2 is only shifted by 10 and biased by 0x10000 below,
so it presumably arrives with 0xd800 already subtracted - confirm. */
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);
5285
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5286
5287
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5288
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
5289
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5290
5291
/* The second half must be in the 0xdc00-0xdfff range. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
5292
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
5293
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);
5294
5295
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5296
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5297
5298
JUMPHERE(exit_invalid[0]);
5299
JUMPHERE(exit_invalid[1]);
5300
JUMPHERE(exit_invalid[2]);
5301
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
5302
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5303
}
5304
5305
static void do_utfreadnewline_invalid(compiler_common *common)
5306
{
5307
/* Slow decoding a UTF-16 character, specialized for newlines.
5308
TMP1 contains the first half of the character (>= 0xd800). Return
5309
char value in TMP1. The pair is not decoded: when the first half is
below 0xdc00 and a code unit is available, the constant 0x10000 is
returned and STR_PTR is advanced by one code unit only if that unit
is in the 0xdc00-0xdfff range. Otherwise INVALID_UTF_CHAR is
returned and STR_PTR is unchanged. */
5310
5311
DEFINE_COMPILER;
5312
struct sljit_jump *exit_invalid[2];
5313
5314
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
5315
5316
/* TMP2 contains the high surrogate. */
5317
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5318
5319
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5320
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);
5321
5322
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
5323
/* TMP2 receives the result of the "below 0x400" test (presumably 1 or 0),
which is then scaled to a code unit size and added to STR_PTR. */
OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
5324
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
5325
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
5326
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
5327
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5328
5329
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5330
5331
JUMPHERE(exit_invalid[0]);
5332
JUMPHERE(exit_invalid[1]);
5333
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
5334
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5335
}
5336
5337
static void do_utfmoveback_invalid(compiler_common *common)
5338
{
5339
/* Goes one character back. TMP2 is the lowest address that may be
read (presumably the start of the subject - confirm with callers).
On success TMP1 is 1 and STR_PTR has been moved back by one code
unit; on failure TMP1 is 0 and STR_PTR has been moved forward by
one code unit.
NOTE(review): TMP1 is tested against 0x400 on entry, so it presumably
holds an already rebased code unit - confirm with callers. */
5340
DEFINE_COMPILER;
5341
struct sljit_jump *exit_invalid[3];
5342
5343
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
5344
5345
exit_invalid[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400);
5346
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);
5347
5348
/* The preceding code unit must be in the 0xd800-0xdbff range. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5349
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5350
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);
5351
5352
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5353
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
5354
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5355
5356
JUMPHERE(exit_invalid[0]);
5357
JUMPHERE(exit_invalid[1]);
5358
JUMPHERE(exit_invalid[2]);
5359
5360
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5361
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
5362
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5363
}
5364
5365
static void do_utfpeakcharback_invalid(compiler_common *common)
5366
{
5367
/* Peek a character back. Does not modify STR_PTR. TMP1 is returned
unchanged when it is 0xe000 or above. Otherwise it must be in the
0xdc00-0xdfff range and is combined with the code unit at offset -2,
which must be in the 0xd800-0xdbff range. INVALID_UTF_CHAR is
returned in TMP1 when a check fails. TMP2 is used as the lowest
readable address and is clobbered.
NOTE(review): TMP1 presumably holds the code unit just before STR_PTR
on entry - confirm with callers. */
5368
DEFINE_COMPILER;
5369
struct sljit_jump *jump;
5370
struct sljit_jump *exit_invalid[3];
5371
5372
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
5373
5374
jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000);
5375
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
5376
exit_invalid[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
5377
/* Two code units must be available before STR_PTR. */
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);
5378
5379
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5380
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
5381
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
5382
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
5383
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
5384
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5385
5386
JUMPHERE(jump);
5387
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5388
5389
JUMPHERE(exit_invalid[0]);
5390
JUMPHERE(exit_invalid[1]);
5391
JUMPHERE(exit_invalid[2]);
5392
5393
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
5394
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5395
}
5396
5397
#endif /* PCRE2_CODE_UNIT_WIDTH == 16 */
5398
5399
/* UCD_BLOCK_SIZE must be 128 (see the assert below). UCD_BLOCK_MASK is
that size minus one and UCD_BLOCK_SHIFT is its base-2 logarithm; both
are used by the emitted table lookups in do_getucd and do_getucdtype. */
5400
#define UCD_BLOCK_MASK 127
5401
#define UCD_BLOCK_SHIFT 7
5402
5403
static void do_getucd(compiler_common *common)
5404
{
5405
/* Search the UCD record for the character that comes in TMP1.
5406
Returns the PRIV(ucd_stage2) entry of the character in TMP2. TMP1 is
left holding the stage 2 index; unlike do_getucdtype, no chartype is
loaded by this function. */
5407
DEFINE_COMPILER;
5408
#if PCRE2_CODE_UNIT_WIDTH == 32
5409
struct sljit_jump *jump;
5410
#endif
5411
5412
#if defined SLJIT_DEBUG && SLJIT_DEBUG
5413
/* dummy_ucd_record */
5414
const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR);
5415
SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
5416
SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
5417
#endif
5418
5419
SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);
5420
5421
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
5422
5423
#if PCRE2_CODE_UNIT_WIDTH == 32
5424
/* Outside UTF mode, values above MAX_UTF_CODE_POINT are looked up as
UNASSIGNED_UTF_CHAR. */
if (!common->utf)
5425
  {
5426
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
5427
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
5428
  JUMPHERE(jump);
5429
  }
5430
#endif
5431
5432
/* Stage 1: 16 bit entries indexed by the block number of the character. */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5433
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5434
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
5435
/* Stage 2: index is (stage 1 entry << UCD_BLOCK_SHIFT) + offset in block. */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
5436
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5437
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5438
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
5439
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
5440
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5441
}
5442
5443
static void do_getucdtype(compiler_common *common)
5444
{
5445
/* Search the UCD record for the character that comes in TMP1.
5446
Returns chartype in TMP1 and UCD offset in TMP2. Note that TMP2 holds
the PRIV(ucd_stage2) entry shifted left by 2 on return, not the
entry itself. */
5447
DEFINE_COMPILER;
5448
#if PCRE2_CODE_UNIT_WIDTH == 32
5449
struct sljit_jump *jump;
5450
#endif
5451
5452
#if defined SLJIT_DEBUG && SLJIT_DEBUG
5453
/* dummy_ucd_record */
5454
const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR);
5455
SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
5456
SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
5457
#endif
5458
5459
SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);
5460
5461
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
5462
5463
#if PCRE2_CODE_UNIT_WIDTH == 32
5464
/* Outside UTF mode, values above MAX_UTF_CODE_POINT are looked up as
UNASSIGNED_UTF_CHAR. */
if (!common->utf)
5465
  {
5466
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
5467
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
5468
  JUMPHERE(jump);
5469
  }
5470
#endif
5471
5472
/* Stage 1: 16 bit entries indexed by the block number of the character. */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5473
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5474
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
5475
/* Stage 2: index is (stage 1 entry << UCD_BLOCK_SHIFT) + offset in block. */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
5476
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5477
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5478
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
5479
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
5480
5481
/* TMP2 is multiplied by 12. Same as (TMP2 << 2) + ((TMP2 << 2) << 1). */
5482
/* 12 is sizeof(ucd_record), see the assert above: the shifted TMP2 is
added once to the base and once more, shifted by 1, by the load. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
5483
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
5484
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5485
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 1);
5486
5487
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5488
}
5489
5490
#endif /* SUPPORT_UNICODE */
5491
5492
static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common)
5493
{
5494
DEFINE_COMPILER;
5495
struct sljit_label *mainloop;
5496
struct sljit_label *newlinelabel = NULL;
5497
struct sljit_jump *start;
5498
struct sljit_jump *end = NULL;
5499
struct sljit_jump *end2 = NULL;
5500
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5501
struct sljit_label *loop;
5502
struct sljit_jump *jump;
5503
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
5504
jump_list *newline = NULL;
5505
sljit_u32 overall_options = common->re->overall_options;
5506
BOOL hascrorlf = (common->re->flags & PCRE2_HASCRORLF) != 0;
5507
BOOL newlinecheck = FALSE;
5508
BOOL readuchar = FALSE;
5509
5510
if (!(hascrorlf || (overall_options & PCRE2_FIRSTLINE) != 0)
5511
    && (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
5512
  newlinecheck = TRUE;
5513
5514
SLJIT_ASSERT(common->abort_label == NULL);
5515
5516
if ((overall_options & PCRE2_FIRSTLINE) != 0)
5517
  {
5518
  /* Search for the end of the first line. */
5519
  SLJIT_ASSERT(common->match_end_ptr != 0);
5520
  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
5521
5522
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5523
    {
5524
    mainloop = LABEL();
5525
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5526
    end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5527
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5528
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5529
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
5530
    CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
5531
    JUMPHERE(end);
5532
    OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5533
    }
5534
  else
5535
    {
5536
    end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5537
    mainloop = LABEL();
5538
    /* Continual stores does not cause data dependency. */
5539
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
5540
    read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
5541
    check_newlinechar(common, common->nltype, &newline, TRUE);
5542
    CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
5543
    JUMPHERE(end);
5544
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
5545
    set_jumps(newline, LABEL());
5546
    }
5547
5548
  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
5549
  }
5550
else if ((overall_options & PCRE2_USE_OFFSET_LIMIT) != 0)
5551
  {
5552
  /* Check whether offset limit is set and valid. */
5553
  SLJIT_ASSERT(common->match_end_ptr != 0);
5554
5555
  if (HAS_VIRTUAL_REGISTERS)
5556
    {
5557
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5558
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, offset_limit));
5559
    }
5560
  else
5561
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, offset_limit));
5562
5563
  OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
5564
  end = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw) PCRE2_UNSET);
5565
  if (HAS_VIRTUAL_REGISTERS)
5566
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5567
  else
5568
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
5569
5570
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
5571
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5572
#endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
5573
  if (HAS_VIRTUAL_REGISTERS)
5574
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5575
5576
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
5577
  end2 = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
5578
  OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
5579
  JUMPHERE(end2);
5580
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
5581
  add_jump(compiler, &common->abort, CMP(SLJIT_LESS, TMP2, 0, STR_PTR, 0));
5582
  JUMPHERE(end);
5583
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, TMP2, 0);
5584
  }
5585
5586
start = JUMP(SLJIT_JUMP);
5587
5588
if (newlinecheck)
5589
  {
5590
  newlinelabel = LABEL();
5591
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5592
  end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5593
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5594
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
5595
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
5596
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
5597
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5598
#endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
5599
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5600
  end2 = JUMP(SLJIT_JUMP);
5601
  }
5602
5603
mainloop = LABEL();
5604
5605
/* Increasing the STR_PTR here requires one less jump in the most common case. */
5606
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5607
if (common->utf && !common->invalid_utf) readuchar = TRUE;
5608
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
5609
if (newlinecheck) readuchar = TRUE;
5610
5611
if (readuchar)
5612
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5613
5614
if (newlinecheck)
5615
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
5616
5617
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5618
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5619
#if PCRE2_CODE_UNIT_WIDTH == 8
5620
if (common->invalid_utf)
5621
  {
5622
  /* Skip continuation code units. */
5623
  loop = LABEL();
5624
  jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5625
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5626
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5627
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
5628
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x40, loop);
5629
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5630
  JUMPHERE(jump);
5631
  }
5632
else if (common->utf)
5633
  {
5634
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5635
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5636
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5637
  JUMPHERE(jump);
5638
  }
5639
#elif PCRE2_CODE_UNIT_WIDTH == 16
5640
if (common->invalid_utf)
5641
  {
5642
  /* Skip continuation code units. */
5643
  loop = LABEL();
5644
  jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5645
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5646
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5647
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
5648
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400, loop);
5649
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5650
  JUMPHERE(jump);
5651
  }
5652
else if (common->utf)
5653
  {
5654
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5655
5656
  if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
5657
    {
5658
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5659
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x400);
5660
    SELECT(SLJIT_LESS, STR_PTR, TMP2, 0, STR_PTR);
5661
    }
5662
  else
5663
    {
5664
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x400);
5665
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
5666
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5667
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5668
    }
5669
  }
5670
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
5671
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
5672
JUMPHERE(start);
5673
5674
if (newlinecheck)
5675
  {
5676
  JUMPHERE(end);
5677
  JUMPHERE(end2);
5678
  }
5679
5680
return mainloop;
5681
}
5682
5683
5684
/* Records the code unit chr as a possible value for one prefix position.

   chars  the per-position set being built; a count of 255 means the
          position is treated as "any" and is never modified again
   last   when TRUE and chr is newly stored, last_count is updated too

   A code unit that is already present is ignored. When the set is full
   (MAX_DIFF_CHARS entries) the position is switched to 255. */
static SLJIT_INLINE void add_prefix_char(PCRE2_UCHAR chr, fast_forward_char_data *chars, BOOL last)
{
const sljit_u32 used = chars->count;
sljit_u32 slot;

if (used == 255)
  return;

if (used != 0)
  {
  /* Duplicates are not stored twice. */
  slot = 0;
  while (slot < used)
    {
    if (chars->chars[slot] == chr)
      return;
    slot++;
    }

  /* No free slot is left: give up on tracking this position. */
  if (used >= MAX_DIFF_CHARS)
    {
    chars->count = 255;
    return;
    }

  chars->chars[used] = chr;
  chars->count = used + 1;

  if (last)
    chars->last_count++;
  return;
  }

/* First code unit stored for this position. */
chars->chars[0] = chr;
chars->count = 1;

if (last)
  chars->last_count = 1;
}
5717
5718
/* Capacity of the explicit stacks used by scan_prefix().
The value can be increased if needed. Patterns
such as /(a|){33}b/ can exhaust the stack.

Note: /(a|){29}b/ already stops scan_prefix()
because it reaches the maximum step_count. */
5723
#define SCAN_PREFIX_STACK_END 32
5724
5725
/*
5726
Scan prefix stores the prefix string in the chars array.
5727
The elements of the chars array are either small character
5728
sets or "any" (count is set to 255).
5729
5730
Examples (the chars array is represented by a simple regex):
5731
5732
/(abc|xbyd)/ prefix: /[ax]b[cy]/ (length: 3)
5733
/a[a-z]b+c/ prefix: a.b (length: 3)
5734
/ab?cd/ prefix: a[bc][cd] (length: 3)
5735
/(ab|cd)|(ef|gh)/ prefix: [aceg][bdfh] (length: 2)
5736
5737
The length is returned by scan_prefix(). The length is
5738
less than or equal to the minimum length of the pattern.
5739
*/
5740
5741
/* Walks the compiled pattern starting at cc and fills chars[] (at most
MAX_N_CHARS entries) with the code units that may occur at each leading
position, using add_prefix_char(). Optional items and alternatives are
explored by saving the other path on small explicit stacks.

Returns the number of prefix positions (chars_end - chars_start), or 0
when the iteration budget (step_count) is used up. */
static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, fast_forward_char_data *chars)
5742
{
5743
fast_forward_char_data *chars_start = chars;
5744
/* End of the usable prefix; it is moved down whenever a path must stop. */
fast_forward_char_data *chars_end = chars + MAX_N_CHARS;
5745
/* Parallel stacks describing the paths still to be explored: the pattern
position to resume at, the chars[] slot to resume at, and whether the
saved pattern position is an OP_ALT (1) or a plain continuation (0). */
PCRE2_SPTR cc_stack[SCAN_PREFIX_STACK_END];
5746
fast_forward_char_data *chars_stack[SCAN_PREFIX_STACK_END];
5747
sljit_u8 next_alternative_stack[SCAN_PREFIX_STACK_END];
5748
BOOL last, any, class, caseless;
5749
int stack_ptr, step_count, repeat, len, len_save;
5750
sljit_u32 chr; /* Any unicode character. */
5751
sljit_u8 *bytes, *bytes_end, byte;
5752
PCRE2_SPTR alternative, cc_save, oc;
5753
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5754
PCRE2_UCHAR othercase[4];
5755
#elif defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
5756
PCRE2_UCHAR othercase[2];
5757
#else
5758
PCRE2_UCHAR othercase[1];
5759
#endif
5760
5761
repeat = 1;
5762
stack_ptr = 0;
5763
/* Upper bound on the number of loop iterations. */
step_count = 10000;
5764
while (TRUE)
5765
  {
5766
  if (--step_count == 0)
5767
    return 0;
5768
5769
  SLJIT_ASSERT(chars <= chars_start + MAX_N_CHARS);
5770
5771
  /* The current path cannot contribute more positions. */
  if (chars >= chars_end)
5772
    {
5773
    /* No saved path is left: the scan is complete. */
    if (stack_ptr == 0)
5774
      return (int)(chars_end - chars_start);
5775
5776
    --stack_ptr;
5777
    cc = cc_stack[stack_ptr];
5778
    chars = chars_stack[stack_ptr];
5779
5780
    /* chars_end may have moved down since this entry was pushed. */
    if (chars >= chars_end)
5781
      continue;
5782
5783
    if (next_alternative_stack[stack_ptr] != 0)
5784
      {
5785
      /* When an alternative is processed, the
      next alternative is pushed onto the stack. */
5787
      SLJIT_ASSERT(*cc == OP_ALT);
5788
      alternative = cc + GET(cc, 1);
5789
      if (*alternative == OP_ALT)
5790
        {
5791
        SLJIT_ASSERT(stack_ptr < SCAN_PREFIX_STACK_END);
5792
        SLJIT_ASSERT(chars_stack[stack_ptr] == chars);
5793
        SLJIT_ASSERT(next_alternative_stack[stack_ptr] == 1);
5794
        /* Reuse the slot just popped: only the pattern position changes. */
        cc_stack[stack_ptr] = alternative;
5795
        stack_ptr++;
5796
        }
5797
      /* Step over the OP_ALT opcode and its link. */
      cc += 1 + LINK_SIZE;
5798
      }
5799
    }
5800
5801
  /* Per-item state. last: the prefix ends after this item; any / class:
  select the generic handlers after the switch; caseless: the other case
  of a literal is recorded as well. */
  last = TRUE;
5802
  any = FALSE;
5803
  class = FALSE;
5804
  caseless = FALSE;
5805
5806
  switch (*cc)
5807
    {
5808
    case OP_CHARI:
5809
    caseless = TRUE;
5810
    /* Fall through */
5811
    case OP_CHAR:
5812
    /* A single literal: scanning may continue after it. */
    last = FALSE;
5813
    cc++;
5814
    break;
5815
5816
    case OP_SOD:
5817
    case OP_SOM:
5818
    case OP_SET_SOM:
5819
    case OP_NOT_WORD_BOUNDARY:
5820
    case OP_WORD_BOUNDARY:
5821
    case OP_EODN:
5822
    case OP_EOD:
5823
    case OP_CIRC:
5824
    case OP_CIRCM:
5825
    case OP_DOLL:
5826
    case OP_DOLLM:
5827
    case OP_NOT_UCP_WORD_BOUNDARY:
5828
    case OP_UCP_WORD_BOUNDARY:
5829
    /* Zero width assertions. */
5830
    cc++;
5831
    continue;
5832
5833
    case OP_ASSERT:
5834
    case OP_ASSERT_NOT:
5835
    case OP_ASSERTBACK:
5836
    case OP_ASSERTBACK_NOT:
5837
    case OP_ASSERT_NA:
5838
    case OP_ASSERTBACK_NA:
5839
    case OP_ASSERT_SCS:
5840
    /* Assertion groups are not scanned; resume at the position
    returned by bracketend(). */
    cc = bracketend(cc);
5841
    continue;
5842
5843
    case OP_PLUSI:
5844
    case OP_MINPLUSI:
5845
    case OP_POSPLUSI:
5846
    caseless = TRUE;
5847
    /* Fall through */
5848
    case OP_PLUS:
5849
    case OP_MINPLUS:
5850
    case OP_POSPLUS:
5851
    /* Only one occurrence is recorded and last stays TRUE. */
    cc++;
5852
    break;
5853
5854
    case OP_EXACTI:
5855
    caseless = TRUE;
5856
    /* Fall through */
5857
    case OP_EXACT:
5858
    /* The repeat count is stored right after the opcode. */
    repeat = GET2(cc, 1);
5859
    last = FALSE;
5860
    cc += 1 + IMM2_SIZE;
5861
    break;
5862
5863
    case OP_QUERYI:
5864
    case OP_MINQUERYI:
5865
    case OP_POSQUERYI:
5866
    caseless = TRUE;
5867
    /* Fall through */
5868
    case OP_QUERY:
5869
    case OP_MINQUERY:
5870
    case OP_POSQUERY:
5871
    /* Optional character: save the path that skips it (cc + len), then
    process the character itself below. */
    len = 1;
5872
    cc++;
5873
#ifdef SUPPORT_UNICODE
5874
    if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
5875
#endif
5876
    /* No room to save the other path: end the prefix here. */
    if (stack_ptr >= SCAN_PREFIX_STACK_END)
5877
      {
5878
      chars_end = chars;
5879
      continue;
5880
      }
5881
5882
    cc_stack[stack_ptr] = cc + len;
5883
    chars_stack[stack_ptr] = chars;
5884
    next_alternative_stack[stack_ptr] = 0;
5885
    stack_ptr++;
5886
5887
    last = FALSE;
5888
    break;
5889
5890
    case OP_KET:
5891
    cc += 1 + LINK_SIZE;
5892
    continue;
5893
5894
    case OP_ALT:
5895
    /* Reached the end of an alternative: follow its link. */
    cc += GET(cc, 1);
5896
    continue;
5897
5898
    case OP_ONCE:
5899
    case OP_BRA:
5900
    case OP_BRAPOS:
5901
    case OP_CBRA:
5902
    case OP_CBRAPOS:
5903
    /* Save the next alternative (if there is one), then scan the
    first alternative of the group. */
    alternative = cc + GET(cc, 1);
5904
    if (*alternative == OP_ALT)
5905
      {
5906
      if (stack_ptr >= SCAN_PREFIX_STACK_END)
5907
        {
5908
        chars_end = chars;
5909
        continue;
5910
        }
5911
5912
      cc_stack[stack_ptr] = alternative;
5913
      chars_stack[stack_ptr] = chars;
5914
      next_alternative_stack[stack_ptr] = 1;
5915
      stack_ptr++;
5916
      }
5917
5918
    /* Capturing brackets carry an additional IMM2_SIZE field. */
    if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
5919
      cc += IMM2_SIZE;
5920
    cc += 1 + LINK_SIZE;
5921
    continue;
5922
5923
    case OP_CLASS:
5924
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5925
    /* In UTF-8 mode only bitmaps accepted by is_char7_bitset() are used. */
    if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
5926
      {
5927
      chars_end = chars;
5928
      continue;
5929
      }
5930
#endif
5931
    class = TRUE;
5932
    break;
5933
5934
    case OP_NCLASS:
5935
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5936
    if (common->utf)
5937
      {
5938
      chars_end = chars;
5939
      continue;
5940
      }
5941
#endif
5942
    class = TRUE;
5943
    break;
5944
5945
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
5946
    case OP_XCLASS:
5947
    case OP_ECLASS:
5948
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5949
    if (common->utf)
5950
      {
5951
      chars_end = chars;
5952
      continue;
5953
      }
5954
#endif
5955
    /* Extended classes are not analysed: the position matches anything. */
    any = TRUE;
5956
    cc += GET(cc, 1);
5957
    break;
5958
#endif
5959
5960
    case OP_DIGIT:
5961
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5962
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
5963
      {
5964
      chars_end = chars;
5965
      continue;
5966
      }
5967
#endif
5968
    any = TRUE;
5969
    cc++;
5970
    break;
5971
5972
    case OP_WHITESPACE:
5973
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5974
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
5975
      {
5976
      chars_end = chars;
5977
      continue;
5978
      }
5979
#endif
5980
    any = TRUE;
5981
    cc++;
5982
    break;
5983
5984
    case OP_WORDCHAR:
5985
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5986
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
5987
      {
5988
      chars_end = chars;
5989
      continue;
5990
      }
5991
#endif
5992
    any = TRUE;
5993
    cc++;
5994
    break;
5995
5996
    case OP_NOT:
5997
    case OP_NOTI:
5998
    /* Skip the opcode here; the shared code below skips the operand. */
    cc++;
5999
    /* Fall through. */
6000
    case OP_NOT_DIGIT:
6001
    case OP_NOT_WHITESPACE:
6002
    case OP_NOT_WORDCHAR:
6003
    case OP_ANY:
6004
    case OP_ALLANY:
6005
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
6006
    if (common->utf)
6007
      {
6008
      chars_end = chars;
6009
      continue;
6010
      }
6011
#endif
6012
    any = TRUE;
6013
    cc++;
6014
    break;
6015
6016
#ifdef SUPPORT_UNICODE
6017
    case OP_NOTPROP:
6018
    case OP_PROP:
6019
#if PCRE2_CODE_UNIT_WIDTH != 32
6020
    if (common->utf)
6021
      {
6022
      chars_end = chars;
6023
      continue;
6024
      }
6025
#endif
6026
    any = TRUE;
6027
    cc += 1 + 2;
6028
    break;
6029
#endif
6030
6031
    case OP_TYPEEXACT:
6032
    /* Remember the count; the opcode that follows is handled by the
    next iteration with this repeat value. */
    repeat = GET2(cc, 1);
6033
    cc += 1 + IMM2_SIZE;
6034
    continue;
6035
6036
    case OP_NOTEXACT:
6037
    case OP_NOTEXACTI:
6038
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
6039
    if (common->utf)
6040
      {
6041
      chars_end = chars;
6042
      continue;
6043
      }
6044
#endif
6045
    any = TRUE;
6046
    repeat = GET2(cc, 1);
6047
    cc += 1 + IMM2_SIZE + 1;
6048
    break;
6049
6050
    default:
6051
    /* Any other opcode ends the prefix at the current position. */
    chars_end = chars;
6052
    continue;
6053
    }
6054
6055
  SLJIT_ASSERT(chars < chars_end);
6056
6057
  if (any)
6058
    {
6059
    /* Mark up to 'repeat' positions (bounded by chars_end) as "any". */
    do
6060
      {
6061
      chars->count = 255;
6062
      chars++;
6063
      }
6064
    while (--repeat > 0 && chars < chars_end);
6065
6066
    repeat = 1;
6067
    continue;
6068
    }
6069
6070
  if (class)
6071
    {
6072
    /* The 32 byte bitmap follows the opcode; cc is moved past it. */
    bytes = (sljit_u8*) (cc + 1);
6073
    cc += 1 + 32 / sizeof(PCRE2_UCHAR);
6074
6075
    SLJIT_ASSERT(last == TRUE && repeat == 1);
6076
    /* Inspect the repeat opcode (if any) that follows the bitmap. */
    switch (*cc)
6077
      {
6078
      case OP_CRQUERY:
6079
      case OP_CRMINQUERY:
6080
      case OP_CRPOSQUERY:
6081
      last = FALSE;
6082
      /* Fall through */
6083
      case OP_CRSTAR:
6084
      case OP_CRMINSTAR:
6085
      case OP_CRPOSSTAR:
6086
      /* Zero occurrences are possible: save the path that skips
      the class. */
      if (stack_ptr >= SCAN_PREFIX_STACK_END)
6087
        {
6088
        chars_end = chars;
6089
        continue;
6090
        }
6091
6092
      cc_stack[stack_ptr] = ++cc;
6093
      chars_stack[stack_ptr] = chars;
6094
      next_alternative_stack[stack_ptr] = 0;
6095
      stack_ptr++;
6096
      break;
6097
6098
      default:
6099
      case OP_CRPLUS:
6100
      case OP_CRMINPLUS:
6101
      case OP_CRPOSPLUS:
6102
      break;
6103
6104
      case OP_CRRANGE:
6105
      case OP_CRMINRANGE:
6106
      case OP_CRPOSRANGE:
6107
      /* Use the first stored count as the number of repetitions. */
      repeat = GET2(cc, 1);
6108
      if (repeat <= 0)
6109
        {
6110
        chars_end = chars;
6111
        continue;
6112
        }
6113
6114
      /* The prefix may continue only when both stored counts are equal. */
      last = (repeat != (int)GET2(cc, 1 + IMM2_SIZE));
6115
      cc += 1 + 2 * IMM2_SIZE;
6116
      break;
6117
      }
6118
6119
    do
6120
      {
6121
      /* The highest bit of the bitmap is set: treat the position as "any". */
      if (bytes[31] & 0x80)
6122
        chars->count = 255;
6123
      else if (chars->count != 255)
6124
        {
6125
        /* Add every value whose bit is set; zero bytes are skipped
        eight values at a time. Stops early once the set overflows. */
        bytes_end = bytes + 32;
6126
        chr = 0;
6127
        do
6128
          {
6129
          byte = *bytes++;
6130
          SLJIT_ASSERT((chr & 0x7) == 0);
6131
          if (byte == 0)
6132
            chr += 8;
6133
          else
6134
            {
6135
            do
6136
              {
6137
              if ((byte & 0x1) != 0)
6138
                add_prefix_char(chr, chars, TRUE);
6139
              byte >>= 1;
6140
              chr++;
6141
              }
6142
            while (byte != 0);
6143
            /* Round up to the first value of the next bitmap byte. */
            chr = (chr + 7) & (sljit_u32)(~7);
6144
            }
6145
          }
6146
        while (chars->count != 255 && bytes < bytes_end);
6147
        /* Rewind to the start of the bitmap for the next repetition. */
        bytes = bytes_end - 32;
6148
        }
6149
6150
      chars++;
6151
      }
6152
    while (--repeat > 0 && chars < chars_end);
6153
6154
    repeat = 1;
6155
    if (last)
6156
      chars_end = chars;
6157
    continue;
6158
    }
6159
6160
  /* Literal character: len is its length in code units. */
  len = 1;
6161
#ifdef SUPPORT_UNICODE
6162
  if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
6163
#endif
6164
6165
  if (caseless && char_has_othercase(common, cc))
6166
    {
6167
#ifdef SUPPORT_UNICODE
6168
    if (common->utf)
6169
      {
6170
      GETCHAR(chr, cc);
6171
      /* The other case is usable only when it encodes to the same
      number of code units. */
      if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
6172
        {
6173
        chars_end = chars;
6174
        continue;
6175
        }
6176
      }
6177
    else
6178
#endif
6179
      {
6180
      chr = *cc;
6181
#ifdef SUPPORT_UNICODE
6182
      if (common->ucp && chr > 127)
6183
        {
6184
        chr = UCD_OTHERCASE(chr);
6185
        /* Keep the original value when the other case does not fit
        into a single code unit. */
        othercase[0] = (chr == (PCRE2_UCHAR)chr) ? chr : *cc;
6186
        }
6187
      else
6188
#endif
6189
        othercase[0] = TABLE_GET(chr, common->fcc, chr);
6190
      }
6191
    }
6192
  else
6193
    {
6194
    caseless = FALSE;
6195
    othercase[0] = 0; /* Stops compiler warning - PH */
6196
    }
6197
6198
  /* Store each code unit of the character (and of its other case) into
  consecutive positions; the whole character is replayed 'repeat' times. */
  len_save = len;
6199
  cc_save = cc;
6200
  while (TRUE)
6201
    {
6202
    oc = othercase;
6203
    do
6204
      {
6205
      len--;
6206
6207
      chr = *cc;
6208
      add_prefix_char(*cc, chars, len == 0);
6209
6210
      if (caseless)
6211
        add_prefix_char(*oc, chars, len == 0);
6212
6213
      chars++;
6214
      cc++;
6215
      oc++;
6216
      }
6217
    while (len > 0 && chars < chars_end);
6218
6219
    if (--repeat == 0 || chars >= chars_end)
6220
      break;
6221
6222
    len = len_save;
6223
    cc = cc_save;
6224
    }
6225
6226
  repeat = 1;
6227
  if (last)
6228
    chars_end = chars;
6229
  }
6230
}
6231
6232
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
/* Emits code that masks the code unit held in reg and jumps to label when
the masked value equals the trailing-unit pattern of the current code unit
width (UTF-8: 10xxxxxx, UTF-16: 0xdc00..0xdfff). The content of reg is
overwritten by the masked value. */
static void jumpto_if_not_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg, struct sljit_label *label)
{
#if PCRE2_CODE_UNIT_WIDTH == 8
const sljit_sw keep_bits = 0xc0;
const sljit_sw trail_tag = 0x80;
#elif PCRE2_CODE_UNIT_WIDTH == 16
const sljit_sw keep_bits = 0xfc00;
const sljit_sw trail_tag = 0xdc00;
#else
#error "Unknown code width"
#endif

OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, keep_bits);
CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, trail_tag, label);
}
#endif
6246
6247
#include "pcre2_jit_simd_inc.h"
6248
6249
#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
6250
6251
/* Looks for the best pair of prefix positions for the SIMD character pair
search and, when one exists, emits that search.

   chars  prefix data whose last_count fields already hold priorities
   max    number of valid entries in chars

Both positions must have a priority above 2, must be at most
max_fast_forward_char_pair_offset() apart, and must not share any of their
two stored code units. Among the candidates the highest priority sum wins;
on equal sums the pair examined later replaces the earlier one.

Returns TRUE when code was emitted, FALSE when no pair qualifies. */
static BOOL check_fast_forward_char_pair_simd(compiler_common *common, fast_forward_char_data *chars, int max)
{
sljit_s32 hi, lo, best_hi = 0, best_lo = 0;
sljit_u32 best_pri = 0;
const sljit_s32 window = max_fast_forward_char_pair_offset();
PCRE2_UCHAR hi1, hi2, hi_pri, lo1, lo2, lo_pri;

for (hi = max - 1; hi >= 1; hi--)
  {
  if (chars[hi].last_count <= 2)
    continue;

  hi1 = chars[hi].chars[0];
  hi2 = chars[hi].chars[1];
  hi_pri = chars[hi].last_count;

  /* Partner positions lie below hi, no further away than the window. */
  lo = hi - window;
  if (lo < 0)
    lo = 0;

  for (; lo < hi; lo++)
    {
    lo_pri = chars[lo].last_count;
    if (lo_pri <= 2 || (sljit_u32)hi_pri + (sljit_u32)lo_pri < best_pri)
      continue;

    lo1 = chars[lo].chars[0];
    lo2 = chars[lo].chars[1];

    /* The two positions must not have a code unit in common. */
    if (hi1 == lo1 || hi1 == lo2 || hi2 == lo1 || hi2 == lo2)
      continue;

    best_pri = hi_pri + lo_pri;
    best_hi = hi;
    best_lo = lo;
    }
  }

if (best_pri == 0)
  return FALSE;

fast_forward_char_pair_simd(common, best_hi, chars[best_hi].chars[0], chars[best_hi].chars[1], best_lo, chars[best_lo].chars[0], chars[best_lo].chars[1]);
return TRUE;
}
6296
6297
#endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
6298
6299
/* Emits code that advances STR_PTR until the code unit located 'offset'
code units after STR_PTR equals char1 or char2 (the two may be the same).

   char1, char2  the accepted code units
   offset        distance of the tested code unit from the match start;
                 asserted to be 0 unless the mode is PCRE2_JIT_COMPLETE

When common->match_end_ptr is set, STR_END is temporarily replaced (the
original is kept in TMP3) and restored before the emitted code finishes.
TMP1 is used as scratch. */
static void fast_forward_first_char2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
6300
{
6301
DEFINE_COMPILER;
6302
struct sljit_label *start;
6303
struct sljit_jump *match;
6304
struct sljit_jump *partial_quit;
6305
PCRE2_UCHAR mask;
6306
BOOL has_match_end = (common->match_end_ptr != 0);
6307
6308
SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE || offset == 0);
6309
6310
/* Load the stored end limit before STR_PTR is modified. */
if (has_match_end)
6311
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6312
6313
/* Search from the position of the tested code unit. */
if (offset > 0)
6314
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
6315
6316
if (has_match_end)
6317
  {
6318
  /* Save the real STR_END; it is restored at every exit below. */
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
6319
6320
  /* TMP1 = stored limit + offset + 1. STR_END is replaced by TMP1 when
  STR_END is the greater one.
  NOTE(review): relies on SELECT choosing TMP1 when the condition holds -
  confirm against the sljit select semantics. */
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offset + 1));
6321
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
6322
  SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END);
6323
  }
6324
6325
#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD
6326
6327
/* SIMD variant: the search itself is emitted by fast_forward_char_simd();
only the STR_PTR / STR_END adjustments are undone here. */
if (JIT_HAS_FAST_FORWARD_CHAR_SIMD)
6328
  {
6329
  fast_forward_char_simd(common, char1, char2, offset);
6330
6331
  if (offset > 0)
6332
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
6333
6334
  if (has_match_end)
6335
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6336
  return;
6337
  }
6338
6339
#endif
6340
6341
/* Scalar search loop. */
start = LABEL();
6342
6343
/* Out of input: a failed match in complete mode; in the other modes the
jump lands at the end of the emitted code (see JUMPHERE below). */
partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6344
if (common->mode == PCRE2_JIT_COMPLETE)
6345
  add_jump(compiler, &common->failed_match, partial_quit);
6346
6347
/* Read one code unit and step past it. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6348
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6349
6350
if (char1 == char2)
6351
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1, start);
6352
else
6353
  {
6354
  mask = char1 ^ char2;
6355
  if (is_powerof2(mask))
6356
    {
6357
    /* The values differ in a single bit: force that bit, compare once. */
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
6358
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | mask, start);
6359
    }
6360
  else
6361
    {
6362
    /* General case: two separate comparisons. */
    match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
6363
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char2, start);
6364
    JUMPHERE(match);
6365
    }
6366
  }
6367
6368
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
6369
/* In UTF mode the candidate match start (offset + 1 code units back) must
be the first code unit of a character; otherwise keep searching. */
if (common->utf && offset > 0)
6370
  {
6371
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-(offset + 1)));
6372
  jumpto_if_not_utf_char_start(compiler, TMP1, start);
6373
  }
6374
#endif
6375
6376
/* Move STR_PTR back to the candidate match start. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset + 1));
6377
6378
if (common->mode != PCRE2_JIT_COMPLETE)
6379
  JUMPHERE(partial_quit);
6380
6381
if (has_match_end)
6382
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6383
}
6384
6385
/* Tries to emit a fast-forward search built from the pattern prefix that
scan_prefix() computes for common->start.

Strategies, in the order they are tried:
  - a SIMD character pair search (when available and a pair qualifies),
  - a skip-table loop over the longest run of at least 4 positions with
    known character sets, optionally combined with a one or two character
    test at another position,
  - a plain search via fast_forward_first_char2().

Returns FALSE when the prefix gives nothing usable (nothing is emitted by
this function in that case) and TRUE otherwise. TRUE is also returned when
the skip table cannot be allocated.
NOTE(review): presumably allocate_read_only_data() records that failure
for the caller - confirm. */
static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
6386
{
6387
DEFINE_COMPILER;
6388
struct sljit_label *start;
6389
struct sljit_jump *match;
6390
fast_forward_char_data chars[MAX_N_CHARS];
6391
sljit_s32 offset;
6392
PCRE2_UCHAR mask;
6393
PCRE2_UCHAR *char_set, *char_set_end;
6394
int i, max, from;
6395
int range_right = -1, range_len;
6396
sljit_u8 *update_table = NULL;
6397
BOOL in_range;
6398
6399
for (i = 0; i < MAX_N_CHARS; i++)
6400
  {
6401
  chars[i].count = 0;
6402
  chars[i].last_count = 0;
6403
  }
6404
6405
max = scan_prefix(common, common->start, chars);
6406
6407
if (max < 1)
6408
  return FALSE;
6409
6410
/* Convert last_count to priority. */
/* Resulting priorities: one character 7 or 5; two characters differing
in a single bit 6 or 4; two other characters 3 or 2; more characters 1;
"any" (count 255) or an untouched position 0. */
6411
for (i = 0; i < max; i++)
6412
  {
6413
  SLJIT_ASSERT(chars[i].last_count <= chars[i].count);
6414
6415
  switch (chars[i].count)
6416
    {
6417
    case 0:
6418
    chars[i].count = 255;
6419
    chars[i].last_count = 0;
6420
    break;
6421
6422
    case 1:
6423
    chars[i].last_count = (chars[i].last_count == 1) ? 7 : 5;
6424
    /* Simplifies algorithms later. */
6425
    chars[i].chars[1] = chars[i].chars[0];
6426
    break;
6427
6428
    case 2:
6429
    SLJIT_ASSERT(chars[i].chars[0] != chars[i].chars[1]);
6430
6431
    if (is_powerof2(chars[i].chars[0] ^ chars[i].chars[1]))
6432
      chars[i].last_count = (chars[i].last_count == 2) ? 6 : 4;
6433
    else
6434
      chars[i].last_count = (chars[i].last_count == 2) ? 3 : 2;
6435
    break;
6436
6437
    default:
6438
    chars[i].last_count = (chars[i].count == 255) ? 0 : 1;
6439
    break;
6440
    }
6441
  }
6442
6443
#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
6444
if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && check_fast_forward_char_pair_simd(common, chars, max))
6445
  return TRUE;
6446
#endif
6447
6448
/* Find the longest run of consecutive positions with known character
sets (count < 255). Only runs of at least 4 positions are accepted;
range_right is the index of the last position of the chosen run. */
in_range = FALSE;
6449
/* Prevent compiler "uninitialized" warning */
6450
from = 0;
6451
range_len = 4 /* minimum length */ - 1;
6452
for (i = 0; i <= max; i++)
6453
  {
6454
  if (in_range && (i - from) > range_len && (chars[i - 1].count < 255))
6455
    {
6456
    range_len = i - from;
6457
    range_right = i - 1;
6458
    }
6459
6460
  if (i < max && chars[i].count < 255)
6461
    {
6462
    SLJIT_ASSERT(chars[i].count > 0);
6463
    if (!in_range)
6464
      {
6465
      in_range = TRUE;
6466
      from = i;
6467
      }
6468
    }
6469
  else
6470
    in_range = FALSE;
6471
  }
6472
6473
if (range_right >= 0)
6474
  {
6475
  /* Skip table indexed by the low 8 bits of a code unit. Each entry is
  the distance, scaled by IN_UCHARS, from range_right back to the nearest
  position of the run that can hold that value: 0 for range_right itself
  and IN_UCHARS(range_len) when the value occurs nowhere in the run. */
  update_table = (sljit_u8 *)allocate_read_only_data(common, 256);
6476
  if (update_table == NULL)
6477
    return TRUE;
6478
  memset(update_table, IN_UCHARS(range_len), 256);
6479
6480
  for (i = 0; i < range_len; i++)
6481
    {
6482
    SLJIT_ASSERT(chars[range_right - i].count > 0 && chars[range_right - i].count < 255);
6483
6484
    char_set = chars[range_right - i].chars;
6485
    char_set_end = char_set + chars[range_right - i].count;
6486
    do
6487
      {
6488
      if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))
6489
        update_table[(*char_set) & 0xff] = IN_UCHARS(i);
6490
      char_set++;
6491
      }
6492
    while (char_set < char_set_end);
6493
    }
6494
  }
6495
6496
/* Choose the position (other than range_right) with the highest priority
of at least 2; on equal priorities the earliest position is kept. */
offset = -1;
6497
/* Scan forward. */
6498
for (i = 0; i < max; i++)
6499
  {
6500
  if (range_right == i)
6501
    continue;
6502
6503
  if (offset == -1)
6504
    {
6505
    if (chars[i].last_count >= 2)
6506
      offset = i;
6507
    }
6508
  else if (chars[offset].last_count < chars[i].last_count)
6509
    offset = i;
6510
  }
6511
6512
SLJIT_ASSERT(offset == -1 || (chars[offset].count >= 1 && chars[offset].count <= 2));
6513
6514
/* No usable run: fall back to a one or two character search. */
if (range_right < 0)
6515
  {
6516
  if (offset < 0)
6517
    return FALSE;
6518
  /* Works regardless the value is 1 or 2. */
6519
  fast_forward_first_char2(common, chars[offset].chars[0], chars[offset].chars[1], offset);
6520
  return TRUE;
6521
  }
6522
6523
SLJIT_ASSERT(range_right != offset);
6524
6525
/* Reduce STR_END by max code units; the match fails immediately when the
subtraction signals SLJIT_LESS. With a stored end limit the original
STR_END is kept in TMP3 and the reduced value is additionally replaced by
that limit when it is greater than the limit. */
if (common->match_end_ptr != 0)
6526
  {
6527
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6528
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
6529
  OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6530
  add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
6531
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
6532
  SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END);
6533
  }
6534
else
6535
  {
6536
  OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6537
  add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
6538
  }
6539
6540
SLJIT_ASSERT(range_right >= 0);
6541
6542
/* Keep the table address in a register when real registers are available. */
if (!HAS_VIRTUAL_REGISTERS)
6543
  OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
6544
6545
/* Main loop: look up the code unit at range_right and advance STR_PTR by
the table value until that value is 0. */
start = LABEL();
6546
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
6547
6548
/* Load a single byte of the code unit at range_right; the address depends
on the code unit width and the byte order. */
#if PCRE2_CODE_UNIT_WIDTH == 8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
6549
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
6550
#else
6551
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
6552
#endif
6553
6554
if (!HAS_VIRTUAL_REGISTERS)
6555
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
6556
else
6557
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
6558
6559
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6560
CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
6561
6562
/* Additional test of the selected position. STR_PTR is incremented first,
so a failed comparison restarts the loop one code unit further on. */
if (offset >= 0)
6563
  {
6564
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
6565
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6566
6567
  if (chars[offset].count == 1)
6568
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0], start);
6569
  else
6570
    {
6571
    mask = chars[offset].chars[0] ^ chars[offset].chars[1];
6572
    if (is_powerof2(mask))
6573
      {
6574
      /* The values differ in a single bit: force it and compare once. */
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
6575
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0] | mask, start);
6576
      }
6577
    else
6578
      {
6579
      match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0]);
6580
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[1], start);
6581
      JUMPHERE(match);
6582
      }
6583
    }
6584
  }
6585
6586
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
6587
/* In UTF mode the candidate start must be the first code unit of a
character. With offset == 0 no check is emitted. */
if (common->utf && offset != 0)
6588
  {
6589
  if (offset < 0)
6590
    {
6591
    /* STR_PTR was not incremented above: do it here so that a failed
    check restarts the loop one code unit further on. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6592
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6593
    }
6594
  else
6595
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
6596
6597
  jumpto_if_not_utf_char_start(compiler, TMP1, start);
6598
6599
  if (offset < 0)
6600
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6601
  }
6602
#endif
6603
6604
/* Undo the increment made for the additional character test. */
if (offset >= 0)
6605
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6606
6607
/* Restore the original STR_END. */
if (common->match_end_ptr != 0)
6608
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6609
else
6610
  OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6611
return TRUE;
6612
}
6613
6614
/* Emits a search for the first code unit recorded in the compiled pattern
(common->re->first_codeunit). When PCRE2_FIRSTCASELESS is set, the other
case of that code unit is accepted as well. The search itself is emitted
by fast_forward_first_char2() with offset 0. */
static SLJIT_INLINE void fast_forward_first_char(compiler_common *common)
{
const PCRE2_UCHAR lead = (PCRE2_UCHAR)(common->re->first_codeunit);
PCRE2_UCHAR alt;

/* Case sensitive: both accepted values are the same. */
if ((common->re->flags & PCRE2_FIRSTCASELESS) == 0)
  {
  fast_forward_first_char2(common, lead, lead, 0);
  return;
  }

alt = TABLE_GET(lead, common->fcc, lead);
#if defined SUPPORT_UNICODE
/* Above 127 the Unicode data is used in UTF or UCP mode. */
if (lead > 127 && (common->utf || common->ucp))
  alt = UCD_OTHERCASE(lead);
#endif

fast_forward_first_char2(common, lead, alt, 0);
}
6631
6632
/* Emits code that moves STR_PTR forward to the position just after the next
newline in the subject, dispatching on common->nltype. When STR_PTR is not
beyond arguments->str on entry, the whole search is skipped. If the
match_end_ptr slot is in use, STR_END is replaced by the value stored there
for the duration of the scan; the original STR_END is parked in TMP3 and
restored before leaving. TMP1 and TMP2 are used as scratch. */
static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
6633
{
6634
DEFINE_COMPILER;
6635
struct sljit_label *loop;
6636
struct sljit_jump *lastchar = NULL;
6637
struct sljit_jump *firstchar;
6638
struct sljit_jump *quit = NULL;
6639
struct sljit_jump *foundcr = NULL;
6640
struct sljit_jump *notfoundnl;
6641
jump_list *newline = NULL;
6642
6643
/* Keep the real STR_END in TMP3 and scan only up to the stored match end. */
if (common->match_end_ptr != 0)
6644
  {
6645
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
6646
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6647
  }
6648
6649
/* Fixed newline made of two code units: (newline >> 8) & 0xff comes first,
newline & 0xff second. This case is handled completely inside this branch. */
if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6650
  {
6651
#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
6652
  if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && common->mode == PCRE2_JIT_COMPLETE)
6653
    {
6654
    /* TMP2 = arguments->str, TMP1 = arguments->begin. */
    if (HAS_VIRTUAL_REGISTERS)
6655
      {
6656
      OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6657
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6658
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6659
      }
6660
    else
6661
      {
6662
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
6663
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
6664
      }
6665
    /* Nothing to skip when STR_PTR is not past arguments->str. */
    firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6666
6667
    /* Step back one code unit, then a second one unless the new position is
    exactly arguments->begin (TMP1 becomes 0 or 1, scaled to code unit size). */
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6668
    OP2U(SLJIT_SUB | SLJIT_SET_Z, STR_PTR, 0, TMP1, 0);
6669
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_NOT_EQUAL);
6670
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6671
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
6672
#endif
6673
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6674
6675
    /* Search for the code unit pair, then move past both units. NOTE(review):
    presumably the SIMD helper leaves STR_PTR on the first unit of the pair;
    confirm in fast_forward_char_pair_simd. */
    fast_forward_char_pair_simd(common, 1, common->newline & 0xff, common->newline & 0xff, 0, (common->newline >> 8) & 0xff, (common->newline >> 8) & 0xff);
6676
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6677
    }
6678
  else
6679
#endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
6680
    {
6681
    /* Already at (or past) the end: leave STR_PTR untouched. */
    lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6682
    /* TMP2 = arguments->str, TMP1 = arguments->begin. */
    if (HAS_VIRTUAL_REGISTERS)
6683
      {
6684
      OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6685
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6686
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6687
      }
6688
    else
6689
      {
6690
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
6691
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
6692
      }
6693
    firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6694
6695
    /* Step back one code unit, but only when STR_PTR is at least two code
    units past arguments->begin (TMP2 becomes 0 or 1, scaled to unit size). */
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
6696
    OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, STR_PTR, 0, TMP1, 0);
6697
    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);
6698
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6699
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
6700
#endif
6701
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
6702
6703
    /* Advance one code unit at a time until the two units just before
    STR_PTR form the newline, or until STR_END is reached. */
    loop = LABEL();
6704
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6705
    quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6706
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
6707
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
6708
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
6709
    CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
6710
6711
    JUMPHERE(quit);
6712
    JUMPHERE(lastchar);
6713
    }
6714
6715
  JUMPHERE(firstchar);
6716
6717
  /* Put the original STR_END back. */
  if (common->match_end_ptr != 0)
6718
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6719
  return;
6720
  }
6721
6722
/* All remaining newline types. TMP2 = arguments->str. */
if (HAS_VIRTUAL_REGISTERS)
6723
  {
6724
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6725
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6726
  }
6727
else
6728
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
6729
6730
/* Example: match /^/ to \r\n from offset 1. */
6731
firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6732
6733
/* Back up before scanning: by one character (move_back) for NLTYPE_ANY,
otherwise by a single code unit. */
if (common->nltype == NLTYPE_ANY)
6734
  move_back(common, NULL, FALSE);
6735
else
6736
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6737
6738
/* The loop head is also recorded in common->ff_newline_shortcut; its users
are not visible in this part of the file. */
loop = LABEL();
6739
common->ff_newline_shortcut = loop;
6740
6741
#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD
6742
if (JIT_HAS_FAST_FORWARD_CHAR_SIMD && (common->nltype == NLTYPE_FIXED || common->nltype == NLTYPE_ANYCRLF))
6743
  {
6744
  if (common->nltype == NLTYPE_ANYCRLF)
6745
    {
6746
    /* Search for CR or LF, consume the code unit found, and leave through
    'quit' unless it was CR (a following LF is handled further down). */
    fast_forward_char_simd(common, CHAR_CR, CHAR_LF, 0);
6747
    if (common->mode != PCRE2_JIT_COMPLETE)
6748
      lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6749
6750
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6751
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6752
    quit = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6753
    }
6754
   else
6755
    {
6756
    /* Single code unit fixed newline: search for it and step past it. */
    fast_forward_char_simd(common, common->newline, common->newline, 0);
6757
6758
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6759
    if (common->mode != PCRE2_JIT_COMPLETE)
6760
      {
6761
      /* Partial matching: do not let STR_PTR end up beyond STR_END
      (assumes SELECT stores its first source when the condition holds -
      TODO confirm against the SELECT macro). */
      OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
6762
      SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
6763
      }
6764
    }
6765
  }
6766
else
6767
#endif /* JIT_HAS_FAST_FORWARD_CHAR_SIMD */
6768
  {
6769
  /* Generic path: read one character per iteration. Jumps collected in
  'newline' by check_newlinechar() are directed back to the loop head
  (presumably they are taken when the character is not a newline - confirm
  in check_newlinechar). */
  read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
6770
  lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6771
  if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
6772
    foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6773
  check_newlinechar(common, common->nltype, &newline, FALSE);
6774
  set_jumps(newline, loop);
6775
  }
6776
6777
if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
6778
  {
6779
  /* Generic path only: a newline other than CR jumps over the code below,
  while the CR case (foundcr) continues here. */
  if (quit == NULL)
6780
    {
6781
    quit = JUMP(SLJIT_JUMP);
6782
    JUMPHERE(foundcr);
6783
    }
6784
6785
  /* After a CR: if the subject has not ended and the next code unit is
  CHAR_NL, advance over it as well (TMP1 is 0 or 1, scaled to unit size). */
  notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6786
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6787
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_NL);
6788
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
6789
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6790
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
6791
#endif
6792
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6793
  JUMPHERE(notfoundnl);
6794
  JUMPHERE(quit);
6795
  }
6796
6797
if (lastchar)
6798
  JUMPHERE(lastchar);
6799
JUMPHERE(firstchar);
6800
6801
/* Put the original STR_END back. */
if (common->match_end_ptr != 0)
6802
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6803
}
6804
6805
/* Forward declaration: optimize_class() is defined further down in this file
but is already called by fast_forward_start_bits() below. */
static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
6806
6807
/* Emits a loop that advances STR_PTR until it points at a code unit that is
accepted by the start bitmap (common->re->start_bitmap). In
PCRE2_JIT_COMPLETE mode, reaching STR_END without a hit jumps to
common->failed_match; in the other modes the loop is simply left with
STR_PTR at STR_END. When the match_end_ptr slot is in use, STR_END is
temporarily lowered and restored from RETURN_ADDR on exit. TMP1 and TMP2
(and TMP3 without virtual registers) are used as scratch. */
static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common)
6808
{
6809
DEFINE_COMPILER;
6810
const sljit_u8 *start_bits = common->re->start_bitmap;
6811
struct sljit_label *start;
6812
struct sljit_jump *partial_quit;
6813
#if PCRE2_CODE_UNIT_WIDTH != 8
6814
struct sljit_jump *found = NULL;
6815
#endif
6816
jump_list *matches = NULL;
6817
6818
/* Save STR_END in RETURN_ADDR and limit it to (stored match end + one code
unit) when that is smaller (assumes SELECT stores its first source when the
condition holds - TODO confirm against the SELECT macro). */
if (common->match_end_ptr != 0)
6819
  {
6820
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6821
  OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
6822
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
6823
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
6824
  SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END);
6825
  }
6826
6827
start = LABEL();
6828
6829
partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6830
if (common->mode == PCRE2_JIT_COMPLETE)
6831
  add_jump(compiler, &common->failed_match, partial_quit);
6832
6833
/* Load the next code unit into TMP1 and step over it; the final SUB below
moves STR_PTR back onto the accepted code unit. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6834
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6835
6836
/* First try the compare-based class optimizations; their jumps are sent back
to the loop head. Otherwise fall back to an explicit bitmap lookup. */
if (!optimize_class(common, start_bits, (start_bits[31] & 0x80) != 0, FALSE, &matches))
6837
  {
6838
#if PCRE2_CODE_UNIT_WIDTH != 8
6839
  /* Code units >= 255 are not looked up in the bitmap: they count as a hit
  when the bit for 255 is set, otherwise the loop continues. */
  if ((start_bits[31] & 0x80) != 0)
6840
    found = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255);
6841
  else
6842
    CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255, start);
6843
#elif defined SUPPORT_UNICODE
6844
  /* UTF mode and is_char7_bitset() holds: code units above 127 restart the
  loop without consulting the bitmap. */
  if (common->utf && is_char7_bitset(start_bits, FALSE))
6845
    CMPTO(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 127, start);
6846
#endif
6847
  /* TMP2 = bit index (ch & 7), TMP1 = bitmap byte start_bits[ch >> 3]; the
  loop continues while the selected bit is clear. */
  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
6848
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
6849
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
6850
  if (!HAS_VIRTUAL_REGISTERS)
6851
    {
6852
    OP2(SLJIT_SHL, TMP3, 0, SLJIT_IMM, 1, TMP2, 0);
6853
    OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP3, 0);
6854
    }
6855
  else
6856
    {
6857
    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
6858
    OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
6859
    }
6860
  JUMPTO(SLJIT_ZERO, start);
6861
  }
6862
else
6863
  set_jumps(matches, start);
6864
6865
#if PCRE2_CODE_UNIT_WIDTH != 8
6866
if (found != NULL)
6867
  JUMPHERE(found);
6868
#endif
6869
6870
/* Undo the increment so STR_PTR points at the accepted code unit. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6871
6872
/* Outside complete mode the end-of-subject exit lands here, after the SUB. */
if (common->mode != PCRE2_JIT_COMPLETE)
6873
  JUMPHERE(partial_quit);
6874
6875
if (common->match_end_ptr != 0)
6876
  OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
6877
}
6878
6879
/* Emits code that scans ahead in the subject for the code unit req_char (or
its other case when caseless is set) and stores the position where it was
found in the stack slot common->req_char_ptr.

Arguments:
  common         compiler state
  req_char       the code unit to look for
  caseless       TRUE when the other case of req_char is accepted too
  has_firstchar  TRUE to start the scan one code unit after STR_PTR

Returns: list of jumps that are taken when the end of the subject is reached
  without finding the code unit. TMP1 and TMP2 are used as scratch. */
static SLJIT_INLINE jump_list *search_requested_char(compiler_common *common, PCRE2_UCHAR req_char, BOOL caseless, BOOL has_firstchar)
6880
{
6881
DEFINE_COMPILER;
6882
struct sljit_label *loop;
6883
struct sljit_jump *toolong;
6884
struct sljit_jump *already_found;
6885
struct sljit_jump *found;
6886
struct sljit_jump *found_oc = NULL;
6887
jump_list *not_found = NULL;
6888
sljit_u32 oc, bit;
6889
6890
SLJIT_ASSERT(common->req_char_ptr != 0);
6891
/* The search is skipped completely when more than REQ_CU_MAX * 100 code
units remain after STR_PTR (toolong), or when STR_PTR is still below the
position saved by an earlier search (already_found). */
OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(REQ_CU_MAX) * 100);
6892
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
6893
toolong = CMP(SLJIT_LESS, TMP2, 0, STR_END, 0);
6894
already_found = CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0);
6895
6896
/* TMP1 is the scan pointer. */
if (has_firstchar)
6897
  OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6898
else
6899
  OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
6900
6901
/* oc is the other case of req_char; it equals req_char when there is none. */
oc = req_char;
6902
if (caseless)
6903
  {
6904
  oc = TABLE_GET(req_char, common->fcc, req_char);
6905
#if defined SUPPORT_UNICODE
6906
  if (req_char > 127 && (common->utf || common->ucp))
6907
    oc = UCD_OTHERCASE(req_char);
6908
#endif
6909
  }
6910
6911
#ifdef JIT_HAS_FAST_REQUESTED_CHAR_SIMD
6912
if (JIT_HAS_FAST_REQUESTED_CHAR_SIMD)
6913
  {
6914
  not_found = fast_requested_char_simd(common, req_char, oc);
6915
  }
6916
else
6917
#endif
6918
  {
6919
  loop = LABEL();
6920
  add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
6921
6922
  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
6923
6924
  if (req_char == oc)
6925
    found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
6926
  else
6927
    {
6928
    bit = req_char ^ oc;
6929
    /* When both cases differ in exactly one bit, force that bit on and use
    a single comparison; otherwise compare against each case separately. */
    if (is_powerof2(bit))
6930
      {
6931
       OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
6932
      found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
6933
      }
6934
    else
6935
      {
6936
      found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
6937
      found_oc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
6938
      }
6939
    }
6940
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
6941
  JUMPTO(SLJIT_JUMP, loop);
6942
6943
  JUMPHERE(found);
6944
  if (found_oc)
6945
    JUMPHERE(found_oc);
6946
  }
6947
6948
/* Remember where the code unit was found. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
6949
6950
JUMPHERE(already_found);
6951
JUMPHERE(toolong);
6952
return not_found;
6953
}
6954
6955
/* Emits a fast-call subroutine (entered with SLJIT_FAST_ENTER, return address
kept in RETURN_ADDR) that pops entries from the backtracking stack and writes
the saved values back into local variables. The word just below STACK_TOP
selects the entry kind:
  > 0   offset from the local base; two saved words are restored there and
        three words are popped
  == 0  end marker; the subroutine returns
  < 0   negated offset from the local base; one saved word is restored and
        two words are popped
TMP1 holds the local base, TMP2 and TMP3 are scratch. */
static void do_revertframes(compiler_common *common)
6956
{
6957
DEFINE_COMPILER;
6958
struct sljit_jump *jump;
6959
struct sljit_label *mainloop;
6960
6961
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
6962
GET_LOCAL_BASE(TMP1, 0, 0);
6963
6964
/* Drop frames until we reach STACK_TOP. */
6965
mainloop = LABEL();
6966
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -SSIZE_OF(sw));
6967
OP2U(SLJIT_SUB | SLJIT_SET_SIG_LESS_EQUAL | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0);
6968
jump = JUMP(SLJIT_SIG_LESS_EQUAL);
6969
6970
/* Positive entry: TMP2 becomes the address of the local to restore. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
6971
if (HAS_VIRTUAL_REGISTERS)
6972
  {
6973
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
6974
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -(3 * SSIZE_OF(sw)));
6975
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * SSIZE_OF(sw));
6976
  }
6977
else
6978
  {
6979
  /* TMP1 is borrowed as a data register here, so the local base is
  reloaded into it before looping. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
6980
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(3 * SSIZE_OF(sw)));
6981
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * SSIZE_OF(sw));
6982
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
6983
  GET_LOCAL_BASE(TMP1, 0, 0);
6984
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP3, 0);
6985
  }
6986
JUMPTO(SLJIT_JUMP, mainloop);
6987
6988
JUMPHERE(jump);
6989
/* Zero or negative entry. The flags from the comparison above are still
live; sljit_set_current_flags() re-declares them for the jump that follows
(presumably needed because the flag tracking is reset at a jump target -
confirm in the SLJIT documentation). */
sljit_set_current_flags(compiler, SLJIT_CURRENT_FLAGS_SUB | SLJIT_CURRENT_FLAGS_COMPARE | SLJIT_SET_SIG_LESS_EQUAL | SLJIT_SET_Z);
6990
jump = JUMP(SLJIT_NOT_ZERO /* SIG_LESS */);
6991
/* End of reverting values. */
6992
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
6993
6994
JUMPHERE(jump);
6995
/* Negative entry: local base minus the (negative) value gives the address. */
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, TMP2, 0);
6996
if (HAS_VIRTUAL_REGISTERS)
6997
  {
6998
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
6999
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
7000
  }
7001
else
7002
  {
7003
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
7004
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
7005
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP3, 0);
7006
  }
7007
JUMPTO(SLJIT_JUMP, mainloop);
7008
}
7009
7010
/* Bit masks built from ucp_* category codes: code N corresponds to bit N.
UCPCAT_RANGE(start, end) sets every bit from start to end inclusive, so it
relies on the codes in the range being numbered consecutively. */
#ifdef SUPPORT_UNICODE
7011
/* Mask with only the bit of one category set. */
#define UCPCAT(bit) (1 << (bit))
7012
/* Masks for two or three individual categories. */
#define UCPCAT2(bit1, bit2) (UCPCAT(bit1) | UCPCAT(bit2))
7013
#define UCPCAT3(bit1, bit2, bit3) (UCPCAT(bit1) | UCPCAT(bit2) | UCPCAT(bit3))
7014
/* Mask for the inclusive range of categories start..end. */
#define UCPCAT_RANGE(start, end) (((1 << ((end) + 1)) - 1) - ((1 << (start)) - 1))
7015
/* Categories ucp_Ll..ucp_Lu and ucp_Nd..ucp_No respectively. */
#define UCPCAT_L UCPCAT_RANGE(ucp_Ll, ucp_Lu)
7016
#define UCPCAT_N UCPCAT_RANGE(ucp_Nd, ucp_No)
7017
/* Every category from 0 up to and including ucp_Zs. */
#define UCPCAT_ALL ((1 << (ucp_Zs + 1)) - 1)
7018
#endif
7019
7020
/* Emits a fast-call subroutine that tests for a word boundary at STR_PTR.
The return address is kept in the LOCAL0 stack slot. The generated code
computes a 0/1 "word character" value for the character before STR_PTR
(TMP3; 0 when STR_PTR is not beyond arguments->begin) and for the character
at STR_PTR (TMP2; 0 at the end of the subject), then XORs them into TMP2
with the zero flag updated, so TMP2 is non-zero exactly when the two values
differ.

Arguments:
  common   compiler state
  ucp      when SUPPORT_UNICODE is defined: TRUE selects the test based on
           the value returned by the getucdtype subroutine instead of the
           common->ctypes table

In invalid UTF mode there are two extra exits which return TMP2 = -1 or
TMP2 = TMP3 without the final XOR. */
static void check_wordboundary(compiler_common *common, BOOL ucp)
7021
{
7022
DEFINE_COMPILER;
7023
struct sljit_jump *skipread;
7024
jump_list *skipread_list = NULL;
7025
#ifdef SUPPORT_UNICODE
7026
struct sljit_label *valid_utf;
7027
jump_list *invalid_utf1 = NULL;
7028
#endif /* SUPPORT_UNICODE */
7029
jump_list *invalid_utf2 = NULL;
7030
#if PCRE2_CODE_UNIT_WIDTH != 8 || defined SUPPORT_UNICODE
7031
struct sljit_jump *jump;
7032
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 || SUPPORT_UNICODE */
7033
7034
SLJIT_UNUSED_ARG(ucp);
7035
/* The table lookups below shift by 4 to isolate the ctype_word bit. */
SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
7036
7037
/* LOCAL0 stores the return address, LOCAL1 is handed to peek_char(). */
SLJIT_ASSERT(common->locals_size >= 2 * SSIZE_OF(sw));
7038
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCAL0);
7039
/* Get type of the previous char, and put it to TMP3. */
7040
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
7041
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
7042
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
7043
/* No previous character: TMP3 stays 0. */
skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
7044
7045
#ifdef SUPPORT_UNICODE
7046
if (common->invalid_utf)
7047
  {
7048
  peek_char_back(common, READ_CHAR_MAX, &invalid_utf1);
7049
7050
  if (common->mode != PCRE2_JIT_COMPLETE)
7051
    {
7052
    /* Outside complete mode: call check_start_used_ptr() with STR_PTR moved
    back by one character; TMP1 and STR_PTR are preserved in RETURN_ADDR
    and TMP2 around it. */
    OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
7053
    OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
7054
    move_back(common, NULL, TRUE);
7055
    check_start_used_ptr(common);
7056
    OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
7057
    OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
7058
    }
7059
  }
7060
else
7061
#endif /* SUPPORT_UNICODE */
7062
  {
7063
  if (common->mode == PCRE2_JIT_COMPLETE)
7064
    peek_char_back(common, READ_CHAR_MAX, NULL);
7065
  else
7066
    {
7067
    /* Outside complete mode: move back, call check_start_used_ptr(), then
    read the character again (presumably READ_CHAR_UPDATE_STR_PTR brings
    STR_PTR back to where it was - confirm in read_char). */
    move_back(common, NULL, TRUE);
7068
    check_start_used_ptr(common);
7069
    read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR);
7070
    }
7071
  }
7072
7073
/* Testing char type. */
7074
#ifdef SUPPORT_UNICODE
7075
if (ucp)
7076
  {
7077
  /* Call getucdtype, turn the value left in TMP1 into a one-bit mask and
  test it against the Mn, Pc, L* and N* category bits. */
  add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
7078
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP1, 0);
7079
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N);
7080
  OP_FLAGS(SLJIT_MOV, TMP3, 0, SLJIT_NOT_ZERO);
7081
  }
7082
else
7083
#endif /* SUPPORT_UNICODE */
7084
  {
7085
  /* Table based test: characters above 255 skip the lookup and keep 0. */
#if PCRE2_CODE_UNIT_WIDTH != 8
7086
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
7087
#elif defined SUPPORT_UNICODE
7088
  /* Here TMP3 has already been zeroed. */
7089
  jump = NULL;
7090
  if (common->utf)
7091
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
7092
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
7093
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
7094
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
7095
  OP2(SLJIT_AND, TMP3, 0, TMP1, 0, SLJIT_IMM, 1);
7096
#if PCRE2_CODE_UNIT_WIDTH != 8
7097
  JUMPHERE(jump);
7098
#elif defined SUPPORT_UNICODE
7099
  if (jump != NULL)
7100
    JUMPHERE(jump);
7101
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
7102
  }
7103
JUMPHERE(skipread);
7104
7105
/* Now the character at STR_PTR; its value goes to TMP2, which stays 0 when
check_str_end() takes one of the skipread_list jumps. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
7106
check_str_end(common, &skipread_list);
7107
peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCAL1, &invalid_utf2);
7108
7109
/* Testing char type. This is a code duplication. */
7110
#ifdef SUPPORT_UNICODE
7111
7112
/* Re-entry point used by the invalid UTF handler at the end. */
valid_utf = LABEL();
7113
7114
if (ucp)
7115
  {
7116
  add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
7117
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP1, 0);
7118
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N);
7119
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
7120
  }
7121
else
7122
#endif /* SUPPORT_UNICODE */
7123
  {
7124
#if PCRE2_CODE_UNIT_WIDTH != 8
7125
  /* TMP2 may be destroyed by peek_char. */
7126
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
7127
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
7128
#elif defined SUPPORT_UNICODE
7129
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
7130
  jump = NULL;
7131
  if (common->utf)
7132
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
7133
#endif
7134
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
7135
  OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
7136
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
7137
#if PCRE2_CODE_UNIT_WIDTH != 8
7138
  JUMPHERE(jump);
7139
#elif defined SUPPORT_UNICODE
7140
  if (jump != NULL)
7141
    JUMPHERE(jump);
7142
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
7143
  }
7144
set_jumps(skipread_list, LABEL());
7145
7146
/* Normal exit: TMP2 = TMP2 ^ TMP3 with the zero flag updated, then return
to the address saved in LOCAL0. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
7147
OP2(SLJIT_XOR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, TMP3, 0);
7148
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
7149
7150
#ifdef SUPPORT_UNICODE
7151
if (common->invalid_utf)
7152
  {
7153
  /* The backward read failed (TMP3 is still 0). If the character at
  STR_PTR is not INVALID_UTF_CHAR, continue at valid_utf; otherwise return
  with TMP2 = -1. */
  set_jumps(invalid_utf1, LABEL());
7154
7155
  peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCAL1, NULL);
7156
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR, valid_utf);
7157
7158
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
7159
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, -1);
7160
  OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
7161
7162
  /* The forward read failed: return with TMP2 = TMP3. */
  set_jumps(invalid_utf2, LABEL());
7163
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
7164
  OP1(SLJIT_MOV, TMP2, 0, TMP3, 0);
7165
  OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
7166
  }
7167
#endif /* SUPPORT_UNICODE */
7168
}
7169
7170
/* Tries to test membership of TMP1 in a 256 bit class bitmap with a few
compare instructions instead of a bitmap lookup. The bitmap is scanned for
the positions where the bit value changes; these boundaries are collected in
ranges[], and only classes with at most four boundaries are handled.

Arguments:
  common      compiler state
  bits        the class bitmap (32 bytes, bit i of byte n is character n*8+i)
  nclass      together with the value of the last bit, decides whether a
              final boundary at 256 is appended (see below)
  invert      TRUE flips which side of each comparison is rejected
  backtracks  receives the jumps that are taken when the character is
              rejected

Returns: TRUE when code was emitted, FALSE when the class has too many
  boundaries and nothing was emitted. */
static BOOL optimize_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
7171
{
7172
/* May destroy TMP1. */
7173
DEFINE_COMPILER;
7174
int ranges[MAX_CLASS_RANGE_SIZE];
7175
sljit_u8 bit, cbit, all;
7176
int i, byte, length = 0;
7177
7178
bit = bits[0] & 0x1;
7179
/* All bits will be zero or one (since bit is zero or one). */
7180
all = (sljit_u8)-bit;
7181
7182
/* Record each position where the bit value flips. A whole byte equal to
'all' is skipped at once because it contains no flip. */
for (i = 0; i < 256; )
7183
  {
7184
  byte = i >> 3;
7185
  if ((i & 0x7) == 0 && bits[byte] == all)
7186
    i += 8;
7187
  else
7188
    {
7189
    cbit = (bits[byte] >> (i & 0x7)) & 0x1;
7190
    if (cbit != bit)
7191
      {
7192
      if (length >= MAX_CLASS_RANGE_SIZE)
7193
        return FALSE;
7194
      ranges[length] = i;
7195
      length++;
7196
      bit = cbit;
7197
      all = (sljit_u8)-cbit; /* sign extend bit into byte */
7198
      }
7199
    i++;
7200
    }
7201
  }
7202
7203
/* Append a boundary at 256 when the last run is clear and nclass is set, or
the last run is set and nclass is not (presumably nclass states whether
characters above 255 belong to the class - confirm with the callers). */
if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
7204
  {
7205
  if (length >= MAX_CLASS_RANGE_SIZE)
7206
    return FALSE;
7207
  ranges[length] = 256;
7208
  length++;
7209
  }
7210
7211
if (length < 0 || length > 4)
7212
  return FALSE;
7213
7214
/* From here on 'bit' is the (possibly inverted) value of the first run. */
bit = bits[0] & 0x1;
7215
if (invert) bit ^= 0x1;
7216
7217
/* No character is accepted. */
7218
if (length == 0 && bit == 0)
7219
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7220
7221
switch(length)
7222
  {
7223
  case 0:
7224
  /* When bit != 0, all characters are accepted. */
7225
  return TRUE;
7226
7227
  case 1:
7228
  /* One boundary: a single comparison against it. */
  add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
7229
  return TRUE;
7230
7231
  case 2:
7232
  /* One range [ranges[0], ranges[1]): subtract the lower bound and compare
  once, or use an equality test when the range has a single element. */
  if (ranges[0] + 1 != ranges[1])
7233
    {
7234
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
7235
    add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
7236
    }
7237
  else
7238
    add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
7239
  return TRUE;
7240
7241
  case 3:
7242
  /* Three boundaries: one open-ended comparison plus one range test. */
  if (bit != 0)
7243
    {
7244
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
7245
    if (ranges[0] + 1 != ranges[1])
7246
      {
7247
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
7248
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
7249
      }
7250
    else
7251
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
7252
    return TRUE;
7253
    }
7254
7255
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
7256
  if (ranges[1] + 1 != ranges[2])
7257
    {
7258
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
7259
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
7260
    }
7261
  else
7262
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
7263
  return TRUE;
7264
7265
  case 4:
7266
  /* Two ranges of equal length whose start values differ in exactly one
  bit: set that bit in TMP1 and test only the upper range. */
  if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
7267
      && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
7268
      && (ranges[1] & (ranges[2] - ranges[0])) == 0
7269
      && is_powerof2(ranges[2] - ranges[0]))
7270
    {
7271
    SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);
7272
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
7273
    if (ranges[2] + 1 != ranges[3])
7274
      {
7275
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
7276
      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
7277
      }
7278
    else
7279
      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
7280
    return TRUE;
7281
    }
7282
7283
  if (bit != 0)
7284
    {
7285
    /* 'i' remembers how much has already been subtracted from TMP1 so the
    second test can compensate for it. */
    i = 0;
7286
    if (ranges[0] + 1 != ranges[1])
7287
      {
7288
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
7289
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
7290
      i = ranges[0];
7291
      }
7292
    else
7293
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
7294
7295
    if (ranges[2] + 1 != ranges[3])
7296
      {
7297
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
7298
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
7299
      }
7300
    else
7301
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
7302
    return TRUE;
7303
    }
7304
7305
  /* First reject everything outside [ranges[0], ranges[3]), then the gap
  [ranges[1], ranges[2]) in between. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
7306
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
7307
  if (ranges[1] + 1 != ranges[2])
7308
    {
7309
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
7310
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
7311
    }
7312
  else
7313
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
7314
  return TRUE;
7315
7316
  default:
7317
  SLJIT_UNREACHABLE();
7318
  return FALSE;
7319
  }
7320
}
7321
7322
/* Tries to test membership of TMP1 in a class bitmap that selects only a few
individual characters, using a chain of compare + SELECT operations that
accumulate the result in TMP2, followed by a single conditional jump.

Arguments:
  common      compiler state
  bits        the class bitmap (32 bytes)
  nclass      TRUE: the characters are collected from the complemented
              bitmap, and the sense of the final jump is reversed
  invert      TRUE reverses the sense of the final jump once more
  backtracks  receives the final conditional jump

Returns: TRUE when code was emitted; FALSE when SLJIT_HAS_CMOV is not
  available, when more than MAX_CLASS_CHARS_SIZE entries would be needed, or
  when no character is selected. */
static BOOL optimize_class_chars(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
7323
{
7324
/* May destroy TMP1. */
7325
DEFINE_COMPILER;
7326
uint16_t char_list[MAX_CLASS_CHARS_SIZE];
7327
uint8_t byte;
7328
sljit_s32 type;
7329
int i, j, k, len, c;
7330
7331
if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
7332
  return FALSE;
7333
7334
len = 0;
7335
7336
/* Collect the selected characters. A character with bit 0x20 set whose
counterpart without that bit is already listed is merged into that entry:
the entry gets the 0x100 marker and its low byte becomes the 0x20 variant. */
for (i = 0; i < 32; i++)
7337
  {
7338
  byte = bits[i];
7339
7340
  if (nclass)
7341
    byte = (sljit_u8)~byte;
7342
7343
  j = 0;
7344
  while (byte != 0)
7345
    {
7346
    if (byte & 0x1)
7347
      {
7348
      c = i * 8 + j;
7349
7350
      k = len;
7351
7352
      if ((c & 0x20) != 0)
7353
        {
7354
        for (k = 0; k < len; k++)
7355
          if (char_list[k] == c - 0x20)
7356
            {
7357
            char_list[k] |= 0x120;
7358
            break;
7359
            }
7360
        }
7361
7362
      /* Not merged: append as a new entry. */
      if (k == len)
7363
        {
7364
        if (len >= MAX_CLASS_CHARS_SIZE)
7365
          return FALSE;
7366
7367
        char_list[len++] = (uint16_t) c;
7368
        }
7369
      }
7370
7371
    byte >>= 1;
7372
    j++;
7373
    }
7374
  }
7375
7376
if (len == 0) return FALSE;  /* Should never occur, but stops analyzers complaining. */
7377
7378
i = 0;
7379
j = 0;
7380
7381
/* Initialise TMP2. Character 0 gets its own test which stores the zero flag
directly, the other entries use SELECT with TMP1 as the source. */
if (char_list[0] == 0)
7382
  {
7383
  i++;
7384
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0);
7385
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_ZERO);
7386
  }
7387
else
7388
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
7389
7390
/* Unmerged entries are compared exactly; merged ones are only counted in j
(assumes SELECT stores TMP1 into TMP2 when the comparison was equal and
keeps TMP2 otherwise - TODO confirm against the SELECT macro). */
while (i < len)
7391
  {
7392
  if ((char_list[i] & 0x100) != 0)
7393
    j++;
7394
  else
7395
    {
7396
    OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, char_list[i]);
7397
    SELECT(SLJIT_ZERO, TMP2, TMP1, 0, TMP2);
7398
    }
7399
  i++;
7400
  }
7401
7402
/* Merged pairs: set bit 0x20 in TMP1 once, then compare against the low byte
of each marked entry so both members of the pair are matched. */
if (j != 0)
7403
  {
7404
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x20);
7405
7406
  for (i = 0; i < len; i++)
7407
    if ((char_list[i] & 0x100) != 0)
7408
      {
7409
      j--;
7410
      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, char_list[i] & 0xff);
7411
      SELECT(SLJIT_ZERO, TMP2, TMP1, 0, TMP2);
7412
      }
7413
  }
7414
7415
if (invert)
7416
  nclass = !nclass;
7417
7418
/* Jump when TMP2 is 0, or when it is non-zero if the sense is reversed. */
type = nclass ? SLJIT_NOT_EQUAL : SLJIT_EQUAL;
7419
add_jump(compiler, backtracks, CMP(type, TMP2, 0, SLJIT_IMM, 0));
7420
return TRUE;
7421
}
7422
7423
/* Emits a compare-based membership test for a class bitmap when one of the
two optimizers can handle it: the range-based form is tried first and the
character-list form is only attempted when the range-based one declines.

Returns: TRUE when code was emitted, FALSE when neither optimizer applied.
May destroy TMP1. */
static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
return optimize_class_ranges(common, bits, nclass, invert, backtracks)
  ? TRUE
  : optimize_class_chars(common, bits, nclass, invert, backtracks);
}
7430
7431
/* Emits a fast-call subroutine (return address in RETURN_ADDR) that tests
the character in TMP1 against 0x0a..0x0d and 0x85, plus 0x2028 and 0x2029
when the code unit width is 16 or 32, or in 8-bit UTF mode. The 0/1 result
is left in TMP2 and the last operation also updates the zero flag. */
static void check_anynewline(compiler_common *common)
7432
{
7433
/* Check whether TMP1 contains a newline character. TMP1 is modified, TMP2 receives the result. */
7434
DEFINE_COMPILER;
7435
7436
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
7437
7438
/* Rebase to 0x0a so that 0x0a..0x0d becomes one unsigned range test. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
7439
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
7440
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
7441
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
7442
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
7443
#if PCRE2_CODE_UNIT_WIDTH == 8
7444
if (common->utf)
7445
  {
7446
#endif
7447
  /* Setting the lowest bit maps 0x2028 onto 0x2029 (both rebased), so one
  comparison covers both. */
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7448
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
7449
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
7450
#if PCRE2_CODE_UNIT_WIDTH == 8
7451
  }
7452
#endif
7453
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
7454
/* Merge the result of the last comparison and set the zero flag from TMP2. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
7455
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
7456
}
7457
7458
/* Emits a fast-call subroutine (return address in RETURN_ADDR) that tests
the character in TMP1 against 0x09, 0x20 and 0xa0, plus 0x1680, 0x180e,
0x2000..0x200A, 0x202f, 0x205f and 0x3000 when the code unit width is 16 or
32, or in 8-bit UTF mode. The 0/1 result is left in TMP2 and the last
operation also updates the zero flag. */
static void check_hspace(compiler_common *common)
7459
{
7460
/* Check whether TMP1 contains a horizontal space character. TMP2 receives the result; TMP1 is modified in the wide character part. */
7461
DEFINE_COMPILER;
7462
7463
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
7464
7465
/* Each comparison result is OR-ed into TMP2 by the OP_FLAGS that follows. */
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x09);
7466
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
7467
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x20);
7468
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7469
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xa0);
7470
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
7471
#if PCRE2_CODE_UNIT_WIDTH == 8
7472
if (common->utf)
7473
  {
7474
#endif
7475
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7476
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x1680);
7477
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7478
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e);
7479
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7480
  /* Rebase to 0x2000 so that 0x2000..0x200A becomes one unsigned range
  test; the remaining constants are rebased the same way. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
7481
  OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
7482
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
7483
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
7484
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7485
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
7486
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7487
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
7488
#if PCRE2_CODE_UNIT_WIDTH == 8
7489
  }
7490
#endif
7491
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
7492
/* Merge the result of the last comparison and set the zero flag from TMP2. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
7493
7494
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
7495
}
7496
7497
/* Emits a fast-call subroutine (return address in RETURN_ADDR) that tests
the character in TMP1 against 0x0a..0x0d and 0x85, plus 0x2028 and 0x2029
when the code unit width is 16 or 32, or in 8-bit UTF mode. The 0/1 result
is left in TMP2 and the last operation also updates the zero flag. The
emitted sequence is the same as the one in check_anynewline(). */
static void check_vspace(compiler_common *common)
7498
{
7499
/* Check whether TMP1 contains a vertical space character. TMP1 is modified, TMP2 receives the result. */
7500
DEFINE_COMPILER;
7501
7502
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
7503
7504
/* Rebase to 0x0a so that 0x0a..0x0d becomes one unsigned range test. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
7505
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
7506
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
7507
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
7508
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
7509
#if PCRE2_CODE_UNIT_WIDTH == 8
7510
if (common->utf)
7511
  {
7512
#endif
7513
  /* Setting the lowest bit maps 0x2028 onto 0x2029 (both rebased), so one
  comparison covers both. */
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7514
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
7515
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
7516
#if PCRE2_CODE_UNIT_WIDTH == 8
7517
  }
7518
#endif
7519
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
7520
/* Merge the result of the last comparison and set the zero flag from TMP2. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
7521
7522
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
7523
}
7524
7525
/* Emits a fast-call helper that compares two runs of code units exactly
(presumably the target of the common->casefulcmp fast calls - confirm at the
registration site). As used by the emitted code: TMP1 points to the first run,
STR_PTR is rewound by TMP2 bytes on entry to point to the second run, and TMP2
is the remaining length in bytes. Each iteration loads one code unit from both
sides and advances both pointers; on a mismatch the loop is left before TMP2 is
decremented, otherwise TMP2 is decreased by IN_UCHARS(1) until it reaches zero.
The return address is stored in LOCAL0 and reloaded into TMP1 for the fast
return, so TMP1 is destroyed. */
static void do_casefulcmp(compiler_common *common)
7526
{
7527
DEFINE_COMPILER;
7528
struct sljit_jump *jump;
7529
struct sljit_label *label;
7530
int char1_reg;
7531
int char2_reg;
7532
7533
/* Choose the two registers which hold the loaded code units. */
if (HAS_VIRTUAL_REGISTERS)
7534
  {
7535
  char1_reg = STR_END;
7536
  char2_reg = STACK_TOP;
7537
  }
7538
else
7539
  {
7540
  char1_reg = TMP3;
7541
  char2_reg = RETURN_ADDR;
7542
  }
7543
7544
/* Update ref_update_local_size() when this changes. */
7545
SLJIT_ASSERT(common->locals_size >= SSIZE_OF(sw));
7546
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCAL0);
7547
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
7548
7549
/* STR_END and STACK_TOP are borrowed as scratch registers in this
configuration: park their values in TMP3 and RETURN_ADDR. */
if (char1_reg == STR_END)
7550
  {
7551
  OP1(SLJIT_MOV, TMP3, 0, char1_reg, 0);
7552
  OP1(SLJIT_MOV, RETURN_ADDR, 0, char2_reg, 0);
7553
  }
7554
7555
/* Three loop variants follow. The SLJIT_MEM_SUPP calls are used as queries
(presumably they only report whether the addressing form is supported - see the
sljit documentation); the first supported form is used. */
if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
7556
  {
7557
  /* Load with post-update of the pointers. */
  label = LABEL();
7558
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
7559
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
7560
  jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
7561
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
7562
  JUMPTO(SLJIT_NOT_ZERO, label);
7563
7564
  JUMPHERE(jump);
7565
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
7566
  }
7567
else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
7568
  {
7569
  /* Load with pre-update: both pointers are biased by one code unit before
  the loop, and STR_PTR is corrected again after it. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
7570
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7571
7572
  label = LABEL();
7573
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
7574
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
7575
  jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
7576
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
7577
  JUMPTO(SLJIT_NOT_ZERO, label);
7578
7579
  JUMPHERE(jump);
7580
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
7581
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7582
  }
7583
else
7584
  {
7585
  /* Generic form: plain loads followed by explicit pointer increments. */
  label = LABEL();
7586
  OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
7587
  OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
7588
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
7589
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7590
  jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
7591
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
7592
  JUMPTO(SLJIT_NOT_ZERO, label);
7593
7594
  JUMPHERE(jump);
7595
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
7596
  }
7597
7598
/* Restore the borrowed registers. */
if (char1_reg == STR_END)
7599
  {
7600
  OP1(SLJIT_MOV, char1_reg, 0, TMP3, 0);
7601
  OP1(SLJIT_MOV, char2_reg, 0, RETURN_ADDR, 0);
7602
  }
7603
7604
/* TMP1 holds the return address reloaded from LOCAL0. */
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
7605
}
7606
7607
/* Emits a fast-call helper that compares two runs of code units after mapping
each code unit through the common->lcc table (presumably the target of the
common->caselesscmp fast calls - confirm at the registration site). The
register protocol mirrors the caseful helper visible in this file: TMP1 points
to the first run, STR_PTR is rewound by TMP2 bytes on entry, and TMP2 is the
remaining length in bytes, left non-zero when a mismatch ends the loop early.
When the code unit width is not 8, values above 255 bypass the table lookup.
LOCAL0 keeps the return address and LOCAL1 keeps the saved value of char1_reg
(STR_END); TMP1 is destroyed. */
static void do_caselesscmp(compiler_common *common)
7608
{
7609
DEFINE_COMPILER;
7610
struct sljit_jump *jump;
7611
struct sljit_label *label;
7612
int char1_reg = STR_END;
7613
int char2_reg;
7614
int lcc_table;
7615
/* 0: plain loads, 1: post-update loads, 2: pre-update loads. */
int opt_type = 0;
7616
7617
if (HAS_VIRTUAL_REGISTERS)
7618
  {
7619
  char2_reg = STACK_TOP;
7620
  lcc_table = STACK_LIMIT;
7621
  }
7622
else
7623
  {
7624
  char2_reg = RETURN_ADDR;
7625
  lcc_table = TMP3;
7626
  }
7627
7628
/* The SLJIT_MEM_SUPP calls are used as queries to select the load form
(presumably nothing is emitted by them - see the sljit documentation). */
if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
7629
  opt_type = 1;
7630
else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
7631
  opt_type = 2;
7632
7633
/* Update ref_update_local_size() when this changes. */
7634
SLJIT_ASSERT(common->locals_size >= 2 * SSIZE_OF(sw));
7635
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCAL0);
7636
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
7637
7638
/* char1_reg is always STR_END here, so its value is saved in LOCAL1. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL1, char1_reg, 0);
7639
7640
/* With virtual registers STACK_TOP and STACK_LIMIT are borrowed: park their
values in TMP3 and RETURN_ADDR. */
if (char2_reg == STACK_TOP)
7641
  {
7642
  OP1(SLJIT_MOV, TMP3, 0, char2_reg, 0);
7643
  OP1(SLJIT_MOV, RETURN_ADDR, 0, lcc_table, 0);
7644
  }
7645
7646
OP1(SLJIT_MOV, lcc_table, 0, SLJIT_IMM, common->lcc);
7647
7648
if (opt_type == 1)
7649
  {
7650
  label = LABEL();
7651
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
7652
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
7653
  }
7654
else if (opt_type == 2)
7655
  {
7656
  /* Pre-update loads need both pointers biased by one code unit. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
7657
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7658
7659
  label = LABEL();
7660
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
7661
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
7662
  }
7663
else
7664
  {
7665
  /* Only TMP1 is advanced here; STR_PTR is advanced after the table lookups
  (see the opt_type == 0 test below). */
  label = LABEL();
7666
  OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
7667
  OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
7668
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
7669
  }
7670
7671
/* Map both code units through the lcc table. For wider code units the lookup
is skipped for values above 255. */
#if PCRE2_CODE_UNIT_WIDTH != 8
7672
jump = CMP(SLJIT_GREATER, char1_reg, 0, SLJIT_IMM, 255);
7673
#endif
7674
OP1(SLJIT_MOV_U8, char1_reg, 0, SLJIT_MEM2(lcc_table, char1_reg), 0);
7675
#if PCRE2_CODE_UNIT_WIDTH != 8
7676
JUMPHERE(jump);
7677
jump = CMP(SLJIT_GREATER, char2_reg, 0, SLJIT_IMM, 255);
7678
#endif
7679
OP1(SLJIT_MOV_U8, char2_reg, 0, SLJIT_MEM2(lcc_table, char2_reg), 0);
7680
#if PCRE2_CODE_UNIT_WIDTH != 8
7681
JUMPHERE(jump);
7682
#endif
7683
7684
if (opt_type == 0)
7685
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7686
7687
/* Leave the loop on a mismatch (TMP2 is not decremented in that case),
otherwise continue until TMP2 reaches zero. */
jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
7688
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
7689
JUMPTO(SLJIT_NOT_ZERO, label);
7690
7691
JUMPHERE(jump);
7692
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
7693
7694
/* Undo the bias applied for pre-update loads. */
if (opt_type == 2)
7695
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7696
7697
/* Restore the borrowed registers. */
if (char2_reg == STACK_TOP)
7698
  {
7699
  OP1(SLJIT_MOV, char2_reg, 0, TMP3, 0);
7700
  OP1(SLJIT_MOV, lcc_table, 0, RETURN_ADDR, 0);
7701
  }
7702
7703
OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1);
7704
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
7705
}
7706
7707
#include "pcre2_jit_char_inc.h"
7708
7709
/* Emits the matching path code of a simple position assertion.

Arguments:
  common      the compiler state
  type        the assertion opcode to compile (one of the cases below)
  cc          current position in the compiled pattern; not read here
  backtracks  list which receives every jump taken when the assertion fails

Returns: cc unchanged for every handled opcode. Any other opcode reaches
SLJIT_UNREACHABLE(). TMP1 and TMP2 are used as scratch registers by most
cases, and TMP3 by some of them. */
static PCRE2_SPTR compile_simple_assertion_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks)
7710
{
7711
DEFINE_COMPILER;
7712
struct sljit_jump *jump[4];
7713
7714
switch(type)
7715
  {
7716
  /* Fails unless STR_PTR equals the 'begin' field of jit_arguments. */
  case OP_SOD:
7717
  if (HAS_VIRTUAL_REGISTERS)
7718
    {
7719
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
7720
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
7721
    }
7722
  else
7723
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
7724
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
7725
  return cc;
7726
7727
  /* Fails unless STR_PTR equals the 'str' field of jit_arguments. */
  case OP_SOM:
7728
  if (HAS_VIRTUAL_REGISTERS)
7729
    {
7730
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
7731
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
7732
    }
7733
  else
7734
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
7735
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
7736
  return cc;
7737
7738
  /* The test itself is done by a shared helper reached through a fast call
  (common->wordboundary or common->ucp_wordboundary); only the interpretation
  of its result is emitted here. */
  case OP_NOT_WORD_BOUNDARY:
7739
  case OP_WORD_BOUNDARY:
7740
  case OP_NOT_UCP_WORD_BOUNDARY:
7741
  case OP_UCP_WORD_BOUNDARY:
7742
  add_jump(compiler, (type == OP_NOT_WORD_BOUNDARY || type == OP_WORD_BOUNDARY) ? &common->wordboundary : &common->ucp_wordboundary, JUMP(SLJIT_FAST_CALL));
7743
#ifdef SUPPORT_UNICODE
7744
  if (common->invalid_utf)
7745
    {
7746
    /* The result is taken from TMP2: the NOT variants fail when it is
    non-zero, the others when it is less than or equal to zero (signed). */
    add_jump(compiler, backtracks, CMP((type == OP_NOT_WORD_BOUNDARY || type == OP_NOT_UCP_WORD_BOUNDARY) ? SLJIT_NOT_EQUAL : SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0));
7747
    return cc;
7748
    }
7749
#endif /* SUPPORT_UNICODE */
7750
  /* The result is taken from the zero flag, which is presumably left by
  the helper - confirm against the helper's final instruction. */
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
7751
  add_jump(compiler, backtracks, JUMP((type == OP_NOT_WORD_BOUNDARY || type == OP_NOT_UCP_WORD_BOUNDARY) ? SLJIT_NOT_ZERO : SLJIT_ZERO));
7752
  return cc;
7753
7754
  case OP_EODN:
7755
  /* Requires rather complex checks. */
7756
  /* jump[0] is taken when STR_PTR has already reached STR_END. */
  jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
7757
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
7758
    {
7759
    /* Fixed two code unit newline: exactly two code units must remain and
    they must equal the high and low bytes of common->newline. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
7760
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
7761
    if (common->mode == PCRE2_JIT_COMPLETE)
7762
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
7763
    else
7764
      {
7765
      /* Partial matching modes: when fewer than two code units remain and
      the first one equals the first newline code unit, check_partial() is
      called before failing. */
      jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
7766
      OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, STR_END, 0);
7767
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
7768
      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
7769
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL);
7770
      add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
7771
      check_partial(common, TRUE);
7772
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7773
      JUMPHERE(jump[1]);
7774
      }
7775
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
7776
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
7777
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
7778
    }
7779
  else if (common->nltype == NLTYPE_FIXED)
7780
    {
7781
    /* Fixed single code unit newline: exactly one code unit must remain and
    it must equal common->newline. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7782
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
7783
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
7784
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
7785
    }
7786
  else
7787
    {
7788
    /* Other newline types. A leading CR is handled first: jump[2] is taken
    when fewer than two code units remain (the CR is the last one), jump[3]
    when the CR is followed by NL exactly at the end. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
7789
    jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
7790
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
7791
    OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, TMP2, 0, STR_END, 0);
7792
    jump[2] = JUMP(SLJIT_GREATER);
7793
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */);
7794
    /* Equal. */
7795
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
7796
    jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
7797
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7798
7799
    /* The first code unit is not CR. */
    JUMPHERE(jump[1]);
7800
    if (common->nltype == NLTYPE_ANYCRLF)
7801
      {
7802
      /* Must be a single NL as the last code unit. */
      OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7803
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
7804
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
7805
      }
7806
    else
7807
      {
7808
      /* Read one character, require that it ends at STR_END, and test it
      with the common->anynewline helper. STR_PTR is kept in TMP3 during
      the test and restored afterwards. */
      OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
7809
      read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
7810
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
7811
      add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
7812
      sljit_set_current_flags(compiler, SLJIT_SET_Z);
7813
      add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
7814
      OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
7815
      }
7816
    JUMPHERE(jump[2]);
7817
    JUMPHERE(jump[3]);
7818
    }
7819
  JUMPHERE(jump[0]);
7820
  if (common->mode != PCRE2_JIT_COMPLETE)
7821
    check_partial(common, TRUE);
7822
  return cc;
7823
7824
  /* Fails when STR_PTR is before STR_END. */
  case OP_EOD:
7825
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
7826
  if (common->mode != PCRE2_JIT_COMPLETE)
7827
    check_partial(common, TRUE);
7828
  return cc;
7829
7830
  /* Fails when PCRE2_NOTEOL is set in the options field of jit_arguments.
  Otherwise behaves as OP_EODN, or as a plain end of subject test when
  common->endonly is set. */
  case OP_DOLL:
7831
  if (HAS_VIRTUAL_REGISTERS)
7832
    {
7833
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
7834
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
7835
    }
7836
  else
7837
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
7838
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
7839
7840
  if (!common->endonly)
7841
    compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
7842
  else
7843
    {
7844
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
7845
    check_partial(common, FALSE);
7846
    }
7847
  return cc;
7848
7849
  /* At the end of the subject (STR_PTR not below STR_END) the assertion
  fails only when PCRE2_NOTEOL is set; jump[0] then skips the newline test.
  Before the end (jump[1]) a newline must start at STR_PTR. */
  case OP_DOLLM:
7850
  jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
7851
  if (HAS_VIRTUAL_REGISTERS)
7852
    {
7853
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
7854
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
7855
    }
7856
  else
7857
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
7858
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
7859
  check_partial(common, FALSE);
7860
  jump[0] = JUMP(SLJIT_JUMP);
7861
  JUMPHERE(jump[1]);
7862
7863
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
7864
    {
7865
    /* Fixed two code unit newline: both code units must be available and
    must equal the high and low bytes of common->newline. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
7866
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
7867
    if (common->mode == PCRE2_JIT_COMPLETE)
7868
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
7869
    else
7870
      {
7871
      jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
7872
      /* STR_PTR = STR_END - IN_UCHARS(1) */
7873
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
7874
      check_partial(common, TRUE);
7875
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7876
      JUMPHERE(jump[1]);
7877
      }
7878
7879
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
7880
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
7881
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
7882
    }
7883
  else
7884
    {
7885
    peek_char(common, common->nlmax, TMP3, 0, NULL);
7886
    check_newlinechar(common, common->nltype, backtracks, FALSE);
7887
    }
7888
  JUMPHERE(jump[0]);
7889
  return cc;
7890
7891
  /* Fails when STR_PTR is after the 'begin' field of jit_arguments or when
  PCRE2_NOTBOL is set in its options field. */
  case OP_CIRC:
7892
  if (HAS_VIRTUAL_REGISTERS)
7893
    {
7894
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
7895
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
7896
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
7897
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
7898
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
7899
    }
7900
  else
7901
    {
7902
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
7903
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
7904
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
7905
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
7906
    }
7907
  return cc;
7908
7909
  /* At 'begin' the assertion fails only when PCRE2_NOTBOL is set; jump[0]
  then skips the newline test. After 'begin' (jump[1]) a newline must end
  right before STR_PTR. */
  case OP_CIRCM:
7910
  /* TMP2 might be used by peek_char_back. */
7911
  if (HAS_VIRTUAL_REGISTERS)
7912
    {
7913
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
7914
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
7915
    jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);
7916
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
7917
    }
7918
  else
7919
    {
7920
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
7921
    jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);
7922
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
7923
    }
7924
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
7925
  jump[0] = JUMP(SLJIT_JUMP);
7926
  JUMPHERE(jump[1]);
7927
7928
  /* Unless common->alt_circumflex is set, the assertion also fails at the
  very end of the subject. */
  if (!common->alt_circumflex)
7929
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
7930
7931
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
7932
    {
7933
    /* Fixed two code unit newline: both preceding code units must lie at or
    after 'begin' (still in TMP2) and equal the bytes of common->newline. */
    OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
7934
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, TMP2, 0));
7935
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
7936
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
7937
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
7938
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
7939
    }
7940
  else
7941
    {
7942
    peek_char_back(common, common->nlmax, backtracks);
7943
    check_newlinechar(common, common->nltype, backtracks, FALSE);
7944
    }
7945
  JUMPHERE(jump[0]);
7946
  return cc;
7947
  }
7948
SLJIT_UNREACHABLE();
7949
return cc;
7950
}
7951
7952
/* Forward declarations. */
7953
static void compile_matchingpath(compiler_common *, PCRE2_SPTR, PCRE2_SPTR, backtrack_common *);
7954
static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
7955
7956
/* Allocates a zero-filled backtrack record of 'size' bytes with
sljit_alloc_memory(), stores 'ccstart' in its cc field and pushes it onto the
parent->top list. If the compiler is in an error state after the allocation,
the enclosing function returns 'error'. The macro relies on variables named
'compiler', 'backtrack' and 'parent' being in scope at the point of use. */
#define PUSH_BACKTRACK(size, ccstart, error) \
7957
  do \
7958
    { \
7959
    backtrack = sljit_alloc_memory(compiler, (size)); \
7960
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
7961
      return error; \
7962
    memset(backtrack, 0, size); \
7963
    backtrack->prev = parent->top; \
7964
    backtrack->cc = (ccstart); \
7965
    parent->top = backtrack; \
7966
    } \
7967
  while (0)
7968
7969
/* Same as PUSH_BACKTRACK, for enclosing functions which return void: on a
compiler error a plain 'return' is executed. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
7970
  do \
7971
    { \
7972
    backtrack = sljit_alloc_memory(compiler, (size)); \
7973
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
7974
      return; \
7975
    memset(backtrack, 0, size); \
7976
    backtrack->prev = parent->top; \
7977
    backtrack->cc = (ccstart); \
7978
    parent->top = backtrack; \
7979
    } \
7980
  while (0)
7981
7982
/* Views the in-scope 'backtrack' pointer as a pointer to the given type. */
#define BACKTRACK_AS(type) ((type *)backtrack)
7983
7984
/* Emits the code which selects a capture group for an OP_DNREF / OP_DNREFI
item. 'count' name table entries, starting at the entry index stored in the
opcode, are examined in order. For each entry the address of its OVECTOR slot
is loaded into TMP2 (assuming GET_LOCAL_BASE computes the address of a local -
confirm against the macro) and the slot is compared with OVECTOR(1); the first
slot whose value differs from OVECTOR(1) ends the search. The last entry is
not compared this way, so TMP2 always ends up with a slot address.

Arguments:
  common      the compiler state
  cc          points to the OP_DNREF / OP_DNREFI opcode
  backtracks  when not NULL and common->unset_backref is not set, receives a
              jump taken if the last slot equals OVECTOR(1) as well

TMP1 is overwritten with the value of OVECTOR(1). */
static void compile_dnref_search(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
7985
{
7986
/* The OVECTOR offset goes to TMP2. */
7987
DEFINE_COMPILER;
7988
int count = GET2(cc, 1 + IMM2_SIZE);
7989
PCRE2_SPTR slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
7990
unsigned int offset;
7991
jump_list *found = NULL;
7992
7993
SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
7994
7995
/* OVECTOR(1) is the reference value the slots are compared with (described
elsewhere in this file as the "string begin - 1" constant). */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
7996
7997
/* All entries except the last one are handled by the loop. */
count--;
7998
while (count-- > 0)
7999
  {
8000
  offset = GET2(slot, 0) << 1;
8001
  GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
8002
  add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
8003
  slot += common->name_entry_size;
8004
  }
8005
8006
/* Last entry: it is the result when no earlier slot was selected. */
offset = GET2(slot, 0) << 1;
8007
GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
8008
if (backtracks != NULL && !common->unset_backref)
8009
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
8010
8011
set_jumps(found, LABEL());
8012
}
8013
8014
/* Emits the code which matches the subject at STR_PTR against the text of a
captured group (a back reference).

Arguments:
  common      the compiler state
  cc          points to the reference opcode; OP_REF and OP_REFI read the
              group from the opcode, otherwise TMP2 must already hold the
              address of the selected OVECTOR slot
  backtracks  list which receives every jump taken when the match fails
  withchecks  when TRUE, the unset group check (OP_REF / OP_REFI only, and only
              if common->unset_backref is not set) and the empty reference
              check are emitted
  emptyfail   when TRUE (together with withchecks), an empty reference is
              treated as a failure instead of a successful match

Two implementations are emitted. With SUPPORT_UNICODE, when common->utf or
common->ucp is set and the opcode is OP_REFI or OP_DNREFI, characters are
compared one by one using the Unicode tables. Otherwise the length of the
reference is computed and the common->casefulcmp or common->caselesscmp
helper is called. */
static void compile_ref_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
8015
{
8016
DEFINE_COMPILER;
8017
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
8018
int offset = 0;
8019
struct sljit_jump *jump = NULL;
8020
struct sljit_jump *partial;
8021
struct sljit_jump *nopartial;
8022
#if defined SUPPORT_UNICODE
8023
struct sljit_label *loop;
8024
struct sljit_label *caseless_loop;
8025
struct sljit_jump *turkish_ascii_i = NULL;
8026
struct sljit_jump *turkish_non_ascii_i = NULL;
8027
jump_list *no_match = NULL;
8028
/* Registers borrowed by the Unicode caseless path; their values are saved
in LOCAL0, LOCAL1 and LOCAL2 and restored on every exit of that path. */
int source_reg = COUNT_MATCH;
8029
int source_end_reg = ARGUMENTS;
8030
int char1_reg = STACK_LIMIT;
8031
PCRE2_UCHAR refi_flag = 0;
8032
8033
/* The flags of a caseless reference are stored in the last code unit of
the opcode. */
if (*cc == OP_REFI || *cc == OP_DNREFI)
8034
  refi_flag = cc[PRIV(OP_lengths)[*cc] - 1];
8035
#endif /* SUPPORT_UNICODE */
8036
8037
/* Load the start of the referenced text into TMP1. */
if (ref)
8038
  {
8039
  offset = GET2(cc, 1) << 1;
8040
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
8041
  /* OVECTOR(1) contains the "string begin - 1" constant. */
8042
  if (withchecks && !common->unset_backref)
8043
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
8044
  }
8045
else
8046
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
8047
8048
#if defined SUPPORT_UNICODE
8049
if ((common->utf || common->ucp) && (*cc == OP_REFI || *cc == OP_DNREFI))
8050
  {
8051
  /* Update ref_update_local_size() when this changes. */
8052
  SLJIT_ASSERT(common->locals_size >= 3 * SSIZE_OF(sw));
8053
8054
  /* Load the end of the referenced text into TMP2. */
  if (ref)
8055
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
8056
  else
8057
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
8058
8059
  if (withchecks && emptyfail)
8060
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0));
8061
8062
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL0, source_reg, 0);
8063
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL1, source_end_reg, 0);
8064
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL2, char1_reg, 0);
8065
8066
  OP1(SLJIT_MOV, source_reg, 0, TMP1, 0);
8067
  OP1(SLJIT_MOV, source_end_reg, 0, TMP2, 0);
8068
8069
  /* Main loop: 'jump' is taken when the whole reference has been consumed
  (success), 'partial' when the subject ends first. */
  loop = LABEL();
8070
  jump = CMP(SLJIT_GREATER_EQUAL, source_reg, 0, source_end_reg, 0);
8071
  partial = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
8072
8073
  /* Read original character. It must be a valid UTF character. */
8074
  /* STR_PTR is temporarily pointed at the reference text, because
  read_char reads from STR_PTR; its value is kept in TMP3 meanwhile. */
  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
8075
  OP1(SLJIT_MOV, STR_PTR, 0, source_reg, 0);
8076
8077
  read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR | READ_CHAR_VALID_UTF);
8078
8079
  OP1(SLJIT_MOV, source_reg, 0, STR_PTR, 0);
8080
  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
8081
  OP1(SLJIT_MOV, char1_reg, 0, TMP1, 0);
8082
8083
  /* Read second character. */
8084
  read_char(common, 0, READ_CHAR_MAX, &no_match, READ_CHAR_UPDATE_STR_PTR);
8085
8086
  /* Identical characters: continue with the next pair. */
  CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);
8087
8088
  if ((refi_flag & (REFI_FLAG_TURKISH_CASING|REFI_FLAG_CASELESS_RESTRICT)) ==
8089
        REFI_FLAG_TURKISH_CASING)
8090
    {
8091
    /* Turkish casing: 'I' / 'i' (0x49 / 0x69) and U+0130 / U+0131 in the
    reference are diverted to the dedicated code below. */
    OP2(SLJIT_OR, SLJIT_TMP_DEST_REG, 0, char1_reg, 0, SLJIT_IMM, 0x20);
8092
    turkish_ascii_i = CMP(SLJIT_EQUAL, SLJIT_TMP_DEST_REG, 0, SLJIT_IMM, 0x69);
8093
8094
    OP2(SLJIT_OR, SLJIT_TMP_DEST_REG, 0, char1_reg, 0, SLJIT_IMM, 0x1);
8095
    turkish_non_ascii_i = CMP(SLJIT_EQUAL, SLJIT_TMP_DEST_REG, 0, SLJIT_IMM, 0x131);
8096
    }
8097
8098
  /* Keep the subject character in TMP3 across the getucd call. */
  OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
8099
8100
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
8101
8102
  /* TMP2 is scaled by 12 (4 + 8) and added to the ucd_records base,
  presumably because it is a record index returned by getucd and 12 is the
  record size - confirm against the getucd helper and ucd_record. */
  OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
8103
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
8104
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
8105
8106
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records));
8107
8108
  /* Subject character plus its other_case delta equals the reference
  character: continue with the next pair. */
  OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, other_case));
8109
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, caseset));
8110
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);
8111
  CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);
8112
8113
  /* A zero caseset value means no match; otherwise TMP2 becomes the address
  of the first 32 bit entry of the set inside ucd_caseless_sets. */
  add_jump(compiler, &no_match, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
8114
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
8115
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_caseless_sets));
8116
8117
  /* Caseless restriction: fail when the first entry of the set is below 128. */
  if (refi_flag & REFI_FLAG_CASELESS_RESTRICT)
8118
    add_jump(compiler, &no_match, CMP(SLJIT_LESS | SLJIT_32, SLJIT_MEM1(TMP2), 0, SLJIT_IMM, 128));
8119
8120
  /* Scan the set: an entry equal to the reference character continues the
  main loop, a smaller entry continues the scan, and a larger entry falls
  through to the no match code. */
  caseless_loop = LABEL();
8121
  OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP2), 0);
8122
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(uint32_t));
8123
  OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, TMP1, 0, char1_reg, 0);
8124
  JUMPTO(SLJIT_EQUAL, loop);
8125
  JUMPTO(SLJIT_LESS, caseless_loop);
8126
8127
  if ((refi_flag & (REFI_FLAG_TURKISH_CASING|REFI_FLAG_CASELESS_RESTRICT)) ==
8128
        REFI_FLAG_TURKISH_CASING)
8129
    {
8130
    add_jump(compiler, &no_match, JUMP(SLJIT_JUMP));
8131
    JUMPHERE(turkish_ascii_i);
8132
8133
    /* Maps 'i' (0x69) to 0x130 and 'I' (0x49) to 0x131, then compares the
    result with the subject character in TMP1. */
    OP2(SLJIT_LSHR, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 5);
8134
    OP2(SLJIT_AND, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 1);
8135
    OP2(SLJIT_XOR, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 1);
8136
    OP2(SLJIT_ADD, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 0x130);
8137
    CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);
8138
8139
    add_jump(compiler, &no_match, JUMP(SLJIT_JUMP));
8140
    JUMPHERE(turkish_non_ascii_i);
8141
8142
    /* Maps 0x130 to 'i' (0x69) and 0x131 to 'I' (0x49), then compares the
    result with the subject character in TMP1. */
    OP2(SLJIT_AND, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 1);
8143
    OP2(SLJIT_XOR, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 1);
8144
    OP2(SLJIT_SHL, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 5);
8145
    OP2(SLJIT_ADD, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 0x49);
8146
    CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);
8147
    }
8148
8149
  /* No match: restore the borrowed registers and backtrack. In complete
  mode running out of subject is handled the same way. */
  set_jumps(no_match, LABEL());
8150
  if (common->mode == PCRE2_JIT_COMPLETE)
8151
    JUMPHERE(partial);
8152
8153
  OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
8154
  OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1);
8155
  OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2);
8156
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
8157
8158
  /* Partial matching modes: end of subject reached inside the reference. */
  if (common->mode != PCRE2_JIT_COMPLETE)
8159
    {
8160
    JUMPHERE(partial);
8161
    OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
8162
    OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1);
8163
    OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2);
8164
8165
    check_partial(common, FALSE);
8166
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
8167
    }
8168
8169
  /* Success: the whole reference has been matched. */
  JUMPHERE(jump);
8170
  OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
8171
  OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1);
8172
  OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2);
8173
  return;
8174
  }
8175
else
8176
#endif /* SUPPORT_UNICODE */
8177
  {
8178
  /* TMP2 = length of the reference in bytes (end - start). */
  if (ref)
8179
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
8180
  else
8181
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
8182
8183
  /* An empty reference is resolved at the end of this function. */
  if (withchecks)
8184
    jump = JUMP(SLJIT_ZERO);
8185
8186
  /* STR_PTR is advanced by the whole length first; the comparison helpers
  visible in this file subtract TMP2 from STR_PTR again on entry. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
8187
  partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
8188
  if (common->mode == PCRE2_JIT_COMPLETE)
8189
    add_jump(compiler, backtracks, partial);
8190
8191
  /* A non-zero TMP2 after the helper call is treated as a mismatch. */
  add_jump(compiler, (*cc == OP_REF || *cc == OP_DNREF) ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
8192
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
8193
8194
  if (common->mode != PCRE2_JIT_COMPLETE)
8195
    {
8196
    /* Partial matching modes: the reference is longer than the rest of the
    subject. Only the available part is compared (unless it is empty), then
    check_partial() is called and the match fails. */
    nopartial = JUMP(SLJIT_JUMP);
8197
    JUMPHERE(partial);
8198
    /* TMP2 -= STR_END - STR_PTR */
8199
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
8200
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
8201
    partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
8202
    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
8203
    add_jump(compiler, (*cc == OP_REF || *cc == OP_DNREF) ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
8204
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
8205
    JUMPHERE(partial);
8206
    check_partial(common, FALSE);
8207
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
8208
    JUMPHERE(nopartial);
8209
    }
8210
  }
8211
8212
/* Empty reference: fail when emptyfail is set, otherwise continue here. */
if (jump != NULL)
8213
  {
8214
  if (emptyfail)
8215
    add_jump(compiler, backtracks, jump);
8216
  else
8217
    JUMPHERE(jump);
8218
  }
8219
}
8220
8221
/* Emits the matching path for a back reference that is followed by a repeat
opcode (OP_CRSTAR ... OP_CRMINRANGE).

cc points at OP_REF / OP_REFI (reference by ovector offset) or at another
reference opcode (the code below tests for OP_DNREFI) whose target is located
by the code emitted through compile_dnref_search(). The function returns cc
advanced past the reference and its repeat operands. PUSH_BACKTRACK is given
NULL as its third argument (presumably the value returned on failure - confirm
against the macro definition).

Greedy repeats (even repeat opcodes) keep their iteration counter in the local
slot at local_start and push one saved STR_PTR per optional iteration.
Minimizing repeats (odd repeat opcodes) keep their state on the backtrack
stack: STACK(0) saved STR_PTR, STACK(1) iteration count, and for the
non-OP_REF/OP_REFI case STACK(2) the pointer left in TMP2 after
compile_dnref_search(). */
static SLJIT_INLINE PCRE2_SPTR compile_ref_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
8222
{
8223
DEFINE_COMPILER;
8224
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
8225
backtrack_common *backtrack;
8226
PCRE2_UCHAR type;
8227
int local_start = LOCAL2;
8228
int offset = 0;
8229
struct sljit_label *label;
8230
struct sljit_jump *zerolength;
8231
struct sljit_jump *jump = NULL;
8232
PCRE2_SPTR ccbegin = cc;
8233
int min = 0, max = 0;
8234
BOOL minimize;
8235
8236
PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);
8237
8238
if (ref)
8239
  /* Two ovector slots per group: offset is the start slot, offset + 1 the end slot. */
  offset = GET2(cc, 1) << 1;
8240
else
8241
  /* NOTE(review): the non-numbered form apparently carries one more IMM2
  operand; skipping it lets the offsets below serve both forms - confirm. */
  cc += IMM2_SIZE;
8242
8243
if (*ccbegin == OP_REFI || *ccbegin == OP_DNREFI)
8244
  {
8245
  /* The caseless opcodes occupy one more code unit. */
  cc += 1;
8246
#ifdef SUPPORT_UNICODE
8247
  /* NOTE(review): a different local slot pair is used for UTF/UCP caseless
  references, presumably because the comparison itself needs the lower
  locals - confirm against ref_update_local_size(). */
  if (common->utf || common->ucp)
8248
    local_start = LOCAL3;
8249
#endif
8250
  }
8251
8252
/* The repeat opcode that follows the reference. */
type = cc[1 + IMM2_SIZE];
8253
8254
SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
8255
/* Update ref_update_local_size() when this changes. */
8256
SLJIT_ASSERT(local_start + 2 * SSIZE_OF(sw) <= (int)LOCAL0 + common->locals_size);
8257
/* Odd repeat opcodes are the minimizing (OP_CRMIN*) variants. */
minimize = (type & 0x1) != 0;
8258
/* Decode the repeat range; max == 0 means there is no upper limit. */
switch(type)
8259
  {
8260
  case OP_CRSTAR:
8261
  case OP_CRMINSTAR:
8262
  min = 0;
8263
  max = 0;
8264
  cc += 1 + IMM2_SIZE + 1;
8265
  break;
8266
  case OP_CRPLUS:
8267
  case OP_CRMINPLUS:
8268
  min = 1;
8269
  max = 0;
8270
  cc += 1 + IMM2_SIZE + 1;
8271
  break;
8272
  case OP_CRQUERY:
8273
  case OP_CRMINQUERY:
8274
  min = 0;
8275
  max = 1;
8276
  cc += 1 + IMM2_SIZE + 1;
8277
  break;
8278
  case OP_CRRANGE:
8279
  case OP_CRMINRANGE:
8280
  min = GET2(cc, 1 + IMM2_SIZE + 1);
8281
  max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
8282
  cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
8283
  break;
8284
  default:
8285
  SLJIT_UNREACHABLE();
8286
  break;
8287
  }
8288
8289
/* Greedy repeat: match as many iterations as allowed, saving a STR_PTR for
each optional iteration so it can be given back later. */
if (!minimize)
8290
  {
8291
  if (min == 0)
8292
    {
8293
    allocate_stack(common, 2);
8294
    if (ref)
8295
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
8296
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8297
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
8298
    /* Temporary release of STR_PTR. */
8299
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
8300
    /* Handles both invalid and empty cases. Since the minimum repeat
8301
    is zero, the invalid case is basically the same as an empty case. */
8302
    if (ref)
8303
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
8304
    else
8305
      {
8306
      compile_dnref_search(common, ccbegin, NULL);
8307
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
8308
      /* Keep the pointer found by the search; it is reloaded at the loop label. */
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), local_start + SSIZE_OF(sw), TMP2, 0);
8309
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
8310
      }
8311
    /* Restore if not zero length. */
8312
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
8313
    }
8314
  else
8315
    {
8316
    allocate_stack(common, 1);
8317
    if (ref)
8318
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
8319
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
8320
8321
    if (ref)
8322
      {
8323
      /* With min > 0 the reference must be usable: unless unset_backref is
      enabled, backtrack when the start slot equals OVECTOR(1). */
      if (!common->unset_backref)
8324
        add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
8325
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
8326
      }
8327
    else
8328
      {
8329
      compile_dnref_search(common, ccbegin, &backtrack->own_backtracks);
8330
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
8331
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), local_start + SSIZE_OF(sw), TMP2, 0);
8332
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
8333
      }
8334
    }
8335
8336
  /* An iteration counter is only needed when a bound greater than one exists. */
  if (min > 1 || max > 1)
8337
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), local_start, SLJIT_IMM, 0);
8338
8339
  /* Start of one iteration of the greedy loop. */
  label = LABEL();
8340
  if (!ref)
8341
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), local_start + SSIZE_OF(sw));
8342
  compile_ref_matchingpath(common, ccbegin, &backtrack->own_backtracks, FALSE, FALSE);
8343
8344
  if (min > 1 || max > 1)
8345
    {
8346
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), local_start);
8347
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
8348
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), local_start, TMP1, 0);
8349
    /* Mandatory iterations loop back without saving a position. */
    if (min > 1)
8350
      CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
8351
    if (max > 1)
8352
      {
8353
      /* Stop at max; otherwise push the current STR_PTR and try once more. */
      jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
8354
      allocate_stack(common, 1);
8355
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8356
      JUMPTO(SLJIT_JUMP, label);
8357
      JUMPHERE(jump);
8358
      }
8359
    }
8360
8361
  if (max == 0)
8362
    {
8363
    /* Includes min > 1 case as well. */
8364
    allocate_stack(common, 1);
8365
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8366
    JUMPTO(SLJIT_JUMP, label);
8367
    }
8368
8369
  JUMPHERE(zerolength);
8370
  BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
8371
8372
  count_match(common);
8373
  return cc;
8374
  }
8375
8376
/* Minimizing repeat. Stack layout: STACK(0) saved STR_PTR, STACK(1) iteration
count, STACK(2) (only when !ref) the pointer left in TMP2 by the search. */
allocate_stack(common, ref ? 2 : 3);
8377
if (ref)
8378
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
8379
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
8380
if (type != OP_CRMINSTAR)
8381
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
8382
8383
if (min == 0)
8384
  {
8385
  /* Handles both invalid and empty cases. Since the minimum repeat
8386
  is zero, the invalid case is basically the same as an empty case. */
8387
  if (ref)
8388
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
8389
  else
8390
    {
8391
    compile_dnref_search(common, ccbegin, NULL);
8392
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
8393
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
8394
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
8395
    }
8396
  /* Length is non-zero, we can match real repeats. */
8397
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8398
  /* With min == 0 the first attempt matches zero iterations, so skip the
  iteration code emitted below. */
  jump = JUMP(SLJIT_JUMP);
8399
  }
8400
else
8401
  {
8402
  if (ref)
8403
    {
8404
    if (!common->unset_backref)
8405
      add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
8406
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
8407
    }
8408
  else
8409
    {
8410
    compile_dnref_search(common, ccbegin, &backtrack->own_backtracks);
8411
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
8412
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
8413
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
8414
    }
8415
  }
8416
8417
/* Entry point of one further iteration; also the target of the min loop below. */
BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
8418
if (max > 0)
8419
  add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
8420
8421
if (!ref)
8422
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
8423
compile_ref_matchingpath(common, ccbegin, &backtrack->own_backtracks, TRUE, TRUE);
8424
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8425
8426
if (min > 1)
8427
  {
8428
  /* Count the iteration and repeat until the mandatory minimum is reached. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
8429
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
8430
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
8431
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath);
8432
  }
8433
else if (max > 0)
8434
  /* Only the upper bound is checked, so just bump the counter in place. */
  OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
8435
8436
if (jump != NULL)
8437
  JUMPHERE(jump);
8438
JUMPHERE(zerolength);
8439
8440
count_match(common);
8441
return cc;
8442
}
8443
8444
/* Emits the matching path for a recursion opcode at cc.

The operand GET(cc, 1) is the offset (relative to common->start) of the group
to recurse into. When get_framesize() reports no_stack for that group, its
body is compiled inline. Otherwise a fast call to the shared recurse_entry for
that offset is emitted; the entry is created and appended to common->entries on
first use.

Returns the address following the opcode, or NULL when the sljit compiler is in
an error state after allocating a new entry. */
static SLJIT_INLINE PCRE2_SPTR compile_recurse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
sljit_sw start = GET(cc, 1);
PCRE2_SPTR group = common->start + start;
recurse_entry **link;
recurse_entry *target;
BOOL needs_control_head;

PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);

/* Simple groups are compiled in place instead of being called. */
if (get_framesize(common, group, NULL, TRUE, &needs_control_head) == no_stack)
  {
  compile_matchingpath(common, next_opcode(common, group), bracketend(group) - (1 + LINK_SIZE), backtrack);
  BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
  return cc + 1 + LINK_SIZE;
  }

/* Walk the list through the link that points at each node, so that a new
entry can be attached at the tail (or as the head of an empty list) with a
single store. */
link = &common->entries;
while (*link != NULL && (*link)->start != start)
  link = &(*link)->next;

target = *link;
if (target == NULL)
  {
  target = sljit_alloc_memory(compiler, sizeof(recurse_entry));
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;

  target->start = start;
  target->next = NULL;
  target->entry_label = NULL;
  target->backtrack_label = NULL;
  target->entry_calls = NULL;
  target->backtrack_calls = NULL;

  *link = target;
  }

BACKTRACK_AS(recurse_backtrack)->entry = target;

/* Call the entry directly when its label already exists; otherwise record the
call so it can be bound to the label later. */
if (target->entry_label != NULL)
  {
  JUMPTO(SLJIT_FAST_CALL, target->entry_label);
  }
else
  {
  add_jump(compiler, &target->entry_calls, JUMP(SLJIT_FAST_CALL));
  }

/* A zero in TMP1 after the call means the recursion failed to match. */
add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
BACKTRACK_AS(recurse_backtrack)->matchingpath = LABEL();
return cc + 1 + LINK_SIZE;
}
8502
8503
/* Run-time helper called from JIT code to invoke the user callout.

arguments     - JIT arguments; supplies the subject bounds, the callout
                function and its user data.
callout_block - partially filled block. On entry capture_top holds the number
                of ovector pairs to convert and offset_vector holds the current
                subject pointer; both are replaced with their public values
                here. The converted offset vector is written to the memory
                directly after the block.
jit_ovector   - the JIT's capture vector, stored as subject pointers.

Returns 0 when no callout function is installed, otherwise the value returned
by the callout function. */
static sljit_s32 SLJIT_FUNC do_callout_jit(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector)
{
PCRE2_SPTR subject;
PCRE2_SPTR *src;
PCRE2_SIZE *offsets;
PCRE2_SIZE *dst;
sljit_u32 pairs, remaining, group;

if (arguments->callout == NULL)
  return 0;

SLJIT_COMPILE_ASSERT(sizeof (PCRE2_SIZE) <= sizeof (sljit_sw), pcre2_size_must_be_lower_than_sljit_sw_size);

subject = arguments->begin;
offsets = (PCRE2_SIZE*)(callout_block + 1);

/* Read the pair count before capture_top is reused for its public meaning. */
pairs = callout_block->capture_top;
SLJIT_ASSERT(pairs >= 1);

callout_block->version = 2;
callout_block->callout_flags = 0;

/* Subject related fields. The current position must be taken from
offset_vector before that field is pointed at the converted vector. */
callout_block->subject = subject;
callout_block->subject_length = arguments->end - arguments->begin;
callout_block->start_match = jit_ovector[0] - subject;
callout_block->current_position = (PCRE2_SPTR)callout_block->offset_vector - subject;

callout_block->offset_vector = offsets;
callout_block->capture_top = 1;

/* Pair 0 is always reported as unset. */
offsets[0] = PCRE2_UNSET;
offsets[1] = PCRE2_UNSET;

/* Turn the remaining pointer pairs into offsets from the subject start;
capture_top ends up one above the highest pair whose start is set. */
dst = offsets + 2;
src = jit_ovector + 2;
for (remaining = pairs - 1, group = 2; remaining != 0; remaining--, group++)
  {
  dst[0] = (PCRE2_SIZE)(src[0] - subject);
  dst[1] = (PCRE2_SIZE)(src[1] - subject);

  if (dst[0] != PCRE2_UNSET)
    callout_block->capture_top = group;

  dst += 2;
  src += 2;
  }

return (arguments->callout)(callout_block, arguments->callout_data);
}
8556
8557
/* Offset of member `arg` within pcre2_callout_block. The callout code below
uses it as the displacement from STACK_TOP when filling in the block. */
#define CALLOUT_ARG_OFFSET(arg) \
8558
    SLJIT_OFFSETOF(pcre2_callout_block, arg)
8559
8560
/* Emits the matching path for a callout opcode at cc.

The generated code builds a pcre2_callout_block (followed by room for a
converted offset vector) on the backtrack stack, calls do_callout_jit() and
then dispatches on its 32 bit result: a positive value backtracks, a non-zero
value that is not positive aborts the match, and zero continues.

Handles OP_CALLOUT and a second, variable length form whose length is stored
at 1 + 2 * LINK_SIZE (presumably OP_CALLOUT_STR - confirm with the caller).
Returns the address following the callout item. */
static SLJIT_INLINE PCRE2_SPTR compile_callout_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
8561
{
8562
DEFINE_COMPILER;
8563
backtrack_common *backtrack;
8564
sljit_s32 mov_opcode;
8565
unsigned int callout_length = (*cc == OP_CALLOUT)
8566
    ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2 * LINK_SIZE);
8567
sljit_sw value1;
8568
sljit_sw value2;
8569
sljit_sw value3;
8570
/* Bytes needed for one start/end word pair per capture group, pair 0 included. */
sljit_uw callout_arg_size = (common->re->top_bracket + 1) * 2 * SSIZE_OF(sw);
8571
8572
PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
8573
8574
/* Add the block itself and convert to a count of sljit_sw words, rounding up. */
callout_arg_size = (sizeof(pcre2_callout_block) + callout_arg_size + sizeof(sljit_sw) - 1) / sizeof(sljit_sw);
8575
8576
allocate_stack(common, callout_arg_size);
8577
8578
SLJIT_ASSERT(common->capture_last_ptr != 0);
8579
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
8580
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8581
/* Callout number: read from the opcode for OP_CALLOUT, zero for the other form. */
value1 = (*cc == OP_CALLOUT) ? cc[1 + 2 * LINK_SIZE] : 0;
8582
OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, value1);
8583
OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
8584
/* capture_top temporarily carries the pair count; do_callout_jit() reads it
before storing the real value. */
OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_top), SLJIT_IMM, common->re->top_bracket + 1);
8585
8586
/* These pointer-sized fields temporarily store internal variables. Here
offset_vector carries STR_PTR, from which do_callout_jit() computes
current_position. */
8587
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
8588
8589
/* TMP2 is free again after the capture_last store above; reuse it for the mark. */
if (common->mark_ptr != 0)
8590
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
8591
/* Store PCRE2_SIZE fields with a move that matches their width. */
mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
8592
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 1));
8593
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 1 + LINK_SIZE));
8594
8595
/* value1 / value2 / value3 now become the callout string pointer, length and
offset; all zero for OP_CALLOUT. */
if (*cc == OP_CALLOUT)
8596
  {
8597
  value1 = 0;
8598
  value2 = 0;
8599
  value3 = 0;
8600
  }
8601
else
8602
  {
8603
  value1 = (sljit_sw) (cc + (1 + 4*LINK_SIZE) + 1);
8604
  value2 = (callout_length - (1 + 4*LINK_SIZE + 2));
8605
  value3 = (sljit_sw) (GET(cc, 1 + 3*LINK_SIZE));
8606
  }
8607
8608
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string), SLJIT_IMM, value1);
8609
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_length), SLJIT_IMM, value2);
8610
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_offset), SLJIT_IMM, value3);
8611
/* The mark is the value loaded into TMP2 above, or an immediate 0 when marks are unused. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
8612
8613
/* TMP1 already holds ARGUMENTS in the first argument register; STR_PTR lives
in the second one and is overwritten below, hence the save to LOCAL0. */
SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
8614
8615
/* Needed to save important temporary registers. */
8616
SLJIT_ASSERT(common->locals_size >= SSIZE_OF(sw));
8617
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL0, STR_PTR, 0);
8618
/* SLJIT_R0 = arguments */
8619
/* SLJIT_R1 = callout block, SLJIT_R2 = address of the JIT offset vector. */
OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0);
8620
GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
8621
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(32, W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_callout_jit));
8622
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
8623
free_stack(common, callout_arg_size);
8624
8625
/* Check return value. */
8626
OP2U(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
8627
/* Positive: the callout asks for a backtrack. */
add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_SIG_GREATER));
8628
/* Positive values were handled above, so a non-zero value here is negative: abort. */
if (common->abort_label == NULL)
8629
  add_jump(compiler, &common->abort, JUMP(SLJIT_NOT_EQUAL) /* SIG_LESS */);
8630
else
8631
  JUMPTO(SLJIT_NOT_EQUAL /* SIG_LESS */, common->abort_label);
8632
return cc + callout_length;
8633
}
8634
8635
/* Remove the callout helper macros.
NOTE(review): no definition of CALLOUT_ARG_SIZE is visible next to
CALLOUT_ARG_OFFSET - confirm whether this #undef is still needed. */
#undef CALLOUT_ARG_SIZE
8636
#undef CALLOUT_ARG_OFFSET
8637
8638
/* Emits code that moves STR_PTR backwards at an OP_REVERSE or OP_VREVERSE item.

OP_REVERSE moves back by a fixed count (lmin == lmax) and reports failure
through parent->own_backtracks. OP_VREVERSE has a range (lmin < lmax), pushes
its own vreverse_backtrack, and records two positions on the stack:
STACK(3) = STR_PTR after moving back lmin, STACK(2) = STR_PTR after moving
back up to lmax (limited by the subject start). In UTF mode the distance is
covered one character at a time with move_back(); otherwise it is a plain
subtraction of IN_UCHARS(count).

Returns the address following the item. */
static PCRE2_SPTR compile_reverse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
8639
{
8640
DEFINE_COMPILER;
8641
backtrack_common *backtrack = NULL;
8642
jump_list **reverse_failed;
8643
unsigned int lmin, lmax;
8644
#ifdef SUPPORT_UNICODE
8645
struct sljit_jump *jump;
8646
struct sljit_label *label;
8647
#endif
8648
8649
SLJIT_ASSERT(parent->top == NULL);
8650
8651
if (*cc == OP_REVERSE)
8652
  {
8653
  /* Fixed length: no backtrack of its own, failures go to the parent's list. */
  reverse_failed = &parent->own_backtracks;
8654
  lmin = GET2(cc, 1);
8655
  lmax = lmin;
8656
  cc += 1 + IMM2_SIZE;
8657
8658
  SLJIT_ASSERT(lmin > 0);
8659
  }
8660
else
8661
  {
8662
  SLJIT_ASSERT(*cc == OP_VREVERSE);
8663
  PUSH_BACKTRACK(sizeof(vreverse_backtrack), cc, NULL);
8664
8665
  reverse_failed = &backtrack->own_backtracks;
8666
  lmin = GET2(cc, 1);
8667
  lmax = GET2(cc, 1 + IMM2_SIZE);
8668
  cc += 1 + 2 * IMM2_SIZE;
8669
8670
  SLJIT_ASSERT(lmin < lmax);
8671
  }
8672
8673
/* TMP2 = start of the subject; STR_PTR must not move before it. */
if (HAS_VIRTUAL_REGISTERS)
8674
  {
8675
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8676
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
8677
  }
8678
else
8679
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
8680
8681
#ifdef SUPPORT_UNICODE
8682
if (common->utf)
8683
  {
8684
  if (lmin > 0)
8685
    {
8686
    /* Mandatory part: step back lmin characters, failing when the subject
    start is reached first. TMP3 counts the remaining steps. */
    OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, lmin);
8687
    label = LABEL();
8688
    add_jump(compiler, reverse_failed, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0));
8689
    move_back(common, reverse_failed, FALSE);
8690
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
8691
    JUMPTO(SLJIT_NOT_ZERO, label);
8692
    }
8693
8694
  if (lmin < lmax)
8695
    {
8696
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
8697
8698
    /* Optional part: step back up to lmax - lmin more characters; reaching
    the subject start ends the loop early instead of failing. */
    OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, lmax - lmin);
8699
    label = LABEL();
8700
    jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
8701
    move_back(common, reverse_failed, FALSE);
8702
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
8703
    JUMPTO(SLJIT_NOT_ZERO, label);
8704
8705
    JUMPHERE(jump);
8706
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);
8707
    }
8708
  }
8709
else
8710
#endif
8711
  {
8712
  if (lmin > 0)
8713
    {
8714
    /* Mandatory part: fail when the new position lies before the subject start. */
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(lmin));
8715
    add_jump(compiler, reverse_failed, CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0));
8716
    }
8717
8718
  if (lmin < lmax)
8719
    {
8720
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
8721
8722
    /* Optional part: subtract the rest, then limit the result to the subject
    start (NOTE(review): assumes SELECT picks TMP2 when the LESS flag is set
    and keeps STR_PTR otherwise - confirm against the macro). */
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(lmax - lmin));
8723
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, STR_PTR, 0, TMP2, 0);
8724
    SELECT(SLJIT_LESS, STR_PTR, TMP2, 0, STR_PTR);
8725
8726
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);
8727
    }
8728
  }
8729
8730
check_start_used_ptr(common);
8731
8732
/* Only the OP_VREVERSE case has a backtrack record (lmin < lmax is asserted
there, while OP_REVERSE sets lmax = lmin). */
if (lmin < lmax)
8733
  BACKTRACK_AS(vreverse_backtrack)->matchingpath = LABEL();
8734
8735
return cc;
8736
}
8737
8738
/* Scans the opcodes starting at cc and decides whether STR_PTR has to be saved.

Anchors, word boundary checks, callouts and OP_ALT are stepped over. Returns
FALSE when OP_KET is reached having seen nothing else, and TRUE as soon as any
other opcode is found. */
static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(PCRE2_SPTR cc)
{
for (;;)
  {
  if (*cc == OP_KET)
    return FALSE;

  /* String callouts store their own length inside the item. */
  if (*cc == OP_CALLOUT_STR)
    {
    cc += GET(cc, 1 + 2*LINK_SIZE);
    continue;
    }

  switch (*cc)
    {
    /* Opcodes that are skipped using their table length. */
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_NOT_UCP_WORD_BOUNDARY:
    case OP_UCP_WORD_BOUNDARY:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    case OP_CALLOUT:
    case OP_ALT:
    break;

    default:
    return TRUE;
    }

  cc += PRIV(OP_lengths)[*cc];
  }
}
8769
8770
static PCRE2_SPTR compile_assert_matchingpath(compiler_common *common, PCRE2_SPTR cc, assert_backtrack *backtrack, BOOL conditional)
8771
{
8772
DEFINE_COMPILER;
8773
int framesize;
8774
int extrasize;
8775
BOOL local_quit_available = FALSE;
8776
BOOL needs_control_head;
8777
BOOL end_block_size = 0;
8778
BOOL has_vreverse;
8779
int private_data_ptr;
8780
backtrack_common altbacktrack;
8781
PCRE2_SPTR ccbegin;
8782
PCRE2_UCHAR opcode;
8783
PCRE2_UCHAR bra = OP_BRA;
8784
jump_list *tmp = NULL;
8785
jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.own_backtracks;
8786
jump_list **found;
8787
/* Saving previous accept variables. */
8788
BOOL save_local_quit_available = common->local_quit_available;
8789
BOOL save_in_positive_assertion = common->in_positive_assertion;
8790
sljit_s32 save_restore_end_ptr = common->restore_end_ptr;
8791
then_trap_backtrack *save_then_trap = common->then_trap;
8792
struct sljit_label *save_quit_label = common->quit_label;
8793
struct sljit_label *save_accept_label = common->accept_label;
8794
jump_list *save_quit = common->quit;
8795
jump_list *save_positive_assertion_quit = common->positive_assertion_quit;
8796
jump_list *save_accept = common->accept;
8797
struct sljit_jump *jump;
8798
struct sljit_jump *brajump = NULL;
8799
8800
/* Assert captures then. */
8801
common->then_trap = NULL;
8802
8803
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
8804
  {
8805
  SLJIT_ASSERT(!conditional);
8806
  bra = *cc;
8807
  cc++;
8808
  }
8809
8810
private_data_ptr = PRIVATE_DATA(cc);
8811
SLJIT_ASSERT(private_data_ptr != 0);
8812
framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
8813
backtrack->framesize = framesize;
8814
backtrack->private_data_ptr = private_data_ptr;
8815
opcode = *cc;
8816
SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
8817
found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
8818
ccbegin = cc;
8819
cc += GET(cc, 1);
8820
8821
if (bra == OP_BRAMINZERO)
8822
  {
8823
  /* This is a braminzero backtrack path. */
8824
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8825
  free_stack(common, 1);
8826
  brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
8827
  }
8828
8829
if ((opcode == OP_ASSERTBACK || opcode == OP_ASSERTBACK_NOT) && find_vreverse(ccbegin))
8830
  end_block_size = 3;
8831
8832
if (framesize < 0)
8833
  {
8834
  extrasize = 1;
8835
  if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
8836
    extrasize = 0;
8837
8838
  extrasize += end_block_size;
8839
8840
  if (needs_control_head)
8841
    extrasize++;
8842
8843
  if (framesize == no_frame)
8844
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
8845
8846
  if (extrasize > 0)
8847
    allocate_stack(common, extrasize);
8848
8849
  if (needs_control_head)
8850
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
8851
8852
  if (extrasize > 0)
8853
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8854
8855
  if (needs_control_head)
8856
    {
8857
    SLJIT_ASSERT(extrasize == end_block_size + 2);
8858
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
8859
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP1, 0);
8860
    }
8861
  }
8862
else
8863
  {
8864
  extrasize = (needs_control_head ? 3 : 2) + end_block_size;
8865
8866
  OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);
8867
  allocate_stack(common, framesize + extrasize);
8868
8869
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8870
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
8871
  if (needs_control_head)
8872
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
8873
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
8874
8875
  if (needs_control_head)
8876
    {
8877
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 2), TMP1, 0);
8878
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP2, 0);
8879
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
8880
    }
8881
  else
8882
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP1, 0);
8883
8884
  init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize);
8885
  }
8886
8887
if (end_block_size > 0)
8888
  {
8889
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_END, 0);
8890
  OP1(SLJIT_MOV, STR_END, 0, STR_PTR, 0);
8891
  }
8892
8893
memset(&altbacktrack, 0, sizeof(backtrack_common));
8894
if (conditional || (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT))
8895
  {
8896
  /* Control verbs cannot escape from these asserts. */
8897
  local_quit_available = TRUE;
8898
  common->restore_end_ptr = 0;
8899
  common->local_quit_available = TRUE;
8900
  common->quit_label = NULL;
8901
  common->quit = NULL;
8902
  }
8903
8904
common->in_positive_assertion = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK);
8905
common->positive_assertion_quit = NULL;
8906
8907
while (1)
8908
  {
8909
  common->accept_label = NULL;
8910
  common->accept = NULL;
8911
  altbacktrack.top = NULL;
8912
  altbacktrack.own_backtracks = NULL;
8913
8914
  if (*ccbegin == OP_ALT && extrasize > 0)
8915
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
8916
8917
  altbacktrack.cc = ccbegin;
8918
  ccbegin += 1 + LINK_SIZE;
8919
8920
  has_vreverse = (*ccbegin == OP_VREVERSE);
8921
  if (*ccbegin == OP_REVERSE || has_vreverse)
8922
    ccbegin = compile_reverse_matchingpath(common, ccbegin, &altbacktrack);
8923
8924
  compile_matchingpath(common, ccbegin, cc, &altbacktrack);
8925
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
8926
    {
8927
    if (local_quit_available)
8928
      {
8929
      common->local_quit_available = save_local_quit_available;
8930
      common->quit_label = save_quit_label;
8931
      common->quit = save_quit;
8932
      }
8933
    common->in_positive_assertion = save_in_positive_assertion;
8934
    common->restore_end_ptr = save_restore_end_ptr;
8935
    common->then_trap = save_then_trap;
8936
    common->accept_label = save_accept_label;
8937
    common->positive_assertion_quit = save_positive_assertion_quit;
8938
    common->accept = save_accept;
8939
    return NULL;
8940
    }
8941
8942
  if (has_vreverse)
8943
    {
8944
    SLJIT_ASSERT(altbacktrack.top != NULL);
8945
    add_jump(compiler, &altbacktrack.top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
8946
    }
8947
8948
  common->accept_label = LABEL();
8949
  if (common->accept != NULL)
8950
    set_jumps(common->accept, common->accept_label);
8951
8952
  /* Reset stack. */
8953
  if (framesize < 0)
8954
    {
8955
    if (framesize == no_frame)
8956
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8957
    else if (extrasize > 0)
8958
      free_stack(common, extrasize);
8959
8960
    if (end_block_size > 0)
8961
      OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));
8962
8963
    if (needs_control_head)
8964
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
8965
    }
8966
  else
8967
    {
8968
    if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
8969
      {
8970
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
8971
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
8972
8973
      if (end_block_size > 0)
8974
        OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 2));
8975
8976
      if (needs_control_head)
8977
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
8978
      }
8979
    else
8980
      {
8981
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
8982
8983
      if (end_block_size > 0)
8984
        OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize + 1));
8985
8986
      if (needs_control_head)
8987
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 2));
8988
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
8989
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
8990
      }
8991
    }
8992
8993
  if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
8994
    {
8995
    /* We know that STR_PTR was stored on the top of the stack. */
8996
    if (conditional)
8997
      {
8998
      if (extrasize > 0)
8999
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-end_block_size - (needs_control_head ? 2 : 1)));
9000
      }
9001
    else if (bra == OP_BRAZERO)
9002
      {
9003
      if (framesize < 0)
9004
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
9005
      else
9006
        {
9007
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
9008
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize));
9009
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
9010
        }
9011
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9012
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9013
      }
9014
    else if (framesize >= 0)
9015
      {
9016
      /* For OP_BRA and OP_BRAMINZERO. */
9017
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
9018
      }
9019
    }
9020
  add_jump(compiler, found, JUMP(SLJIT_JUMP));
9021
9022
  compile_backtrackingpath(common, altbacktrack.top);
9023
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9024
    {
9025
    if (local_quit_available)
9026
      {
9027
      common->local_quit_available = save_local_quit_available;
9028
      common->quit_label = save_quit_label;
9029
      common->quit = save_quit;
9030
      }
9031
    common->in_positive_assertion = save_in_positive_assertion;
9032
    common->restore_end_ptr = save_restore_end_ptr;
9033
    common->then_trap = save_then_trap;
9034
    common->accept_label = save_accept_label;
9035
    common->positive_assertion_quit = save_positive_assertion_quit;
9036
    common->accept = save_accept;
9037
    return NULL;
9038
    }
9039
  set_jumps(altbacktrack.own_backtracks, LABEL());
9040
9041
  if (*cc != OP_ALT)
9042
    break;
9043
9044
  ccbegin = cc;
9045
  cc += GET(cc, 1);
9046
  }
9047
9048
if (local_quit_available)
9049
  {
9050
  SLJIT_ASSERT(common->positive_assertion_quit == NULL);
9051
  /* Makes the check less complicated below. */
9052
  common->positive_assertion_quit = common->quit;
9053
  }
9054
9055
/* None of them matched. */
9056
if (common->positive_assertion_quit != NULL)
9057
  {
9058
  jump = JUMP(SLJIT_JUMP);
9059
  set_jumps(common->positive_assertion_quit, LABEL());
9060
  SLJIT_ASSERT(framesize != no_stack);
9061
  if (framesize < 0)
9062
    OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
9063
  else
9064
    {
9065
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9066
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9067
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (extrasize + 1) * sizeof(sljit_sw));
9068
    }
9069
  JUMPHERE(jump);
9070
  }
9071
9072
if (end_block_size > 0)
9073
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9074
9075
if (needs_control_head)
9076
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1));
9077
9078
if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
9079
  {
9080
  /* Assert is failed. */
9081
  if ((conditional && extrasize > 0) || bra == OP_BRAZERO)
9082
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9083
9084
  if (framesize < 0)
9085
    {
9086
    /* The topmost item should be 0. */
9087
    if (bra == OP_BRAZERO)
9088
      {
9089
      if (extrasize >= 2)
9090
        free_stack(common, extrasize - 1);
9091
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9092
      }
9093
    else if (extrasize > 0)
9094
      free_stack(common, extrasize);
9095
    }
9096
  else
9097
    {
9098
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
9099
    /* The topmost item should be 0. */
9100
    if (bra == OP_BRAZERO)
9101
      {
9102
      free_stack(common, framesize + extrasize - 1);
9103
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9104
      }
9105
    else
9106
      free_stack(common, framesize + extrasize);
9107
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
9108
    }
9109
  jump = JUMP(SLJIT_JUMP);
9110
  if (bra != OP_BRAZERO)
9111
    add_jump(compiler, target, jump);
9112
9113
  /* Assert is successful. */
9114
  set_jumps(tmp, LABEL());
9115
  if (framesize < 0)
9116
    {
9117
    /* We know that STR_PTR was stored on the top of the stack. */
9118
    if (extrasize > 0)
9119
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
9120
9121
    /* Keep the STR_PTR on the top of the stack. */
9122
    if (bra == OP_BRAZERO)
9123
      {
9124
      /* This allocation is always successful. */
9125
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9126
      if (extrasize >= 2)
9127
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9128
      }
9129
    else if (bra == OP_BRAMINZERO)
9130
      {
9131
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9132
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9133
      }
9134
    }
9135
  else
9136
    {
9137
    if (bra == OP_BRA)
9138
      {
9139
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
9140
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
9141
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));
9142
      }
9143
    else
9144
      {
9145
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
9146
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + end_block_size + 2) * sizeof(sljit_sw));
9147
9148
      if (extrasize == 2 + end_block_size)
9149
        {
9150
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9151
        if (bra == OP_BRAMINZERO)
9152
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9153
        }
9154
      else
9155
        {
9156
        SLJIT_ASSERT(extrasize == 3 + end_block_size);
9157
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
9158
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
9159
        }
9160
      }
9161
    }
9162
9163
  if (bra == OP_BRAZERO)
9164
    {
9165
    backtrack->matchingpath = LABEL();
9166
    SET_LABEL(jump, backtrack->matchingpath);
9167
    }
9168
  else if (bra == OP_BRAMINZERO)
9169
    {
9170
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
9171
    JUMPHERE(brajump);
9172
    SLJIT_ASSERT(framesize != 0);
9173
    if (framesize > 0)
9174
      {
9175
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9176
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9177
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
9178
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
9179
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
9180
      }
9181
    set_jumps(backtrack->common.own_backtracks, LABEL());
9182
    }
9183
  }
9184
else
9185
  {
9186
  /* AssertNot is successful. */
9187
  if (framesize < 0)
9188
    {
9189
    if (extrasize > 0)
9190
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9191
9192
    if (bra != OP_BRA)
9193
      {
9194
      if (extrasize >= 2)
9195
        free_stack(common, extrasize - 1);
9196
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9197
      }
9198
    else if (extrasize > 0)
9199
      free_stack(common, extrasize);
9200
    }
9201
  else
9202
    {
9203
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9204
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
9205
    /* The topmost item should be 0. */
9206
    if (bra != OP_BRA)
9207
      {
9208
      free_stack(common, framesize + extrasize - 1);
9209
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9210
      }
9211
    else
9212
      free_stack(common, framesize + extrasize);
9213
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
9214
    }
9215
9216
  if (bra == OP_BRAZERO)
9217
    backtrack->matchingpath = LABEL();
9218
  else if (bra == OP_BRAMINZERO)
9219
    {
9220
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
9221
    JUMPHERE(brajump);
9222
    }
9223
9224
  if (bra != OP_BRA)
9225
    {
9226
    SLJIT_ASSERT(found == &backtrack->common.own_backtracks);
9227
    set_jumps(backtrack->common.own_backtracks, LABEL());
9228
    backtrack->common.own_backtracks = NULL;
9229
    }
9230
  }
9231
9232
if (local_quit_available)
9233
  {
9234
  common->local_quit_available = save_local_quit_available;
9235
  common->quit_label = save_quit_label;
9236
  common->quit = save_quit;
9237
  }
9238
9239
common->in_positive_assertion = save_in_positive_assertion;
9240
common->restore_end_ptr = save_restore_end_ptr;
9241
common->then_trap = save_then_trap;
9242
common->accept_label = save_accept_label;
9243
common->positive_assertion_quit = save_positive_assertion_quit;
9244
common->accept = save_accept;
9245
return cc + 1 + LINK_SIZE;
9246
}
9247
9248
static SLJIT_INLINE void match_once_common(compiler_common *common, PCRE2_UCHAR ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
{
/* Emits the stack clean-up code used for OP_ONCE brackets once their
body has been compiled.

  common              compiler state
  ket                 closing opcode (OP_KET, OP_KETRMAX or OP_KETRMIN)
  framesize           frame size of the bracket; negative values mean
                      that no frame was stored
  private_data_ptr    offset of the private data slot of the bracket
  has_alternatives    TRUE when the bracket has alternatives
  needs_control_head  TRUE when the saved control head has to be
                      written back to common->control_head_ptr

When needs_control_head is set, the saved value is first loaded into
TMP1 and stored into the control head slot at the very end. */
DEFINE_COMPILER;
int stacksize;
/* An additional stack slot is in use for every ket except the plain
OP_KET without alternatives. */
BOOL extra_slot = (ket != OP_KET || has_alternatives);

if (framesize >= 0)
  {
  /* STACK_TOP is recomputed from the value kept in the private data slot. */
  stacksize = extra_slot ? 2 : 1;
  OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));

  if (needs_control_head)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));

  /* TMP2 which is set here used by OP_KETRMAX below. */
  if (ket == OP_KETRMAX)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  }
else
  {
  if (framesize != no_frame)
    {
    /* Release the control head slot and the extra slot, whichever exist. */
    stacksize = (needs_control_head ? 1 : 0) + (extra_slot ? 1 : 0);
    if (stacksize > 0)
      free_stack(common, stacksize);
    }
  else
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);

  /* The control head sits below the extra slot when the latter is present. */
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), extra_slot ? STACK(-2) : STACK(-1));

  switch (ket)
    {
    case OP_KETRMAX:
    /* TMP2 which is set here used by OP_KETRMAX below. */
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
    break;

    case OP_KETRMIN:
    /* Move the STR_PTR to the private_data_ptr. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
    break;

    default:
    break;
    }
  }

if (needs_control_head)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
}
9295
9296
static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
{
/* Emits the code which saves the previous capture state on the stack
and installs the new one.

  common            compiler state
  stacksize         index of the first stack slot which can be written
  offset            ovector offset of the capture; offset >> 1 is used
                    both as the capture_last value and as the index
                    into optimized_cbracket
  private_data_ptr  offset of the private data slot whose value becomes
                    the new OVECTOR(offset)

Returns the index of the first stack slot which is still unused: one
slot is consumed when capture_last_ptr is in use and two more when the
bracket is not optimized. */
DEFINE_COMPILER;
int capture_index = offset >> 1;
int slot = stacksize;

if (common->capture_last_ptr != 0)
  {
  /* Push the old capture_last value and record this capture instead. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, capture_index);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(slot), TMP1, 0);
  slot++;
  }

/* Nothing else is stored for optimized brackets. */
if (common->optimized_cbracket[capture_index] != 0)
  return slot;

/* Push the old ovector pair, then store the value of the private data
slot and the current STR_PTR as the new pair. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(slot), TMP1, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(slot + 1), TMP2, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);

return slot + 2;
}
9320
9321
static PCRE2_SPTR SLJIT_FUNC do_script_run(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
{
  /* Calls the script run check on [ptr, endptr) with FALSE as its last
  argument. Returns endptr when the check succeeds, NULL otherwise. */
  return PRIV(script_run)(ptr, endptr, FALSE) ? endptr : NULL;
}
9327
9328
#ifdef SUPPORT_UNICODE
9329
9330
static PCRE2_SPTR SLJIT_FUNC do_script_run_utf(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
{
  /* Calls the script run check on [ptr, endptr) with TRUE as its last
  argument. Returns endptr when the check succeeds, NULL otherwise. */
  return PRIV(script_run)(ptr, endptr, TRUE) ? endptr : NULL;
}
9336
9337
#endif /* SUPPORT_UNICODE */
9338
9339
static void match_script_run_common(compiler_common *common, int private_data_ptr, backtrack_common *parent)
{
/* Emits a call to the script run checker. The value stored in the
private data slot is loaded into TMP1 and the checker is called; its
return value becomes the new STR_PTR. A zero return value jumps to a
backtrack list: the simple_backtracks list of parent->top when that is
not NULL, the own_backtracks list of parent otherwise. */
DEFINE_COMPILER;
struct sljit_jump *check_failed;

/* Presumably TMP1 and STR_PTR carry the two call arguments, hence the
fixed register requirement. */
SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);

/* The UTF checker is only available (and selected) with Unicode support. */
#ifdef SUPPORT_UNICODE
if (common->utf)
  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_script_run_utf));
else
#endif
  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_script_run));

OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);

check_failed = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
if (parent->top != NULL)
  add_jump(compiler, &parent->top->simple_backtracks, check_failed);
else
  add_jump(compiler, &parent->own_backtracks, check_failed);
}
9356
9357
/*
9358
  Handling bracketed expressions is probably the most complex part.
9359
9360
  Stack layout naming characters:
9361
    S - Push the current STR_PTR
9362
    0 - Push a 0 (NULL)
9363
    A - Push the current STR_PTR. Needed for restoring the STR_PTR
9364
        before the next alternative. Not pushed if there are no alternatives.
9365
    M - Any values pushed by the current alternative. Can be empty, or anything.
9366
    C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
9367
    L - Push the previous local (pointed by localptr) to the stack
9368
   () - optional values stored on the stack
9369
  ()* - optional, can be stored multiple times
9370
9371
  The following list shows the regular expression templates, their PCRE byte codes
9372
  and stack layout supported by pcre-sljit.
9373
9374
  (?:)                     OP_BRA     | OP_KET                A M
9375
  ()                       OP_CBRA    | OP_KET                C M
9376
  (?:)+                    OP_BRA     | OP_KETRMAX        0   A M S   ( A M S )*
9377
                           OP_SBRA    | OP_KETRMAX        0   L M S   ( L M S )*
9378
  (?:)+?                   OP_BRA     | OP_KETRMIN        0   A M S   ( A M S )*
9379
                           OP_SBRA    | OP_KETRMIN        0   L M S   ( L M S )*
9380
  ()+                      OP_CBRA    | OP_KETRMAX        0   C M S   ( C M S )*
9381
                           OP_SCBRA   | OP_KETRMAX        0   C M S   ( C M S )*
9382
  ()+?                     OP_CBRA    | OP_KETRMIN        0   C M S   ( C M S )*
9383
                           OP_SCBRA   | OP_KETRMIN        0   C M S   ( C M S )*
9384
  (?:)?    OP_BRAZERO    | OP_BRA     | OP_KET            S ( A M 0 )
9385
  (?:)??   OP_BRAMINZERO | OP_BRA     | OP_KET            S ( A M 0 )
9386
  ()?      OP_BRAZERO    | OP_CBRA    | OP_KET            S ( C M 0 )
9387
  ()??     OP_BRAMINZERO | OP_CBRA    | OP_KET            S ( C M 0 )
9388
  (?:)*    OP_BRAZERO    | OP_BRA     | OP_KETRMAX      S 0 ( A M S )*
9389
           OP_BRAZERO    | OP_SBRA    | OP_KETRMAX      S 0 ( L M S )*
9390
  (?:)*?   OP_BRAMINZERO | OP_BRA     | OP_KETRMIN      S 0 ( A M S )*
9391
           OP_BRAMINZERO | OP_SBRA    | OP_KETRMIN      S 0 ( L M S )*
9392
  ()*      OP_BRAZERO    | OP_CBRA    | OP_KETRMAX      S 0 ( C M S )*
9393
           OP_BRAZERO    | OP_SCBRA   | OP_KETRMAX      S 0 ( C M S )*
9394
  ()*?     OP_BRAMINZERO | OP_CBRA    | OP_KETRMIN      S 0 ( C M S )*
9395
           OP_BRAMINZERO | OP_SCBRA   | OP_KETRMIN      S 0 ( C M S )*
9396
9397
9398
  Stack layout naming characters:
9399
    A - Push the alternative index (starting from 0) on the stack.
9400
        Not pushed if there are no alternatives.
9401
    M - Any values pushed by the current alternative. Can be empty, or anything.
9402
9403
  The next list shows the possible content of a bracket:
9404
  (|)     OP_*BRA    | OP_ALT ...         M A
9405
  (?()|)  OP_*COND   | OP_ALT             M A
9406
  (?>|)   OP_ONCE    | OP_ALT ...         [stack trace] M A
9407
                                          Or nothing, if trace is unnecessary
9408
*/
9409
9410
static PCRE2_SPTR compile_bracket_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9411
{
9412
DEFINE_COMPILER;
9413
backtrack_common *backtrack;
9414
PCRE2_UCHAR opcode;
9415
int private_data_ptr = 0;
9416
int offset = 0;
9417
int i, stacksize;
9418
int repeat_ptr = 0, repeat_length = 0;
9419
int repeat_type = 0, repeat_count = 0;
9420
PCRE2_SPTR ccbegin;
9421
PCRE2_SPTR matchingpath;
9422
PCRE2_SPTR slot;
9423
PCRE2_UCHAR bra = OP_BRA;
9424
PCRE2_UCHAR ket;
9425
assert_backtrack *assert;
9426
BOOL has_alternatives;
9427
BOOL needs_control_head = FALSE;
9428
BOOL has_vreverse = FALSE;
9429
struct sljit_jump *jump;
9430
struct sljit_jump *skip;
9431
jump_list *jumplist;
9432
struct sljit_label *rmax_label = NULL;
9433
struct sljit_jump *braminzero = NULL;
9434
9435
PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
9436
9437
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
9438
  {
9439
  bra = *cc;
9440
  cc++;
9441
  opcode = *cc;
9442
  }
9443
9444
opcode = *cc;
9445
ccbegin = cc;
9446
matchingpath = bracketend(cc) - 1 - LINK_SIZE;
9447
ket = *matchingpath;
9448
if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
9449
  {
9450
  repeat_ptr = PRIVATE_DATA(matchingpath);
9451
  repeat_length = PRIVATE_DATA(matchingpath + 1);
9452
  repeat_type = PRIVATE_DATA(matchingpath + 2);
9453
  repeat_count = PRIVATE_DATA(matchingpath + 3);
9454
  SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
9455
  if (repeat_type == OP_UPTO)
9456
    ket = OP_KETRMAX;
9457
  if (repeat_type == OP_MINUPTO)
9458
    ket = OP_KETRMIN;
9459
  }
9460
9461
matchingpath = ccbegin + 1 + LINK_SIZE;
9462
SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
9463
SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
9464
cc += GET(cc, 1);
9465
9466
has_alternatives = *cc == OP_ALT;
9467
if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
9468
  {
9469
  SLJIT_COMPILE_ASSERT(OP_DNRREF == OP_RREF + 1 && OP_FALSE == OP_RREF + 2 && OP_TRUE == OP_RREF + 3,
9470
    compile_time_checks_must_be_grouped_together);
9471
  has_alternatives = ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL) ? FALSE : TRUE;
9472
  }
9473
9474
if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
9475
  opcode = OP_SCOND;
9476
9477
if (opcode == OP_CBRA || opcode == OP_SCBRA)
9478
  {
9479
  /* Capturing brackets has a pre-allocated space. */
9480
  offset = GET2(ccbegin, 1 + LINK_SIZE);
9481
  if (common->optimized_cbracket[offset] == 0)
9482
    {
9483
    private_data_ptr = OVECTOR_PRIV(offset);
9484
    offset <<= 1;
9485
    }
9486
  else
9487
    {
9488
    offset <<= 1;
9489
    private_data_ptr = OVECTOR(offset);
9490
    }
9491
  BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
9492
  matchingpath += IMM2_SIZE;
9493
  }
9494
else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_ONCE
9495
         || opcode == OP_ASSERT_SCS || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
9496
  {
9497
  /* Other brackets simply allocate the next entry. */
9498
  private_data_ptr = PRIVATE_DATA(ccbegin);
9499
  SLJIT_ASSERT(private_data_ptr != 0);
9500
  BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
9501
  if (opcode == OP_ONCE)
9502
    BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
9503
  }
9504
9505
/* Instructions before the first alternative. */
9506
stacksize = 0;
9507
if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
9508
  stacksize++;
9509
if (bra == OP_BRAZERO)
9510
  stacksize++;
9511
9512
if (stacksize > 0)
9513
  allocate_stack(common, stacksize);
9514
9515
stacksize = 0;
9516
if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
9517
  {
9518
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
9519
  stacksize++;
9520
  }
9521
9522
if (bra == OP_BRAZERO)
9523
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
9524
9525
if (bra == OP_BRAMINZERO)
9526
  {
9527
  /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
9528
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9529
  if (ket != OP_KETRMIN)
9530
    {
9531
    free_stack(common, 1);
9532
    braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
9533
    }
9534
  else if (opcode == OP_ONCE || opcode >= OP_SBRA)
9535
    {
9536
    jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
9537
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9538
    /* Nothing stored during the first run. */
9539
    skip = JUMP(SLJIT_JUMP);
9540
    JUMPHERE(jump);
9541
    /* Checking zero-length iteration. */
9542
    if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
9543
      {
9544
      /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
9545
      braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9546
      }
9547
    else
9548
      {
9549
      /* Except when the whole stack frame must be saved. */
9550
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9551
      braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-BACKTRACK_AS(bracket_backtrack)->u.framesize - 2));
9552
      }
9553
    JUMPHERE(skip);
9554
    }
9555
  else
9556
    {
9557
    jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
9558
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9559
    JUMPHERE(jump);
9560
    }
9561
  }
9562
9563
if (repeat_type != 0)
9564
  {
9565
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
9566
  if (repeat_type == OP_EXACT)
9567
    rmax_label = LABEL();
9568
  }
9569
9570
if (ket == OP_KETRMIN)
9571
  BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
9572
9573
if (ket == OP_KETRMAX)
9574
  {
9575
  rmax_label = LABEL();
9576
  if (has_alternatives && opcode >= OP_BRA && opcode < OP_SBRA && repeat_type == 0)
9577
    BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
9578
  }
9579
9580
/* Handling capturing brackets and alternatives. */
9581
if (opcode == OP_ONCE)
9582
  {
9583
  stacksize = 0;
9584
  if (needs_control_head)
9585
    {
9586
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9587
    stacksize++;
9588
    }
9589
9590
  if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
9591
    {
9592
    /* Neither capturing brackets nor recursions are found in the block. */
9593
    if (ket == OP_KETRMIN)
9594
      {
9595
      stacksize += 2;
9596
      if (!needs_control_head)
9597
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9598
      }
9599
    else
9600
      {
9601
      if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
9602
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
9603
      if (ket == OP_KETRMAX || has_alternatives)
9604
        stacksize++;
9605
      }
9606
9607
    if (stacksize > 0)
9608
      allocate_stack(common, stacksize);
9609
9610
    stacksize = 0;
9611
    if (needs_control_head)
9612
      {
9613
      stacksize++;
9614
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
9615
      }
9616
9617
    if (ket == OP_KETRMIN)
9618
      {
9619
      if (needs_control_head)
9620
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9621
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
9622
      if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
9623
        OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
9624
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
9625
      }
9626
    else if (ket == OP_KETRMAX || has_alternatives)
9627
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
9628
    }
9629
  else
9630
    {
9631
    if (ket != OP_KET || has_alternatives)
9632
      stacksize++;
9633
9634
    stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
9635
    allocate_stack(common, stacksize);
9636
9637
    if (needs_control_head)
9638
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
9639
9640
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9641
    OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
9642
9643
    stacksize = needs_control_head ? 1 : 0;
9644
    if (ket != OP_KET || has_alternatives)
9645
      {
9646
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
9647
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
9648
      stacksize++;
9649
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
9650
      }
9651
    else
9652
      {
9653
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
9654
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
9655
      }
9656
    init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1);
9657
    }
9658
  }
9659
else if (opcode == OP_CBRA || opcode == OP_SCBRA)
9660
  {
9661
  /* Saving the previous values. */
9662
  if (common->optimized_cbracket[offset >> 1] != 0)
9663
    {
9664
    SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
9665
    allocate_stack(common, 2);
9666
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9667
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
9668
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
9669
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
9670
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
9671
    }
9672
  else
9673
    {
9674
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9675
    allocate_stack(common, 1);
9676
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
9677
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
9678
    }
9679
  }
9680
else if (opcode == OP_ASSERTBACK_NA && PRIVATE_DATA(ccbegin + 1))
9681
  {
9682
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9683
  allocate_stack(common, 4);
9684
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
9685
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
9686
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), STR_END, 0);
9687
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
9688
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9689
  OP1(SLJIT_MOV, STR_END, 0, STR_PTR, 0);
9690
9691
  has_vreverse = (*matchingpath == OP_VREVERSE);
9692
  if (*matchingpath == OP_REVERSE || has_vreverse)
9693
    matchingpath = compile_reverse_matchingpath(common, matchingpath, backtrack);
9694
  }
9695
else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
9696
  {
9697
  /* Saving the previous value. */
9698
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9699
  allocate_stack(common, 1);
9700
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
9701
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
9702
9703
  if (*matchingpath == OP_REVERSE)
9704
    matchingpath = compile_reverse_matchingpath(common, matchingpath, backtrack);
9705
  }
9706
else if (opcode == OP_ASSERT_SCS)
9707
  {
9708
  /* Nested scs blocks will not update this variable. */
9709
  if (common->restore_end_ptr == 0)
9710
    common->restore_end_ptr = private_data_ptr + sizeof(sljit_sw);
9711
9712
  if (*matchingpath == OP_CREF && (matchingpath[1 + IMM2_SIZE] != OP_CREF && matchingpath[1 + IMM2_SIZE] != OP_DNCREF))
9713
    {
9714
    /* Optimized case for a single capture reference. */
9715
    i = OVECTOR(GET2(matchingpath, 1) << 1);
9716
9717
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), i);
9718
9719
    add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.no_capture), CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9720
    matchingpath += 1 + IMM2_SIZE;
9721
9722
    allocate_stack(common, has_alternatives ? 3 : 2);
9723
9724
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9725
    OP1(SLJIT_MOV, SLJIT_TMP_DEST_REG, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
9726
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), STR_END, 0);
9727
    OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), i + sizeof(sljit_sw));
9728
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
9729
    OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
9730
    }
9731
  else
9732
    {
9733
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
9734
    jumplist = NULL;
9735
9736
    while (TRUE)
9737
      {
9738
      if (*matchingpath == OP_CREF)
9739
        {
9740
        sljit_get_local_base(compiler, TMP2, 0, OVECTOR(GET2(matchingpath, 1) << 1));
9741
        matchingpath += 1 + IMM2_SIZE;
9742
        }
9743
      else
9744
        {
9745
        SLJIT_ASSERT(*matchingpath == OP_DNCREF);
9746
9747
        i = GET2(matchingpath, 1 + IMM2_SIZE);
9748
        slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
9749
9750
        while (i-- > 1)
9751
          {
9752
          sljit_get_local_base(compiler, TMP2, 0, OVECTOR(GET2(slot, 0) << 1));
9753
          add_jump(compiler, &jumplist, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), 0, TMP1, 0));
9754
          slot += common->name_entry_size;
9755
          }
9756
9757
        sljit_get_local_base(compiler, TMP2, 0, OVECTOR(GET2(slot, 0) << 1));
9758
        matchingpath += 1 + 2 * IMM2_SIZE;
9759
        }
9760
9761
      if (*matchingpath != OP_CREF && *matchingpath != OP_DNCREF)
9762
        break;
9763
9764
      add_jump(compiler, &jumplist, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), 0, TMP1, 0));
9765
      }
9766
9767
    add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.no_capture),
9768
      CMP(SLJIT_EQUAL, SLJIT_MEM1(TMP2), 0, TMP1, 0));
9769
9770
    set_jumps(jumplist, LABEL());
9771
9772
    allocate_stack(common, has_alternatives ? 3 : 2);
9773
9774
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9775
    OP1(SLJIT_MOV, SLJIT_TMP_DEST_REG, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
9776
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
9777
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), 0);
9778
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), STR_END, 0);
9779
    OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9780
    }
9781
9782
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
9783
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_TMP_DEST_REG, 0);
9784
9785
  if (has_alternatives)
9786
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);
9787
  }
9788
else if (has_alternatives)
9789
  {
9790
  /* Pushing the starting string pointer. */
9791
  allocate_stack(common, 1);
9792
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9793
  }
9794
9795
/* Generating code for the first alternative. */
9796
if (opcode == OP_COND || opcode == OP_SCOND)
9797
  {
9798
  if (*matchingpath == OP_CREF)
9799
    {
9800
    SLJIT_ASSERT(has_alternatives);
9801
    add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.no_capture),
9802
      CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9803
    matchingpath += 1 + IMM2_SIZE;
9804
    }
9805
  else if (*matchingpath == OP_DNCREF)
9806
    {
9807
    SLJIT_ASSERT(has_alternatives);
9808
9809
    i = GET2(matchingpath, 1 + IMM2_SIZE);
9810
    slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
9811
    OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
9812
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
9813
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
9814
    slot += common->name_entry_size;
9815
    i--;
9816
    while (i-- > 0)
9817
      {
9818
      OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
9819
      OP2(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, STR_PTR, 0);
9820
      slot += common->name_entry_size;
9821
      }
9822
    OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
9823
    add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.no_capture), JUMP(SLJIT_ZERO));
9824
    matchingpath += 1 + 2 * IMM2_SIZE;
9825
    }
9826
  else if ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL)
9827
    {
9828
    /* Never has other case. */
9829
    BACKTRACK_AS(bracket_backtrack)->u.no_capture = NULL;
9830
    SLJIT_ASSERT(!has_alternatives);
9831
9832
    if (*matchingpath == OP_TRUE)
9833
      {
9834
      stacksize = 1;
9835
      matchingpath++;
9836
      }
9837
    else if (*matchingpath == OP_FALSE || *matchingpath == OP_FAIL)
9838
      stacksize = 0;
9839
    else if (*matchingpath == OP_RREF)
9840
      {
9841
      stacksize = GET2(matchingpath, 1);
9842
      if (common->currententry == NULL)
9843
        stacksize = 0;
9844
      else if (stacksize == RREF_ANY)
9845
        stacksize = 1;
9846
      else if (common->currententry->start == 0)
9847
        stacksize = stacksize == 0;
9848
      else
9849
        stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
9850
9851
      if (stacksize != 0)
9852
        matchingpath += 1 + IMM2_SIZE;
9853
      }
9854
    else
9855
      {
9856
      if (common->currententry == NULL || common->currententry->start == 0)
9857
        stacksize = 0;
9858
      else
9859
        {
9860
        stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
9861
        slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
9862
        i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
9863
        while (stacksize > 0)
9864
          {
9865
          if ((int)GET2(slot, 0) == i)
9866
            break;
9867
          slot += common->name_entry_size;
9868
          stacksize--;
9869
          }
9870
        }
9871
9872
      if (stacksize != 0)
9873
        matchingpath += 1 + 2 * IMM2_SIZE;
9874
      }
9875
9876
      /* The stacksize == 0 is a common "else" case. */
9877
      if (stacksize == 0)
9878
        {
9879
        if (*cc == OP_ALT)
9880
          {
9881
          matchingpath = cc + 1 + LINK_SIZE;
9882
          cc += GET(cc, 1);
9883
          }
9884
        else
9885
          matchingpath = cc;
9886
        }
9887
    }
9888
  else
9889
    {
9890
    SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
9891
    /* Similar code as PUSH_BACKTRACK macro. */
9892
    assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
9893
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9894
      return NULL;
9895
    memset(assert, 0, sizeof(assert_backtrack));
9896
    assert->common.cc = matchingpath;
9897
    BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
9898
    matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
9899
    }
9900
  }
9901
9902
compile_matchingpath(common, matchingpath, cc, backtrack);
9903
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9904
  return NULL;
9905
9906
switch (opcode)
9907
  {
9908
  case OP_ASSERTBACK_NA:
9909
    if (has_vreverse)
9910
      {
9911
      SLJIT_ASSERT(backtrack->top != NULL && PRIVATE_DATA(ccbegin + 1));
9912
      add_jump(compiler, &backtrack->top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
9913
      }
9914
9915
    if (PRIVATE_DATA(ccbegin + 1))
9916
      OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
9917
    break;
9918
  case OP_ONCE:
9919
    match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
9920
    break;
9921
  case OP_SCRIPT_RUN:
9922
    match_script_run_common(common, private_data_ptr, backtrack);
9923
    break;
9924
  }
9925
9926
stacksize = 0;
9927
if (repeat_type == OP_MINUPTO)
9928
  {
9929
  /* We need to preserve the counter. TMP2 will be used below. */
9930
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
9931
  stacksize++;
9932
  }
9933
if (ket != OP_KET || bra != OP_BRA)
9934
  stacksize++;
9935
if (offset != 0)
9936
  {
9937
  if (common->capture_last_ptr != 0)
9938
    stacksize++;
9939
  if (common->optimized_cbracket[offset >> 1] == 0)
9940
    stacksize += 2;
9941
  }
9942
if (has_alternatives && opcode != OP_ONCE)
9943
  stacksize++;
9944
9945
if (stacksize > 0)
9946
  allocate_stack(common, stacksize);
9947
9948
stacksize = 0;
9949
if (repeat_type == OP_MINUPTO)
9950
  {
9951
  /* TMP2 was set above. */
9952
  OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
9953
  stacksize++;
9954
  }
9955
9956
if (ket != OP_KET || bra != OP_BRA)
9957
  {
9958
  if (ket != OP_KET)
9959
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
9960
  else
9961
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
9962
  stacksize++;
9963
  }
9964
9965
if (offset != 0)
9966
  stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
9967
9968
/* Skip and count the other alternatives. */
9969
i = 1;
9970
while (*cc == OP_ALT)
9971
  {
9972
  cc += GET(cc, 1);
9973
  i++;
9974
  }
9975
9976
if (has_alternatives)
9977
  {
9978
  if (opcode != OP_ONCE)
9979
    {
9980
    if (i <= 3)
9981
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
9982
    else
9983
      BACKTRACK_AS(bracket_backtrack)->matching_mov_addr = sljit_emit_mov_addr(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
9984
    }
9985
  if (ket != OP_KETRMAX)
9986
    BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
9987
  }
9988
9989
/* Must be after the matchingpath label. */
9990
if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
9991
  {
9992
  SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
9993
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
9994
  }
9995
else switch (opcode)
9996
  {
9997
  case OP_ASSERT_NA:
9998
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9999
    break;
10000
  case OP_ASSERT_SCS:
10001
    OP1(SLJIT_MOV, TMP1, 0, STR_END, 0);
10002
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10003
    OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
10004
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), TMP1, 0);
10005
10006
    /* Nested scs blocks will not update this variable. */
10007
    if (common->restore_end_ptr == private_data_ptr + SSIZE_OF(sw))
10008
      common->restore_end_ptr = 0;
10009
    break;
10010
  }
10011
10012
if (ket == OP_KETRMAX)
10013
  {
10014
  if (repeat_type != 0)
10015
    {
10016
    if (has_alternatives)
10017
      BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10018
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
10019
    JUMPTO(SLJIT_NOT_ZERO, rmax_label);
10020
    /* Drop STR_PTR for greedy plus quantifier. */
10021
    if (opcode != OP_ONCE)
10022
      free_stack(common, 1);
10023
    }
10024
  else if (opcode < OP_BRA || opcode >= OP_SBRA)
10025
    {
10026
    if (has_alternatives)
10027
      BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10028
10029
    /* Checking zero-length iteration. */
10030
    if (opcode != OP_ONCE)
10031
      {
10032
      /* This case includes opcodes such as OP_SCRIPT_RUN. */
10033
      CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
10034
      /* Drop STR_PTR for greedy plus quantifier. */
10035
      if (bra != OP_BRAZERO)
10036
        free_stack(common, 1);
10037
      }
10038
    else
10039
      /* TMP2 must contain the starting STR_PTR. */
10040
      CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
10041
    }
10042
  else
10043
    JUMPTO(SLJIT_JUMP, rmax_label);
10044
  BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
10045
  }
10046
10047
if (repeat_type == OP_EXACT)
10048
  {
10049
  count_match(common);
10050
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
10051
  JUMPTO(SLJIT_NOT_ZERO, rmax_label);
10052
  }
10053
else if (repeat_type == OP_UPTO)
10054
  {
10055
  /* We need to preserve the counter. */
10056
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
10057
  allocate_stack(common, 1);
10058
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10059
  }
10060
10061
if (bra == OP_BRAZERO)
10062
  BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
10063
10064
if (bra == OP_BRAMINZERO)
10065
  {
10066
  /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
10067
  JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
10068
  if (braminzero != NULL)
10069
    {
10070
    JUMPHERE(braminzero);
10071
    /* We need to release the end pointer to perform the
10072
    backtrack for the zero-length iteration. When
10073
    framesize is < 0, OP_ONCE will do the release itself. */
10074
    if (opcode == OP_ONCE)
10075
      {
10076
      int framesize = BACKTRACK_AS(bracket_backtrack)->u.framesize;
10077
10078
      SLJIT_ASSERT(framesize != 0);
10079
      if (framesize > 0)
10080
        {
10081
        OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10082
        add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10083
        OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
10084
        }
10085
      }
10086
    else if (ket == OP_KETRMIN)
10087
      free_stack(common, 1);
10088
    }
10089
  /* Continue to the normal backtrack. */
10090
  }
10091
10092
if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO || (has_alternatives && repeat_type != OP_EXACT))
10093
  count_match(common);
10094
10095
cc += 1 + LINK_SIZE;
10096
10097
if (opcode == OP_ONCE)
10098
  {
10099
  int data;
10100
  int framesize = BACKTRACK_AS(bracket_backtrack)->u.framesize;
10101
10102
  SLJIT_ASSERT(SHRT_MIN <= framesize && framesize < SHRT_MAX/2);
10103
  /* We temporarily encode the needs_control_head in the lowest bit.
10104
     The real value should be short enough for this operation to work
10105
     without triggering Undefined Behaviour. */
10106
  data = (int)((short)((unsigned short)framesize << 1) | (needs_control_head ? 1 : 0));
10107
  BACKTRACK_AS(bracket_backtrack)->u.framesize = data;
10108
  }
10109
return cc + repeat_length;
10110
}
10111
10112
/* Emits the matching path of a repeated bracket whose opcode is OP_BRAPOS,
OP_SBRAPOS, OP_CBRAPOS or OP_SCBRAPOS (optionally preceded by OP_BRAPOSZERO)
and whose alternatives are terminated by OP_KETRPOS.

Arguments:
  common   the JIT compiler state
  cc       points to OP_BRAPOSZERO or directly to the bracket opcode
  parent   not referenced directly in this function; presumably consumed
           by the PUSH_BACKTRACK macro - confirm against its definition

Returns the address just after the closing OP_KETRPOS (cc + 1 + LINK_SIZE),
or NULL when sljit reports a compiler error while an alternative is being
compiled (PUSH_BACKTRACK is also given NULL as its failure value).

The generated code runs the group in a loop: after every successful
iteration the saved position (and capture pair for capturing groups) is
overwritten and control jumps back to "loop". When !zero, a flag word on the
stack starts as 1 and is cleared by the first successful iteration; if it is
still non-zero once every alternative has failed, the group backtracks. */
static PCRE2_SPTR compile_bracketpos_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
10113
{
10114
DEFINE_COMPILER;
10115
backtrack_common *backtrack;
10116
PCRE2_UCHAR opcode;
10117
int private_data_ptr;
10118
int cbraprivptr = 0;
10119
BOOL needs_control_head;
10120
int framesize;
10121
int stacksize;
10122
int offset = 0;
10123
BOOL zero = FALSE;
10124
PCRE2_SPTR ccbegin = NULL;
10125
int stack; /* Also contains the offset of control head. */
10126
struct sljit_label *loop = NULL;
10127
struct jump_list *emptymatch = NULL;
10128
10129
PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
/* With the OP_BRAPOSZERO prefix no "at least one iteration matched" flag
is kept; see the !zero tests below. */
10130
if (*cc == OP_BRAPOSZERO)
10131
  {
10132
  zero = TRUE;
10133
  cc++;
10134
  }
10135
10136
opcode = *cc;
10137
private_data_ptr = PRIVATE_DATA(cc);
10138
SLJIT_ASSERT(private_data_ptr != 0);
10139
BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
/* Locate the first opcode of the first alternative (ccbegin). For capturing
variants offset becomes twice the capture number, so that OVECTOR(offset)
and OVECTOR(offset + 1) address the start/end pair. */
10140
switch(opcode)
10141
  {
10142
  case OP_BRAPOS:
10143
  case OP_SBRAPOS:
10144
  ccbegin = cc + 1 + LINK_SIZE;
10145
  break;
10146
10147
  case OP_CBRAPOS:
10148
  case OP_SCBRAPOS:
10149
  offset = GET2(cc, 1 + LINK_SIZE);
10150
  /* This case cannot be optimized in the same way as
10151
  normal capturing brackets. */
10152
  SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
10153
  cbraprivptr = OVECTOR_PRIV(offset);
10154
  offset <<= 1;
10155
  ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
10156
  break;
10157
10158
  default:
10159
  SLJIT_UNREACHABLE();
10160
  break;
10161
  }
10162
10163
framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
10164
BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
/* Negative framesize: no frame is initialised (init_frame is only called in
the else branch). Stack slots, in order: the saved capture pair (plus the
capture_last_ptr value when used) or the saved STR_PTR, then the control
head when needed, then the flag word when !zero. */
10165
if (framesize < 0)
10166
  {
10167
  if (offset != 0)
10168
    {
10169
    stacksize = 2;
10170
    if (common->capture_last_ptr != 0)
10171
      stacksize++;
10172
    }
10173
  else
10174
    stacksize = 1;
10175
10176
  if (needs_control_head)
10177
    stacksize++;
10178
  if (!zero)
10179
    stacksize++;
10180
10181
  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
10182
  allocate_stack(common, stacksize);
10183
  if (framesize == no_frame)
10184
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
10185
10186
  stack = 0;
10187
  if (offset != 0)
10188
    {
10189
    stack = 2;
10190
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
10191
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
10192
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
10193
    if (common->capture_last_ptr != 0)
10194
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
10195
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
10196
    if (needs_control_head)
10197
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10198
    if (common->capture_last_ptr != 0)
10199
      {
10200
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
10201
      stack = 3;
10202
      }
10203
    }
10204
  else
10205
    {
10206
    if (needs_control_head)
10207
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10208
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10209
    stack = 1;
10210
    }
10211
  /* The flag word goes into the slot after the control head, so stack is
  stepped forward for that store and back again for the control head; it
  ends up holding the control head slot index used inside the loop. */
10212
  if (needs_control_head)
10213
    stack++;
10214
  if (!zero)
10215
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
10216
  if (needs_control_head)
10217
    {
10218
    stack--;
10219
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
10220
    }
10221
  }
/* Non-negative framesize: a frame is built by init_frame. Slot order here is
the flag word (when !zero), the control head (when needed), the saved
STR_PTR (non-capturing only), the previous private_data_ptr value, then the
frame itself. */
10222
else
10223
  {
10224
  stacksize = framesize + 1;
10225
  if (!zero)
10226
    stacksize++;
10227
  if (needs_control_head)
10228
    stacksize++;
10229
  if (offset == 0)
10230
    stacksize++;
10231
  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
10232
10233
  allocate_stack(common, stacksize);
10234
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10235
  if (needs_control_head)
10236
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  /* The private data slot now holds STACK_TOP plus stacksize words; the loop
  recovers STACK_TOP from it by subtracting the same amount. */
10237
  OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
10238
10239
  stack = 0;
10240
  if (!zero)
10241
    {
10242
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
10243
    stack = 1;
10244
    }
10245
  if (needs_control_head)
10246
    {
10247
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
10248
    stack++;
10249
    }
10250
  if (offset == 0)
10251
    {
10252
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
10253
    stack++;
10254
    }
10255
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
10256
  init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize);
  /* Step stack back over the slots written after the control head so that
  it indexes the control head slot again (only used if needs_control_head). */
10257
  stack -= 1 + (offset == 0);
10258
  }
10259
/* Capturing groups keep the start position of the current iteration in
cbraprivptr. */
10260
if (offset != 0)
10261
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
10262
10263
loop = LABEL();
/* One pass per alternative: cc is advanced to the next OP_ALT / OP_KETRPOS
and the code between ccbegin and cc is compiled. */
10264
while (*cc != OP_KETRPOS)
10265
  {
10266
  backtrack->top = NULL;
10267
  backtrack->own_backtracks = NULL;
10268
  cc += GET(cc, 1);
10269
10270
  compile_matchingpath(common, ccbegin, cc, backtrack);
10271
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10272
    return NULL;
10273
  /* Code reached when the alternative matched: record the new position,
  restore the control head, leave through emptymatch if nothing was consumed
  (OP_SBRAPOS / OP_SCBRAPOS only), clear the flag word and iterate again. */
10274
  if (framesize < 0)
10275
    {
10276
    if (framesize == no_frame)
10277
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10278
10279
    if (offset != 0)
10280
      {
      /* TMP1 = start of this iteration; it becomes the capture start, while
      the current STR_PTR becomes both the capture end and the start of the
      next iteration. */
10281
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
10282
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10283
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
10284
      if (common->capture_last_ptr != 0)
10285
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
10286
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
10287
      }
10288
    else
10289
      {
      /* The previously saved STR_PTR is only needed (in TMP1) for the
      empty-iteration test of OP_SBRAPOS. */
10290
      if (opcode == OP_SBRAPOS)
10291
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10292
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10293
      }
10294
10295
    /* Even if the match is empty, we need to reset the control head. */
10296
    if (needs_control_head)
10297
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
10298
10299
    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
10300
      add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
10301
10302
    if (!zero)
10303
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
10304
    }
10305
  else
10306
    {
10307
    if (offset != 0)
10308
      {
10309
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
10310
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
10311
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10312
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
10313
      if (common->capture_last_ptr != 0)
10314
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
10315
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
10316
      }
10317
    else
10318
      {
      /* TMP2 = value stored in the private data slot; the saved STR_PTR is
      addressed relative to it at STACK(-framesize - 2). */
10319
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10320
      OP2(SLJIT_SUB, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
10321
      if (opcode == OP_SBRAPOS)
10322
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
10323
      OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(-framesize - 2), STR_PTR, 0);
10324
      }
10325
10326
    /* Even if the match is empty, we need to reset the control head. */
10327
    if (needs_control_head)
10328
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
10329
10330
    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
10331
      add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
10332
10333
    if (!zero)
10334
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10335
    }
10336
10337
  JUMPTO(SLJIT_JUMP, loop);
10338
  flush_stubs(common);
10339
  /* Backtracking code of this alternative. Once it has run, STR_PTR is
  reloaded from the position saved by the last successful iteration (or the
  initial one) before the next alternative is tried. */
10340
  compile_backtrackingpath(common, backtrack->top);
10341
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10342
    return NULL;
10343
  set_jumps(backtrack->own_backtracks, LABEL());
10344
10345
  if (framesize < 0)
10346
    {
10347
    if (offset != 0)
10348
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
10349
    else
10350
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10351
    }
10352
  else
10353
    {
10354
    if (offset != 0)
10355
      {
10356
      /* Last alternative. */
10357
      if (*cc == OP_KETRPOS)
10358
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10359
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
10360
      }
10361
    else
10362
      {
10363
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10364
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
10365
      }
10366
    }
10367
10368
  if (*cc == OP_KETRPOS)
10369
    break;
10370
  ccbegin = cc + 1 + LINK_SIZE;
10371
  }
10372
10373
/* We don't have to restore the control head in case of a failed match. */
10374
10375
backtrack->own_backtracks = NULL;
/* A flag word that is still non-zero means no iteration ever succeeded:
record a jump in own_backtracks so the whole group fails. */
10376
if (!zero)
10377
  {
10378
  if (framesize < 0)
10379
    add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
10380
  else /* TMP2 is set to [private_data_ptr] above. */
10381
    add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0));
10382
  }
10383
10384
/* None of them matched. */
10385
set_jumps(emptymatch, LABEL());
10386
count_match(common);
10387
return cc + 1 + LINK_SIZE;
10388
}
10389
10390
/* Decodes the repeat opcode at cc into a normalized description.

Arguments:
  common   the JIT compiler state (used for next_opcode and the utf flag)
  cc       points to the repeat opcode, or to a class opcode that is
           followed by its OP_CR* repeat opcode
  opcode   receives the repeat kind expressed in the OP_STAR .. OP_POSUPTO
           numbering (or OP_EXACT)
  type     receives the kind of the repeated item: OP_CHAR, OP_CHARI, OP_NOT,
           OP_NOTI, the class opcode, or the item opcode that follows an
           OP_TYPE* repeat
  max      written only for the UPTO and class range forms; the caller is
           expected to have initialised it
  exact    receives the mandatory repeat count (0 when there is none); the
           PLUS forms are reported as exact == 1 with the matching STAR opcode
  end      receives the address just after the whole repeated item

Returns the address of the repeated item's data (for classes: the code unit
after the class opcode). */
static SLJIT_INLINE PCRE2_SPTR get_iterator_parameters(compiler_common *common, PCRE2_SPTR cc, PCRE2_UCHAR *opcode, PCRE2_UCHAR *type, sljit_u32 *max, sljit_u32 *exact, PCRE2_SPTR *end)
{
PCRE2_UCHAR op = *cc;
sljit_u32 repeat_min;
sljit_u32 repeat_max;
int class_len;

*exact = 0;

/* Fold every repeat family onto the OP_STAR based numbering. */
if (op >= OP_STAR && op <= OP_POSUPTO)
  *type = OP_CHAR;
else if (op >= OP_STARI && op <= OP_POSUPTOI)
  {
  *type = OP_CHARI;
  op -= OP_STARI - OP_STAR;
  }
else if (op >= OP_NOTSTAR && op <= OP_NOTPOSUPTO)
  {
  *type = OP_NOT;
  op -= OP_NOTSTAR - OP_STAR;
  }
else if (op >= OP_NOTSTARI && op <= OP_NOTPOSUPTOI)
  {
  *type = OP_NOTI;
  op -= OP_NOTSTARI - OP_STAR;
  }
else if (op >= OP_TYPESTAR && op <= OP_TYPEPOSUPTO)
  {
  /* OP_END is a marker: the real item opcode is fetched further below. */
  *type = OP_END;
  op -= OP_TYPESTAR - OP_STAR;
  }
else
  {
  /* Character class: the repeat opcode is stored after the class data. */
  SLJIT_ASSERT(op == OP_CLASS || op == OP_NCLASS || op == OP_XCLASS || op == OP_ECLASS);
  *type = op;
  class_len = (op < OP_XCLASS) ? (int)(1 + (32 / sizeof(PCRE2_UCHAR))) : GET(cc, 1);
  op = cc[class_len];
  cc++;

  if (op >= OP_CRSTAR && op <= OP_CRMINQUERY)
    {
    *end = cc + class_len;
    op -= OP_CRSTAR - OP_STAR;

    if (op == OP_PLUS || op == OP_MINPLUS)
      {
      *exact = 1;
      op -= OP_PLUS - OP_STAR;
      }

    *opcode = op;
    return cc;
    }

  if (op >= OP_CRPOSSTAR && op <= OP_CRPOSQUERY)
    {
    *end = cc + class_len;
    op -= OP_CRPOSSTAR - OP_POSSTAR;

    if (op == OP_POSPLUS)
      {
      *exact = 1;
      op = OP_POSSTAR;
      }

    *opcode = op;
    return cc;
    }

  SLJIT_ASSERT(op == OP_CRRANGE || op == OP_CRMINRANGE || op == OP_CRPOSRANGE);
  repeat_min = GET2(cc, class_len);
  repeat_max = GET2(cc, (class_len + IMM2_SIZE));
  *exact = repeat_min;
  *end = cc + class_len + 2 * IMM2_SIZE;

  if (repeat_max == 0)
    {
    /* A stored maximum of zero (presumably "no upper limit" - confirm with
    the bytecode format) is reported with *max left at zero. */
    SLJIT_ASSERT(repeat_min > 1);
    *max = 0;

    switch(op)
      {
      case OP_CRRANGE:
      op = OP_UPTO;
      break;

      case OP_CRPOSRANGE:
      op = OP_POSUPTO;
      break;

      default:
      op = OP_MINSTAR;
      break;
      }

    *opcode = op;
    return cc;
    }

  /* From here on *max is the number of optional repeats after the
  mandatory ones. */
  repeat_max -= repeat_min;
  *max = repeat_max;

  if (repeat_max == 0)
    op = OP_EXACT;
  else
    {
    SLJIT_ASSERT(repeat_min > 0 || repeat_max > 1);

    switch(op)
      {
      case OP_CRRANGE:
      op = OP_UPTO;
      break;

      case OP_CRPOSRANGE:
      op = OP_POSUPTO;
      break;

      default:
      op = (repeat_max == 1) ? OP_MINQUERY : OP_MINUPTO;
      break;
      }
    }

  *opcode = op;
  return cc;
  }

/* Only the non-class forms reach this point: step over the repeat opcode. */
cc++;

if (op == OP_EXACT)
  {
  *exact = GET2(cc, 0);
  cc += IMM2_SIZE;
  }
else if (op == OP_PLUS || op == OP_MINPLUS)
  {
  *exact = 1;
  op -= OP_PLUS - OP_STAR;
  }
else if (op == OP_POSPLUS)
  {
  *exact = 1;
  op = OP_POSSTAR;
  }
else if (op == OP_UPTO || op == OP_MINUPTO || op == OP_POSUPTO)
  {
  *max = GET2(cc, 0);
  cc += IMM2_SIZE;
  }

*opcode = op;

if (*type == OP_END)
  {
  /* OP_TYPE* repeat: the item opcode itself is stored at cc. */
  *type = *cc;
  *end = next_opcode(common, cc);
  return cc + 1;
  }

/* Single character item: one code unit plus any UTF extra units. */
*end = cc + 1;
#ifdef SUPPORT_UNICODE
if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
#endif
return cc;
}
10535
10536
static PCRE2_SPTR compile_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent, jump_list **prev_backtracks)
10537
{
10538
DEFINE_COMPILER;
10539
backtrack_common *backtrack = NULL;
10540
PCRE2_SPTR begin = cc;
10541
PCRE2_UCHAR opcode;
10542
PCRE2_UCHAR type;
10543
sljit_u32 max = 0, exact;
10544
sljit_s32 early_fail_ptr = PRIVATE_DATA(cc + 1);
10545
sljit_s32 early_fail_type;
10546
BOOL charpos_enabled, use_tmp;
10547
PCRE2_UCHAR charpos_char;
10548
unsigned int charpos_othercasebit;
10549
PCRE2_SPTR end;
10550
jump_list *no_match = NULL;
10551
jump_list *no_char1_match = NULL;
10552
struct sljit_jump *jump = NULL;
10553
struct sljit_label *label;
10554
int private_data_ptr = PRIVATE_DATA(cc);
10555
int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
10556
int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
10557
int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + SSIZE_OF(sw);
10558
int tmp_base, tmp_offset;
10559
10560
early_fail_type = (early_fail_ptr & 0x7);
10561
early_fail_ptr >>= 3;
10562
10563
/* During recursion, these optimizations are disabled. */
10564
if (common->early_fail_start_ptr == 0 && common->fast_forward_bc_ptr == NULL)
10565
  {
10566
  early_fail_ptr = 0;
10567
  early_fail_type = type_skip;
10568
  }
10569
10570
SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || early_fail_ptr == 0
10571
  || (early_fail_ptr >= common->early_fail_start_ptr && early_fail_ptr <= common->early_fail_end_ptr));
10572
10573
if (early_fail_type == type_fail)
10574
  add_jump(compiler, prev_backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr));
10575
10576
cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
10577
10578
if (type != OP_EXTUNI)
10579
  {
10580
  tmp_base = TMP3;
10581
  tmp_offset = 0;
10582
  }
10583
else
10584
  {
10585
  tmp_base = SLJIT_MEM1(SLJIT_SP);
10586
  tmp_offset = LOCAL2;
10587
  }
10588
10589
if (opcode == OP_EXACT)
10590
  {
10591
  SLJIT_ASSERT(early_fail_ptr == 0 && exact >= 2);
10592
10593
  if (common->mode == PCRE2_JIT_COMPLETE
10594
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
10595
      && !common->utf
10596
#endif
10597
      && type != OP_ANYNL && type != OP_EXTUNI)
10598
    {
10599
    OP2(SLJIT_SUB, TMP1, 0, STR_END, 0, STR_PTR, 0);
10600
    add_jump(compiler, prev_backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, IN_UCHARS(exact)));
10601
10602
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32
10603
    if (type == OP_ALLANY && !common->invalid_utf)
10604
#else
10605
    if (type == OP_ALLANY)
10606
#endif
10607
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
10608
    else
10609
      {
10610
      OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
10611
      label = LABEL();
10612
      compile_char1_matchingpath(common, type, cc, prev_backtracks, FALSE);
10613
      OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
10614
      JUMPTO(SLJIT_NOT_ZERO, label);
10615
      }
10616
    }
10617
  else
10618
    {
10619
    SLJIT_ASSERT(tmp_base == TMP3 || common->locals_size >= 3 * SSIZE_OF(sw));
10620
    OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
10621
    label = LABEL();
10622
    compile_char1_matchingpath(common, type, cc, prev_backtracks, TRUE);
10623
    OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
10624
    JUMPTO(SLJIT_NOT_ZERO, label);
10625
    }
10626
  }
10627
10628
if (early_fail_type == type_fail_range)
10629
  {
10630
  /* Range end first, followed by range start. */
10631
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr);
10632
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + SSIZE_OF(sw));
10633
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, TMP2, 0);
10634
  OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, TMP2, 0);
10635
  add_jump(compiler, prev_backtracks, CMP(SLJIT_LESS_EQUAL, TMP2, 0, TMP1, 0));
10636
10637
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
10638
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + SSIZE_OF(sw), STR_PTR, 0);
10639
  }
10640
10641
if (opcode < OP_EXACT)
10642
  PUSH_BACKTRACK(sizeof(char_iterator_backtrack), begin, NULL);
10643
10644
switch(opcode)
10645
  {
10646
  case OP_STAR:
10647
  case OP_UPTO:
10648
  SLJIT_ASSERT(backtrack != NULL && (early_fail_ptr == 0 || opcode == OP_STAR));
10649
  max += exact;
10650
10651
  if (type == OP_EXTUNI)
10652
    {
10653
    SLJIT_ASSERT(private_data_ptr == 0);
10654
    SLJIT_ASSERT(early_fail_ptr == 0);
10655
10656
    if (exact == 1)
10657
      {
10658
      SLJIT_ASSERT(opcode == OP_STAR);
10659
      allocate_stack(common, 1);
10660
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10661
      }
10662
    else
10663
      {
10664
      /* If OP_EXTUNI is present, it has a separate EXACT opcode. */
10665
      SLJIT_ASSERT(exact == 0);
10666
10667
      allocate_stack(common, 2);
10668
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10669
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
10670
      }
10671
10672
    if (opcode == OP_UPTO)
10673
      {
10674
      SLJIT_ASSERT(common->locals_size >= 3 * SSIZE_OF(sw));
10675
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL2, SLJIT_IMM, max);
10676
      }
10677
10678
    label = LABEL();
10679
    compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE);
10680
    if (opcode == OP_UPTO)
10681
      {
10682
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2);
10683
      OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
10684
      jump = JUMP(SLJIT_ZERO);
10685
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL2, TMP1, 0);
10686
      }
10687
10688
    /* We cannot use TMP3 because of allocate_stack. */
10689
    allocate_stack(common, 1);
10690
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10691
    JUMPTO(SLJIT_JUMP, label);
10692
    if (jump != NULL)
10693
      JUMPHERE(jump);
10694
    BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
10695
    break;
10696
    }
10697
#ifdef SUPPORT_UNICODE
10698
  else if (type == OP_ALLANY && !common->invalid_utf)
10699
#else
10700
  else if (type == OP_ALLANY)
10701
#endif
10702
    {
10703
    if (opcode == OP_STAR)
10704
      {
10705
      if (exact == 1)
10706
        detect_partial_match(common, prev_backtracks);
10707
10708
      if (private_data_ptr == 0)
10709
        allocate_stack(common, 2);
10710
10711
      OP1(SLJIT_MOV, base, offset0, STR_END, 0);
10712
      OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
10713
10714
      OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
10715
      process_partial_match(common);
10716
10717
      if (early_fail_ptr != 0)
10718
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
10719
      BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
10720
      break;
10721
      }
10722
#ifdef SUPPORT_UNICODE
10723
    else if (!common->utf)
10724
#else
10725
    else
10726
#endif
10727
      {
10728
      /* If OP_ALLANY is present, it has a separate EXACT opcode. */
10729
      SLJIT_ASSERT(exact == 0);
10730
10731
      if (private_data_ptr == 0)
10732
        allocate_stack(common, 2);
10733
10734
      OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
10735
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));
10736
10737
      if (common->mode == PCRE2_JIT_COMPLETE)
10738
        {
10739
        OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
10740
        SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
10741
        }
10742
      else
10743
        {
10744
        jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
10745
        process_partial_match(common);
10746
        JUMPHERE(jump);
10747
        }
10748
10749
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
10750
10751
      if (early_fail_ptr != 0)
10752
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
10753
      BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
10754
      break;
10755
      }
10756
    }
10757
10758
  charpos_enabled = FALSE;
10759
  charpos_char = 0;
10760
  charpos_othercasebit = 0;
10761
10762
  SLJIT_ASSERT(tmp_base == TMP3);
10763
  if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI))
10764
    {
10765
#ifdef SUPPORT_UNICODE
10766
    charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]);
10767
#else
10768
    charpos_enabled = TRUE;
10769
#endif
10770
    if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1))
10771
      {
10772
      charpos_othercasebit = char_get_othercase_bit(common, end + 1);
10773
      if (charpos_othercasebit == 0)
10774
        charpos_enabled = FALSE;
10775
      }
10776
10777
    if (charpos_enabled)
10778
      {
10779
      charpos_char = end[1];
10780
      /* Consume the OP_CHAR opcode. */
10781
      end += 2;
10782
#if PCRE2_CODE_UNIT_WIDTH == 8
10783
      SLJIT_ASSERT((charpos_othercasebit >> 8) == 0);
10784
#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
10785
      SLJIT_ASSERT((charpos_othercasebit >> 9) == 0);
10786
      if ((charpos_othercasebit & 0x100) != 0)
10787
        charpos_othercasebit = (charpos_othercasebit & 0xff) << 8;
10788
#endif
10789
      if (charpos_othercasebit != 0)
10790
        charpos_char |= charpos_othercasebit;
10791
10792
      BACKTRACK_AS(char_iterator_backtrack)->charpos.charpos_enabled = TRUE;
10793
      BACKTRACK_AS(char_iterator_backtrack)->charpos.chr = charpos_char;
10794
      BACKTRACK_AS(char_iterator_backtrack)->charpos.othercasebit = charpos_othercasebit;
10795
10796
      if (private_data_ptr == 0)
10797
        allocate_stack(common, 2);
10798
10799
      use_tmp = (opcode == OP_STAR);
10800
10801
      if (use_tmp)
10802
        {
10803
        OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
10804
        OP1(SLJIT_MOV, base, offset0, TMP3, 0);
10805
        }
10806
      else
10807
        {
10808
        OP1(SLJIT_MOV, base, offset1, COUNT_MATCH, 0);
10809
        OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_IMM, 0);
10810
        OP1(SLJIT_MOV, base, offset0, COUNT_MATCH, 0);
10811
        OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, exact == max ? 0 : (max + 1));
10812
        }
10813
10814
      /* Search the first instance of charpos_char. */
10815
      if (exact > 0)
10816
        detect_partial_match(common, &no_match);
10817
      else
10818
        jump = JUMP(SLJIT_JUMP);
10819
10820
      label = LABEL();
10821
10822
      if (opcode == OP_UPTO)
10823
        {
10824
        if (exact == max)
10825
          OP2(SLJIT_ADD, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
10826
        else
10827
          {
10828
          OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
10829
          add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
10830
          }
10831
        }
10832
10833
      compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
10834
10835
      if (early_fail_ptr != 0)
10836
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
10837
10838
      if (exact == 0)
10839
        JUMPHERE(jump);
10840
10841
      detect_partial_match(common, &no_match);
10842
10843
      if (opcode == OP_UPTO && exact > 0)
10844
        {
10845
        if (exact == max)
10846
          CMPTO(SLJIT_LESS, TMP3, 0, SLJIT_IMM, exact, label);
10847
        else
10848
          CMPTO(SLJIT_GREATER, TMP3, 0, SLJIT_IMM, (max + 1) - exact, label);
10849
        }
10850
10851
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
10852
      if (charpos_othercasebit != 0)
10853
        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
10854
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
10855
10856
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
10857
      if (use_tmp)
10858
        {
10859
        OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, SLJIT_IMM, 0);
10860
        SELECT(SLJIT_EQUAL, TMP3, STR_PTR, 0, TMP3);
10861
        }
10862
      else
10863
        {
10864
        OP2U(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, SLJIT_IMM, 0);
10865
        SELECT(SLJIT_EQUAL, COUNT_MATCH, STR_PTR, 0, COUNT_MATCH);
10866
        }
10867
      JUMPTO(SLJIT_JUMP, label);
10868
10869
      set_jumps(no_match, LABEL());
10870
      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
10871
      if (use_tmp)
10872
        OP1(SLJIT_MOV, base, offset1, TMP3, 0);
10873
      else
10874
        {
10875
        OP1(SLJIT_MOV, TMP1, 0, base, offset1);
10876
        OP1(SLJIT_MOV, base, offset1, COUNT_MATCH, 0);
10877
        OP1(SLJIT_MOV, COUNT_MATCH, 0, TMP1, 0);
10878
        }
10879
10880
      add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0));
10881
10882
      BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
10883
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
10884
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
10885
      break;
10886
      }
10887
    }
10888
10889
  if (private_data_ptr == 0)
10890
    allocate_stack(common, 2);
10891
10892
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
10893
  use_tmp = (opcode == OP_STAR);
10894
10895
  if (common->utf)
10896
    {
10897
    if (!use_tmp)
10898
      OP1(SLJIT_MOV, base, offset0, COUNT_MATCH, 0);
10899
10900
    OP1(SLJIT_MOV, use_tmp ? TMP3 : COUNT_MATCH, 0, STR_PTR, 0);
10901
    }
10902
#endif
10903
10904
  if (opcode == OP_UPTO)
10905
    OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, exact == max ? -(sljit_sw)exact : (sljit_sw)max);
10906
10907
  if (opcode == OP_UPTO && exact > 0)
10908
    {
10909
    label = LABEL();
10910
    detect_partial_match(common, &no_match);
10911
    compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
10912
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
10913
    if (common->utf)
10914
      OP1(SLJIT_MOV, use_tmp ? TMP3 : COUNT_MATCH, 0, STR_PTR, 0);
10915
#endif
10916
10917
    if (exact == max)
10918
      {
10919
      OP2(SLJIT_ADD | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
10920
      JUMPTO(SLJIT_NOT_ZERO, label);
10921
      }
10922
    else
10923
      {
10924
      OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
10925
      add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
10926
      CMPTO(SLJIT_NOT_EQUAL, TMP3, 0, SLJIT_IMM, max - exact, label);
10927
      }
10928
10929
    OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
10930
    JUMPTO(SLJIT_JUMP, label);
10931
    }
10932
  else
10933
    {
10934
    OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
10935
10936
    detect_partial_match(common, &no_match);
10937
    label = LABEL();
10938
    compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
10939
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
10940
    if (common->utf)
10941
      OP1(SLJIT_MOV, use_tmp ? TMP3 : COUNT_MATCH, 0, STR_PTR, 0);
10942
#endif
10943
10944
    if (opcode == OP_UPTO)
10945
      {
10946
      OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
10947
      add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
10948
      }
10949
10950
    detect_partial_match_to(common, label);
10951
    }
10952
10953
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
10954
  if (common->utf)
10955
    {
10956
    set_jumps(no_char1_match, LABEL());
10957
    set_jumps(no_match, LABEL());
10958
    if (use_tmp)
10959
      {
10960
      OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
10961
      OP1(SLJIT_MOV, base, offset0, TMP3, 0);
10962
      }
10963
    else
10964
      {
10965
      OP1(SLJIT_MOV, STR_PTR, 0, COUNT_MATCH, 0);
10966
      OP1(SLJIT_MOV, COUNT_MATCH, 0, base, offset0);
10967
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
10968
      }
10969
    }
10970
  else
10971
#endif
10972
    {
10973
    if (opcode != OP_UPTO || exact == 0)
10974
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
10975
    set_jumps(no_char1_match, LABEL());
10976
10977
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
10978
    set_jumps(no_match, LABEL());
10979
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
10980
    }
10981
10982
  if (opcode == OP_UPTO)
10983
    {
10984
    if (exact > 0)
10985
      {
10986
      if (max == exact)
10987
        jump = CMP(SLJIT_GREATER_EQUAL, TMP3, 0, SLJIT_IMM, -(sljit_sw)exact);
10988
      else
10989
        jump = CMP(SLJIT_GREATER, TMP3, 0, SLJIT_IMM, max - exact);
10990
10991
      add_jump(compiler, &backtrack->own_backtracks, jump);
10992
      }
10993
    }
10994
  else if (exact == 1)
10995
    add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, base, offset1, STR_PTR, 0));
10996
10997
  if (early_fail_ptr != 0)
10998
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
10999
11000
  BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11001
  break;
11002
11003
  case OP_QUERY:
11004
  SLJIT_ASSERT(backtrack != NULL && early_fail_ptr == 0);
11005
  if (private_data_ptr == 0)
11006
    allocate_stack(common, 1);
11007
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11008
  compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE);
11009
  BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11010
  break;
11011
11012
  case OP_MINSTAR:
11013
  case OP_MINQUERY:
11014
  SLJIT_ASSERT(backtrack != NULL && (opcode == OP_MINSTAR || early_fail_ptr == 0));
11015
  if (private_data_ptr == 0)
11016
    allocate_stack(common, 1);
11017
11018
  if (exact >= 1)
11019
    {
11020
    if (exact >= 2)
11021
      {
11022
      /* Extuni has a separate exact opcode. */
11023
      SLJIT_ASSERT(tmp_base == TMP3 && early_fail_ptr == 0);
11024
      OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, exact);
11025
      }
11026
11027
    if (opcode == OP_MINQUERY)
11028
      OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, -1);
11029
11030
    label = LABEL();
11031
    BACKTRACK_AS(char_iterator_backtrack)->matchingpath = label;
11032
11033
    compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE);
11034
11035
    if (exact >= 2)
11036
      {
11037
      OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
11038
      JUMPTO(SLJIT_NOT_ZERO, label);
11039
      }
11040
11041
    if (opcode == OP_MINQUERY)
11042
      OP2(SLJIT_AND, base, offset0, base, offset0, STR_PTR, 0);
11043
    else
11044
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11045
    }
11046
  else
11047
    {
11048
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11049
    BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11050
    }
11051
11052
  if (early_fail_ptr != 0)
11053
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11054
  break;
11055
11056
  case OP_MINUPTO:
11057
  SLJIT_ASSERT(backtrack != NULL && early_fail_ptr == 0);
11058
  if (private_data_ptr == 0)
11059
    allocate_stack(common, 2);
11060
11061
  OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1);
11062
11063
  if (exact == 0)
11064
    {
11065
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11066
    BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11067
    break;
11068
    }
11069
11070
  if (exact >= 2)
11071
    {
11072
    /* Extuni has a separate exact opcode. */
11073
    SLJIT_ASSERT(tmp_base == TMP3);
11074
    OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, exact);
11075
    }
11076
11077
  label = LABEL();
11078
  BACKTRACK_AS(char_iterator_backtrack)->matchingpath = label;
11079
11080
  compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE);
11081
11082
  if (exact >= 2)
11083
    {
11084
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
11085
    JUMPTO(SLJIT_NOT_ZERO, label);
11086
    }
11087
11088
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11089
  break;
11090
11091
  case OP_EXACT:
11092
  SLJIT_ASSERT(backtrack == NULL);
11093
  break;
11094
11095
  case OP_POSSTAR:
11096
  SLJIT_ASSERT(backtrack == NULL);
11097
#if defined SUPPORT_UNICODE
11098
  if (type == OP_ALLANY && !common->invalid_utf)
11099
#else
11100
  if (type == OP_ALLANY)
11101
#endif
11102
    {
11103
    if (exact == 1)
11104
      detect_partial_match(common, prev_backtracks);
11105
11106
    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
11107
    process_partial_match(common);
11108
    if (early_fail_ptr != 0)
11109
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
11110
    break;
11111
    }
11112
11113
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11114
  if (common->utf)
11115
    {
11116
    SLJIT_ASSERT(tmp_base == TMP3 || common->locals_size >= 3 * SSIZE_OF(sw));
11117
11118
    if (tmp_base != TMP3)
11119
      {
11120
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL2, COUNT_MATCH, 0);
11121
      tmp_base = COUNT_MATCH;
11122
      }
11123
11124
    OP1(SLJIT_MOV, tmp_base, 0, exact == 1 ? SLJIT_IMM : STR_PTR, 0);
11125
    detect_partial_match(common, &no_match);
11126
    label = LABEL();
11127
    compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11128
    OP1(SLJIT_MOV, tmp_base, 0, STR_PTR, 0);
11129
    detect_partial_match_to(common, label);
11130
11131
    set_jumps(no_match, LABEL());
11132
    OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, 0);
11133
11134
    if (tmp_base != TMP3)
11135
      OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2);
11136
11137
    if (exact == 1)
11138
      add_jump(compiler, prev_backtracks, CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0));
11139
11140
    if (early_fail_ptr != 0)
11141
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11142
    break;
11143
    }
11144
#endif
11145
11146
  if (exact == 1)
11147
    OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11148
11149
  detect_partial_match(common, &no_match);
11150
  label = LABEL();
11151
  /* Extuni never fails, so no_char1_match is not used in that case.
11152
     Anynl optionally reads an extra character on success. */
11153
  compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11154
  detect_partial_match_to(common, label);
11155
  if (type != OP_EXTUNI)
11156
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11157
11158
  set_jumps(no_char1_match, LABEL());
11159
  if (type != OP_EXTUNI)
11160
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11161
11162
  set_jumps(no_match, LABEL());
11163
11164
  if (exact == 1)
11165
    add_jump(compiler, prev_backtracks, CMP(SLJIT_EQUAL, tmp_base, tmp_offset, STR_PTR, 0));
11166
11167
  if (early_fail_ptr != 0)
11168
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11169
  break;
11170
11171
  case OP_POSUPTO:
11172
  SLJIT_ASSERT(backtrack == NULL && early_fail_ptr == 0);
11173
  max += exact;
11174
11175
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11176
  if (type == OP_EXTUNI || common->utf)
11177
#else
11178
  if (type == OP_EXTUNI)
11179
#endif
11180
    {
11181
    SLJIT_ASSERT(common->locals_size >= 3 * SSIZE_OF(sw));
11182
11183
    /* Count match is not modified by compile_char1_matchingpath. */
11184
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL2, COUNT_MATCH, 0);
11185
    OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_IMM, exact == max ? 0 : max);
11186
11187
    label = LABEL();
11188
    /* Extuni only modifies TMP3 on successful match. */
11189
    OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
11190
    compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
11191
11192
    if (exact == max)
11193
      {
11194
      OP2(SLJIT_ADD, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
11195
      JUMPTO(SLJIT_JUMP, label);
11196
      }
11197
    else
11198
      {
11199
      OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
11200
      JUMPTO(SLJIT_NOT_ZERO, label);
11201
      OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
11202
      }
11203
11204
    set_jumps(no_match, LABEL());
11205
11206
    if (exact > 0)
11207
      {
11208
      if (exact == max)
11209
        OP2U(SLJIT_SUB | SLJIT_SET_LESS, COUNT_MATCH, 0, SLJIT_IMM, exact);
11210
      else
11211
        OP2U(SLJIT_SUB | SLJIT_SET_GREATER, COUNT_MATCH, 0, SLJIT_IMM, max - exact);
11212
      }
11213
11214
    OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2);
11215
11216
    if (exact > 0)
11217
      add_jump(compiler, prev_backtracks, JUMP(exact == max ? SLJIT_LESS : SLJIT_GREATER));
11218
    OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
11219
    break;
11220
    }
11221
11222
  SLJIT_ASSERT(tmp_base == TMP3);
11223
11224
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, exact == max ? 0 : max);
11225
11226
  detect_partial_match(common, &no_match);
11227
  label = LABEL();
11228
  compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11229
11230
  if (exact == max)
11231
    OP2(SLJIT_ADD, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
11232
  else
11233
    {
11234
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
11235
    add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11236
    }
11237
  detect_partial_match_to(common, label);
11238
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11239
11240
  set_jumps(no_char1_match, LABEL());
11241
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11242
  set_jumps(no_match, LABEL());
11243
11244
  if (exact > 0)
11245
    {
11246
    if (exact == max)
11247
      jump = CMP(SLJIT_LESS, TMP3, 0, SLJIT_IMM, exact);
11248
    else
11249
      jump = CMP(SLJIT_GREATER, TMP3, 0, SLJIT_IMM, max - exact);
11250
11251
    add_jump(compiler, prev_backtracks, jump);
11252
    }
11253
  break;
11254
11255
  case OP_POSQUERY:
11256
  SLJIT_ASSERT(backtrack == NULL && early_fail_ptr == 0);
11257
  SLJIT_ASSERT(tmp_base == TMP3 || common->locals_size >= 3 * SSIZE_OF(sw));
11258
  OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11259
  compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
11260
  OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11261
  set_jumps(no_match, LABEL());
11262
  OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
11263
  break;
11264
11265
  default:
11266
  SLJIT_UNREACHABLE();
11267
  break;
11268
  }
11269
11270
count_match(common);
11271
return end;
11272
}
11273
11274
/* Emits the matching path for OP_FAIL, OP_ACCEPT and OP_ASSERT_ACCEPT.

   OP_FAIL jumps unconditionally to this item's own backtrack list.
   The accept opcodes jump to the accept label (or are queued on
   common->accept while that label does not exist yet). When the accept
   is not OP_ASSERT_ACCEPT, is outside any current entry and the pattern
   might match the empty string, the jump is guarded by run-time checks of
   the PCRE2_NOTEMPTY / PCRE2_NOTEMPTY_ATSTART option bits.

   Returns the address of the opcode following cc (NULL is passed to
   PUSH_BACKTRACK as its error value). */
static SLJIT_INLINE PCRE2_SPTR compile_fail_accept_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
const PCRE2_UCHAR opcode = *cc;
PCRE2_SPTR next = cc + 1;

PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);

if (opcode == OP_FAIL)
  {
  add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_JUMP));
  return next;
  }

/* An end-anchored pattern may only accept when the subject is exhausted. */
if (opcode == OP_ACCEPT && common->currententry == NULL
    && (common->re->overall_options & PCRE2_ENDANCHORED) != 0)
  add_jump(compiler, &common->restart_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));

if (opcode != OP_ASSERT_ACCEPT && common->currententry == NULL && common->might_be_empty)
  {
  /* A match whose end differs from the value stored in OVECTOR(0)
     is accepted without consulting the notempty options. */
  if (common->accept_label != NULL)
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
  else
    add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));

  /* Load the run-time option bits into TMP2. With virtual registers the
     argument block pointer is first copied to TMP1. */
  if (HAS_VIRTUAL_REGISTERS)
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options));

  /* PCRE2_NOTEMPTY set: backtrack. */
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
  add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_NOT_ZERO));

  /* PCRE2_NOTEMPTY_ATSTART clear: accept. */
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
  if (common->accept_label != NULL)
    JUMPTO(SLJIT_ZERO, common->accept_label);
  else
    add_jump(compiler, &common->accept, JUMP(SLJIT_ZERO));

  /* Otherwise accept only if STR_PTR differs from the str field of the
     argument block; an equal position backtracks. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
  if (common->accept_label != NULL)
    CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
  else
    add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));

  add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_JUMP));
  return next;
  }

/* No need to check notempty conditions. */
if (common->accept_label != NULL)
  JUMPTO(SLJIT_JUMP, common->accept_label);
else
  add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
return next;
}
11329
11330
/* Emits the matching path for OP_CLOSE.

   The group number is the 2-unit immediate after the opcode. STR_PTR is
   stored into the second ovector slot of that group; unless the group is
   flagged in common->optimized_cbracket, the value kept in its
   OVECTOR_PRIV slot is copied into the first ovector slot as well.
   Nothing is emitted while common->currententry is set.

   Returns the address of the opcode following OP_CLOSE. */
static SLJIT_INLINE PCRE2_SPTR compile_close_matchingpath(compiler_common *common, PCRE2_SPTR cc)
{
DEFINE_COMPILER;
const int group = GET2(cc, 1);
const int pair = group << 1;
const BOOL copy_start = (common->optimized_cbracket[group] == 0);
PCRE2_SPTR next = cc + 1 + IMM2_SIZE;

/* Data will be discarded anyway... */
if (common->currententry != NULL)
  return next;

if (copy_start)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(group));

OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(pair + 1), STR_PTR, 0);

if (copy_start)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(pair), TMP1, 0);

return next;
}
11348
11349
/* Emits the matching path for the backtracking control verbs.

   Every verb pushes a plain backtrack_common item. OP_SKIP additionally
   saves STR_PTR in a freshly allocated stack slot. OP_COMMIT_ARG,
   OP_PRUNE_ARG and OP_THEN_ARG store the address cc + 2 both in the
   local mark slot (common->mark_ptr) and in the mark_ptr field of the
   jit_arguments block. The remaining verbs emit no code here.

   Returns the address of the next opcode; for the *_ARG verbs the
   argument (2 + cc[1] code units) is skipped too. */
static SLJIT_INLINE PCRE2_SPTR compile_control_verb_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
const PCRE2_UCHAR verb = *cc;
const BOOL sets_mark = (verb == OP_COMMIT_ARG || verb == OP_PRUNE_ARG || verb == OP_THEN_ARG);
const BOOL has_argument = (sets_mark || verb == OP_SKIP_ARG);
PCRE2_SPTR next = cc + 1;

if (has_argument)
  next += 2 + cc[1];

PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);

if (verb == OP_SKIP)
  {
  allocate_stack(common, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  return next;
  }

if (!sets_mark)
  return next;

if (HAS_VIRTUAL_REGISTERS)
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);

return next;
}
11380
11381
/* One-element opcode buffer holding OP_THEN_TRAP. The then-trap backtrack
items point their common.cc field at this array. */
static PCRE2_UCHAR then_trap_opcode[1] = { OP_THEN_TRAP };
11382
11383
/* Pushes a then_trap_backtrack item for the range [cc, ccend) and emits
   the code that records it on the stack.

   The new item becomes common->then_trap. Three stack slots are always
   allocated, plus framesize more when get_framesize() returns a
   non-negative value. The top three slots receive, in order, the offset
   of cc from common->start, the type_then_trap tag and the previous
   control head; the control head is then redirected to this record.
   When a frame is needed it is initialised with init_frame(). */
static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
then_trap_backtrack *trap;
BOOL needs_control_head;
int framesize;
int slots;

PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
trap = BACKTRACK_AS(then_trap_backtrack);

common->then_trap = trap;
trap->common.cc = then_trap_opcode;
trap->start = (sljit_sw)(cc - common->start);
trap->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);

framesize = trap->framesize;
slots = 3;
if (framesize >= 0)
  slots += framesize;

/* Keep the previous control head in TMP2 until it can be stored. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
allocate_stack(common, slots);

if (slots > 3)
  OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (slots - 3) * sizeof(sljit_sw));
else
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);

OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(slots - 1), SLJIT_IMM, trap->start);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(slots - 2), SLJIT_IMM, type_then_trap);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(slots - 3), TMP2, 0);

if (framesize >= 0)
  init_frame(common, cc, ccend, framesize - 1, 0);
}
11413
11414
/* Jump list that receives the failure jumps of simple (non-backtracking)
   items: the simple_backtracks list of the most recently pushed backtrack
   item when there is one, otherwise the parent's own list. This is a macro
   on purpose: the expression is evaluated afresh at every use. */
#define MATCHINGPATH_FAIL_LIST(parent) \
  ((parent)->top != NULL ? &(parent)->top->simple_backtracks : &(parent)->own_backtracks)

/* Emits the matching path for the opcodes in [cc, ccend).

   Each opcode is dispatched to its dedicated compile_* routine, which
   returns the address of the next opcode to process. The function returns
   early when a routine yields NULL (or a PUSH_BACKTRACK_NOVALUE bails out).
   When a then-trap is registered for the starting offset, a tail item is
   pushed before the loop and a head item after it, and the enclosing
   then-trap is restored. */
static void compile_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
then_trap_backtrack *outer_then_trap = NULL;
BOOL then_trap_opened = FALSE;
BOOL repeated;
size_t len;

SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));

if (common->has_then && common->then_offsets[cc - common->start] != 0)
  {
  SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
  outer_then_trap = common->then_trap;
  then_trap_opened = TRUE;
  /* Tail item on backtrack. */
  compile_then_trap_matchingpath(common, cc, ccend, parent);
  }

while (cc < ccend)
  {
  switch(*cc)
    {
    /* Zero-width assertions. */
    case OP_SOD: case OP_SOM:
    case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY:
    case OP_EODN: case OP_EOD:
    case OP_DOLL: case OP_DOLLM:
    case OP_CIRC: case OP_CIRCM:
    case OP_NOT_UCP_WORD_BOUNDARY: case OP_UCP_WORD_BOUNDARY:
    cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, MATCHINGPATH_FAIL_LIST(parent));
    break;

    /* Single character types. */
    case OP_NOT_DIGIT: case OP_DIGIT:
    case OP_NOT_WHITESPACE: case OP_WHITESPACE:
    case OP_NOT_WORDCHAR: case OP_WORDCHAR:
    case OP_ANY: case OP_ALLANY: case OP_ANYBYTE:
    case OP_NOTPROP: case OP_PROP:
    case OP_ANYNL:
    case OP_NOT_HSPACE: case OP_HSPACE:
    case OP_NOT_VSPACE: case OP_VSPACE:
    case OP_EXTUNI:
    case OP_NOT: case OP_NOTI:
    cc = compile_char1_matchingpath(common, *cc, cc + 1, MATCHINGPATH_FAIL_LIST(parent), TRUE);
    break;

    case OP_SET_SOM:
    /* Replace OVECTOR(0) with STR_PTR, keeping the old value on the stack. */
    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
    cc++;
    break;

    case OP_CHAR: case OP_CHARI:
    /* The multi-character routine is only used in complete mode. */
    if (common->mode != PCRE2_JIT_COMPLETE)
      cc = compile_char1_matchingpath(common, *cc, cc + 1, MATCHINGPATH_FAIL_LIST(parent), TRUE);
    else
      cc = compile_charn_matchingpath(common, cc, ccend, MATCHINGPATH_FAIL_LIST(parent));
    break;

    /* Character iterators. */
    case OP_STAR: case OP_MINSTAR: case OP_PLUS: case OP_MINPLUS:
    case OP_QUERY: case OP_MINQUERY: case OP_UPTO: case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR: case OP_POSPLUS: case OP_POSQUERY: case OP_POSUPTO:
    case OP_STARI: case OP_MINSTARI: case OP_PLUSI: case OP_MINPLUSI:
    case OP_QUERYI: case OP_MINQUERYI: case OP_UPTOI: case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI: case OP_POSPLUSI: case OP_POSQUERYI: case OP_POSUPTOI:
    case OP_NOTSTAR: case OP_NOTMINSTAR: case OP_NOTPLUS: case OP_NOTMINPLUS:
    case OP_NOTQUERY: case OP_NOTMINQUERY: case OP_NOTUPTO: case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR: case OP_NOTPOSPLUS: case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:
    case OP_NOTSTARI: case OP_NOTMINSTARI: case OP_NOTPLUSI: case OP_NOTMINPLUSI:
    case OP_NOTQUERYI: case OP_NOTMINQUERYI: case OP_NOTUPTOI: case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI: case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:
    case OP_TYPESTAR: case OP_TYPEMINSTAR: case OP_TYPEPLUS: case OP_TYPEMINPLUS:
    case OP_TYPEQUERY: case OP_TYPEMINQUERY: case OP_TYPEUPTO: case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS: case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:
    cc = compile_iterator_matchingpath(common, cc, parent, MATCHINGPATH_FAIL_LIST(parent));
    break;

    case OP_CLASS: case OP_NCLASS:
    /* The opcode is followed by a 32 byte bitmap. */
    len = 1 + (32 / sizeof(PCRE2_UCHAR));
    repeated = (cc[len] >= OP_CRSTAR && cc[len] <= OP_CRPOSRANGE);
    if (repeated)
      cc = compile_iterator_matchingpath(common, cc, parent, MATCHINGPATH_FAIL_LIST(parent));
    else
      cc = compile_char1_matchingpath(common, *cc, cc + 1, MATCHINGPATH_FAIL_LIST(parent), TRUE);
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
    case OP_XCLASS: case OP_ECLASS:
    len = GET(cc, 1);
    repeated = (cc[len] >= OP_CRSTAR && cc[len] <= OP_CRPOSRANGE);
    if (repeated)
      cc = compile_iterator_matchingpath(common, cc, parent, MATCHINGPATH_FAIL_LIST(parent));
    else
      cc = compile_char1_matchingpath(common, *cc, cc + 1, MATCHINGPATH_FAIL_LIST(parent), TRUE);
    break;
#endif

    case OP_REF: case OP_REFI:
    case OP_DNREF: case OP_DNREFI:
    len = PRIV(OP_lengths)[*cc];
    repeated = (cc[len] >= OP_CRSTAR && cc[len] <= OP_CRPOSRANGE);
    if (repeated)
      {
      cc = compile_ref_iterator_matchingpath(common, cc, parent);
      break;
      }
    /* The duplicate-name forms run a group search before the match. */
    if (*cc == OP_DNREF || *cc == OP_DNREFI)
      compile_dnref_search(common, cc, MATCHINGPATH_FAIL_LIST(parent));
    compile_ref_matchingpath(common, cc, MATCHINGPATH_FAIL_LIST(parent), TRUE, FALSE);
    cc += len;
    break;

    case OP_RECURSE:
    cc = compile_recurse_matchingpath(common, cc, parent);
    break;

    case OP_CALLOUT: case OP_CALLOUT_STR:
    cc = compile_callout_matchingpath(common, cc, parent);
    break;

    case OP_ASSERT: case OP_ASSERT_NOT:
    case OP_ASSERTBACK: case OP_ASSERTBACK_NOT:
    PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
    cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
    break;

    case OP_BRAMINZERO:
    PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
    cc = bracketend(cc + 1);
    if (*(cc - 1 - LINK_SIZE) == OP_KETRMIN)
      {
      /* Two slots: a zero followed by the current position. */
      allocate_stack(common, 2);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
      }
    else
      {
      allocate_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }
    BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
    count_match(common);
    break;

    case OP_ASSERT_NA: case OP_ASSERTBACK_NA: case OP_ASSERT_SCS:
    case OP_ONCE: case OP_SCRIPT_RUN:
    case OP_BRA: case OP_CBRA: case OP_COND:
    case OP_SBRA: case OP_SCBRA: case OP_SCOND:
    cc = compile_bracket_matchingpath(common, cc, parent);
    break;

    case OP_BRAZERO:
    if (cc[1] <= OP_ASSERTBACK_NOT)
      {
      PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
      cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
      }
    else
      cc = compile_bracket_matchingpath(common, cc, parent);
    break;

    case OP_BRAPOS: case OP_CBRAPOS:
    case OP_SBRAPOS: case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    cc = compile_bracketpos_matchingpath(common, cc, parent);
    break;

    case OP_MARK:
    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
    SLJIT_ASSERT(common->mark_ptr != 0);
    /* Save the previous mark, then publish the new one (cc + 2) both
       locally and in the argument block. */
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
    allocate_stack(common, common->has_skip_arg ? 5 : 1);
    if (HAS_VIRTUAL_REGISTERS)
      OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
    if (common->has_skip_arg)
      {
      /* Link a type_mark record into the control head chain. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
      }
    cc += 1 + 2 + cc[1];
    break;

    case OP_PRUNE: case OP_PRUNE_ARG:
    case OP_SKIP: case OP_SKIP_ARG:
    case OP_THEN: case OP_THEN_ARG:
    case OP_COMMIT: case OP_COMMIT_ARG:
    cc = compile_control_verb_matchingpath(common, cc, parent);
    break;

    case OP_FAIL: case OP_ACCEPT: case OP_ASSERT_ACCEPT:
    cc = compile_fail_accept_matchingpath(common, cc, parent);
    break;

    case OP_CLOSE:
    cc = compile_close_matchingpath(common, cc);
    break;

    case OP_SKIPZERO:
    cc = bracketend(cc + 1);
    break;

    default:
    SLJIT_UNREACHABLE();
    return;
    }

  if (cc == NULL)
    return;
  }

if (then_trap_opened)
  {
  /* Head item on backtrack. */
  PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
  BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
  BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
  common->then_trap = outer_then_trap;
  }
SLJIT_ASSERT(cc == ccend);
}

#undef MATCHINGPATH_FAIL_LIST
11736
11737
/* Undefine the helper macros below so that they are no longer available
to the code that follows. */
#undef PUSH_BACKTRACK
11738
#undef PUSH_BACKTRACK_NOVALUE
11739
#undef BACKTRACK_AS
11740
11741
/* Calls compile_backtrackingpath() on (current) and returns from the
enclosing function when the sljit compiler reports an error afterwards.
Relies on `common` and `compiler` being in scope, and on the enclosing
function returning void (the macro uses a bare `return;`). */
#define COMPILE_BACKTRACKINGPATH(current) \
11742
  do \
11743
    { \
11744
    compile_backtrackingpath(common, (current)); \
11745
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
11746
      return; \
11747
    } \
11748
  while (0)
11749
11750
/* Casts the local variable named `current` to a pointer to the given
backtrack structure type. */
#define CURRENT_AS(type) ((type *)current)
11751
11752
/* Emits code that moves STR_PTR back by one extra code unit when the two
code units immediately before STR_PTR are CHAR_CR followed by CHAR_NL.
The whole check is skipped when STR_PTR minus one code unit is at or below
TMP2, so the caller has to load that bound into TMP2 before calling.
Overwrites TMP1 and SLJIT_TMP_DEST_REG. */
static void compile_newline_move_back(compiler_common *common)
11753
{
11754
DEFINE_COMPILER;
11755
struct sljit_jump *jump;
11756
11757
/* Nothing to do if stepping back one code unit would reach the bound in TMP2. */
OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11758
jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, TMP2, 0);
11759
/* All newlines are single byte, or their last byte
11760
is not equal to CHAR_NL/CHAR_CR even if UTF is enabled. */
11761
OP1(MOV_UCHAR, SLJIT_TMP_DEST_REG, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
11762
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
11763
/* Combine both code units into one value: (unit at -2) << 8 | (unit at -1). */
OP2(SLJIT_SHL, SLJIT_TMP_DEST_REG, 0, SLJIT_TMP_DEST_REG, 0, SLJIT_IMM, 8);
11764
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_TMP_DEST_REG, 0);
11765
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_CR << 8 | CHAR_NL);
11766
/* TMP1 receives the result of the equality test; it is used below as the
amount to step back. */
OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
11767
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
11768
/* Scale the step for wider code units. */
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
11769
#endif
11770
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
11771
JUMPHERE(jump);
11772
}
11773
11774
/* Emits the backtracking path of a single-character iterator described by
current->cc. The iterator is decoded by get_iterator_parameters() and is
then handled per opcode (OP_STAR/OP_UPTO, OP_QUERY, OP_MINSTAR, OP_MINUPTO,
OP_MINQUERY). The iterator's saved values are read from the backtrack stack
when PRIVATE_DATA(cc) is zero, otherwise from the local frame at that
offset; stack slots are released only in the former case. */
static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
11775
{
11776
DEFINE_COMPILER;
11777
PCRE2_SPTR cc = current->cc;
11778
PCRE2_UCHAR opcode;
11779
PCRE2_UCHAR type;
11780
sljit_u32 max = 0, exact;
11781
struct sljit_label *label = NULL;
11782
struct sljit_jump *jump = NULL;
11783
jump_list *jumplist = NULL;
11784
PCRE2_SPTR end;
11785
int private_data_ptr = PRIVATE_DATA(cc);
11786
/* base/offset0/offset1 address the two saved values of the iterator. */
int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
11787
int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
11788
int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + SSIZE_OF(sw);
11789
11790
cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
11791
11792
switch(opcode)
11793
  {
11794
  case OP_STAR:
11795
  case OP_UPTO:
11796
  if (type == OP_EXTUNI)
11797
    {
11798
    /* Pop the saved STR_PTR; a non-zero value resumes the matching path. */
    SLJIT_ASSERT(private_data_ptr == 0);
11799
    set_jumps(current->own_backtracks, LABEL());
11800
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
11801
    free_stack(common, 1);
11802
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
11803
    }
11804
  else
11805
    {
11806
    if (CURRENT_AS(char_iterator_backtrack)->charpos.charpos_enabled)
11807
      {
11808
      /* The bound in offset1 is tested once, before the loop. The loop then
      steps back one character at a time until the code unit at STR_PTR
      (OR-ed with othercasebit when that is set) equals charpos.chr. */
      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
11809
      OP1(SLJIT_MOV, TMP2, 0, base, offset1);
11810
11811
      jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
11812
      label = LABEL();
11813
      if (type == OP_ANYNL)
11814
        compile_newline_move_back(common);
11815
      move_back(common, NULL, TRUE);
11816
11817
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
11818
      if (CURRENT_AS(char_iterator_backtrack)->charpos.othercasebit != 0)
11819
        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->charpos.othercasebit);
11820
      CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->charpos.chr, CURRENT_AS(char_iterator_backtrack)->matchingpath);
11821
      /* The range beginning must match, no need to compare. */
11822
      JUMPTO(SLJIT_JUMP, label);
11823
11824
      set_jumps(current->own_backtracks, LABEL());
11825
      current->own_backtracks = NULL;
11826
      }
11827
    else
11828
      {
11829
      /* Give back one character: reload STR_PTR, stop when it is not above
      the bound stored at offset1, otherwise store the new position and
      retry the matching path. */
      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
11830
11831
      if (opcode == OP_STAR && exact == 1)
11832
        {
11833
        /* In this variant the step back is emitted before the bound test. */
        if (type == OP_ANYNL)
11834
          {
11835
          OP1(SLJIT_MOV, TMP2, 0, base, offset1);
11836
          compile_newline_move_back(common);
11837
          }
11838
11839
        move_back(common, NULL, TRUE);
11840
        jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
11841
        }
11842
      else
11843
        {
11844
        if (type == OP_ANYNL)
11845
          {
11846
          /* compile_newline_move_back() needs the bound in TMP2. */
          OP1(SLJIT_MOV, TMP2, 0, base, offset1);
11847
          jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
11848
          compile_newline_move_back(common);
11849
          }
11850
        else
11851
          jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
11852
11853
        move_back(common, NULL, TRUE);
11854
        }
11855
11856
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11857
      JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
11858
11859
      set_jumps(current->own_backtracks, LABEL());
11860
      }
11861
11862
    /* Target of the bound test: the iterator cannot give back anything more. */
    JUMPHERE(jump);
11863
    if (private_data_ptr == 0)
11864
      free_stack(common, 2);
11865
    }
11866
  break;
11867
11868
  case OP_QUERY:
11869
  /* Load and clear the saved position; a non-zero value resumes the
  matching path, zero skips to the end of this case. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
11870
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
11871
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
11872
  jump = JUMP(SLJIT_JUMP);
11873
  /* Entry for the iterator's own backtracks: same reload and clear, then
  an unconditional jump to the matching path. */
  set_jumps(current->own_backtracks, LABEL());
11874
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
11875
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
11876
  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
11877
  JUMPHERE(jump);
11878
  if (private_data_ptr == 0)
11879
    free_stack(common, 1);
11880
  break;
11881
11882
  case OP_MINSTAR:
11883
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
11884
  if (exact == 0)
11885
    {
11886
    /* Match one more character from the saved position and save the result. */
    compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
11887
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11888
    }
11889
  else if (exact > 1)
11890
    /* NOTE(review): TMP3 is presumably the counter used by the exact-repeat
    part of the matching path - confirm against that code. */
    OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);
11891
11892
  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
11893
  set_jumps(exact > 0 ? current->own_backtracks : jumplist, LABEL());
11894
  if (private_data_ptr == 0)
11895
    free_stack(common, 1);
11896
  break;
11897
11898
  case OP_MINUPTO:
11899
  /* The counter stored at offset1 is decremented; the zero flag of this
  subtraction is consumed by the conditional jumps below. */
  OP1(SLJIT_MOV, TMP1, 0, base, offset1);
11900
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
11901
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
11902
11903
  if (exact == 0)
11904
    {
11905
    /* Counter reached zero: no further attempt. */
    add_jump(compiler, &jumplist, JUMP(SLJIT_ZERO));
11906
11907
    OP1(SLJIT_MOV, base, offset1, TMP1, 0);
11908
    compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
11909
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11910
    JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
11911
11912
    set_jumps(jumplist, LABEL());
11913
    }
11914
  else
11915
    {
11916
    if (exact > 1)
11917
      OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);
11918
    OP1(SLJIT_MOV, base, offset1, TMP1, 0);
11919
    /* Uses the flags set by the SUB above. */
    JUMPTO(SLJIT_NOT_ZERO, CURRENT_AS(char_iterator_backtrack)->matchingpath);
11920
11921
    set_jumps(current->own_backtracks, LABEL());
11922
    }
11923
11924
  if (private_data_ptr == 0)
11925
    free_stack(common, 2);
11926
  break;
11927
11928
  case OP_MINQUERY:
11929
  /* Load and clear the saved position; zero means nothing is left to try. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
11930
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
11931
11932
  if (exact >= 1)
11933
    {
11934
    if (exact >= 2)
11935
      OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);
11936
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
11937
    set_jumps(current->own_backtracks, LABEL());
11938
    }
11939
  else
11940
    {
11941
    jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
11942
    compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
11943
    JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
11944
    set_jumps(jumplist, LABEL());
11945
    JUMPHERE(jump);
11946
    }
11947
11948
  if (private_data_ptr == 0)
11949
    free_stack(common, 1);
11950
  break;
11951
11952
  default:
11953
  SLJIT_UNREACHABLE();
11954
  break;
11955
  }
11956
}
11957
11958
/* Emits the backtracking path of a repeated back reference. The repeat
opcode is the code unit that follows the reference item at current->cc;
its lowest bit selects between the two variants handled below. */
static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
PCRE2_SPTR ref_code = current->cc;
PCRE2_UCHAR repeat_op = ref_code[PRIV(OP_lengths)[*ref_code]];
BOOL is_minimizing = (repeat_op & 0x1) != 0;
int released_slots;

if (is_minimizing)
  {
  /* OP_REF / OP_REFI release two stack slots, other reference opcodes three. */
  released_slots = (*ref_code == OP_REF || *ref_code == OP_REFI) ? 2 : 3;

  /* A non-zero saved position resumes the matching path; the stack is
  only released once nothing is left to try. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
  set_jumps(current->own_backtracks, LABEL());
  free_stack(common, released_slots);
  }
else
  {
  /* Maximizing repeat: pop one saved position, and resume the matching
  path when it is non-zero. */
  set_jumps(current->own_backtracks, LABEL());
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
  }
}
11982
11983
/* Emits the backtracking path of a recursion item. An inlined pattern is
handled by compiling the backtracking path of current->top; otherwise the
backtrack routine of the recursion entry is called, and a non-zero TMP1
afterwards resumes the matching path. */
static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
recurse_entry *target;

if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
  compile_backtrackingpath(common, current->top);
else
  {
  target = CURRENT_AS(recurse_backtrack)->entry;

  /* Call the label directly when it already exists, otherwise queue the
  call in the entry's backtrack_calls list. */
  if (target->backtrack_label != NULL)
    JUMPTO(SLJIT_FAST_CALL, target->backtrack_label);
  else
    add_jump(compiler, &target->backtrack_calls, JUMP(SLJIT_FAST_CALL));

  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(recurse_backtrack)->matchingpath);
  }

set_jumps(current->own_backtracks, LABEL());
}
12002
12003
/* Emits the backtracking path of an assertion whose backtrack data is in
`current` (accessed as an assert_backtrack). An optional leading OP_BRAZERO
is handled here; OP_BRAMINZERO is asserted not to occur. */
static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12004
{
12005
DEFINE_COMPILER;
12006
PCRE2_SPTR cc = current->cc;
12007
PCRE2_UCHAR bra = OP_BRA;
12008
struct sljit_jump *brajump = NULL;
12009
12010
SLJIT_ASSERT(*cc != OP_BRAMINZERO);
12011
if (*cc == OP_BRAZERO)
12012
  {
12013
  bra = *cc;
12014
  cc++;
12015
  }
12016
12017
if (bra == OP_BRAZERO)
12018
  {
12019
  /* Reload the value saved for the OP_BRAZERO case from the stack top. */
  SLJIT_ASSERT(current->own_backtracks == NULL);
12020
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12021
  }
12022
12023
/* Negative framesize: no frame is reverted here (presumably none was
saved by the matching path - confirm there). */
if (CURRENT_AS(assert_backtrack)->framesize < 0)
12024
  {
12025
  set_jumps(current->own_backtracks, LABEL());
12026
12027
  if (bra == OP_BRAZERO)
12028
    {
12029
    /* Clear the saved slot; a non-zero STR_PTR resumes the matching path,
    otherwise the slot is released. */
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
12030
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
12031
    free_stack(common, 1);
12032
    }
12033
  return;
12034
  }
12035
12036
if (bra == OP_BRAZERO)
12037
  {
12038
  if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
12039
    {
12040
    /* Negative assertions need no frame handling on this path. */
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
12041
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
12042
    free_stack(common, 1);
12043
    return;
12044
    }
12045
  /* A zero STR_PTR skips the frame handling below (target: JUMPHERE(brajump)). */
  free_stack(common, 1);
12046
  brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
12047
  }
12048
12049
if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
12050
  {
12051
  /* Reload STACK_TOP from the assertion's private data, call the shared
  revertframes routine, then write the value read from STACK(-2) back
  into the private data slot. */
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr);
12052
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12053
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
12054
  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(assert_backtrack)->framesize - 1) * sizeof(sljit_sw));
12055
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr, TMP1, 0);
12056
12057
  set_jumps(current->own_backtracks, LABEL());
12058
  }
12059
else
12060
  set_jumps(current->own_backtracks, LABEL());
12061
12062
if (bra == OP_BRAZERO)
12063
  {
12064
  /* We know there is enough place on the stack. */
12065
  /* Re-create the slot released above, store zero in it and resume the
  matching path. */
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
12066
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
12067
  JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_backtrack)->matchingpath);
12068
  JUMPHERE(brajump);
12069
  }
12070
}
12071
12072
/* Emits the backtracking path of a bracketed group (the opcode at
current->cc, optionally preceded by OP_BRAZERO or OP_BRAMINZERO).

The generated code first pops the values this group left on the backtrack
stack, then runs the backtracking path collected in current->top. When the
group has alternatives, each remaining alternative gets its matching path
and its own backtracking path compiled here. Finally the group's private
data and capture slots are restored and the repeat kind is handled; the
repeat kind comes from the closing KET (OP_KETRMAX, OP_KETRMIN, or a
counted repeat recorded in the KET's private data).

Returns early when the sljit compiler reports an error. */
static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12073
{
12074
DEFINE_COMPILER;
12075
int opcode, stacksize, alt_count, alt_max;
12076
int offset = 0;
12077
int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
12078
int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
12079
PCRE2_SPTR cc = current->cc;
12080
PCRE2_SPTR ccbegin;
12081
PCRE2_SPTR ccprev;
12082
PCRE2_UCHAR bra = OP_BRA;
12083
PCRE2_UCHAR ket;
12084
const assert_backtrack *assert;
12085
BOOL has_alternatives;
12086
BOOL needs_control_head = FALSE;
12087
BOOL has_vreverse;
12088
struct sljit_jump *brazero = NULL;
12089
struct sljit_jump *next_alt = NULL;
12090
struct sljit_jump *once = NULL;
12091
struct sljit_jump *cond = NULL;
12092
struct sljit_label *rmin_label = NULL;
12093
struct sljit_label *exact_label = NULL;
12094
struct sljit_jump *mov_addr = NULL;
12095
12096
/* Remember and skip an optional zero-repeat prefix. */
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
12097
  {
12098
  bra = *cc;
12099
  cc++;
12100
  }
12101
12102
opcode = *cc;
12103
/* ccbegin temporarily points at the closing KET of the group. */
ccbegin = bracketend(cc) - 1 - LINK_SIZE;
12104
ket = *ccbegin;
12105
/* A plain OP_KET with private data describes a counted repeat; OP_UPTO and
OP_MINUPTO are then treated like OP_KETRMAX and OP_KETRMIN. */
if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
12106
  {
12107
  repeat_ptr = PRIVATE_DATA(ccbegin);
12108
  repeat_type = PRIVATE_DATA(ccbegin + 2);
12109
  repeat_count = PRIVATE_DATA(ccbegin + 3);
12110
  SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
12111
  if (repeat_type == OP_UPTO)
12112
    ket = OP_KETRMAX;
12113
  if (repeat_type == OP_MINUPTO)
12114
    ket = OP_KETRMIN;
12115
  }
12116
/* From here on ccbegin is the opening opcode of the group and cc is the
end of its first branch. */
ccbegin = cc;
12117
cc += GET(cc, 1);
12118
has_alternatives = *cc == OP_ALT;
12119
if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
12120
  has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.no_capture != NULL;
12121
/* Capturing groups: offset is twice the group number read from the code. */
if (opcode == OP_CBRA || opcode == OP_SCBRA)
12122
  offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
12123
if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
12124
  opcode = OP_SCOND;
12125
12126
alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;
12127
12128
/* Decoding the needs_control_head in framesize. */
12129
/* Note that this rewrites u.framesize in the backtrack structure. */
if (opcode == OP_ONCE)
12130
  {
12131
  needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
12132
  CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
12133
  }
12134
12135
/* Counted repeat: pop the saved counter into repeat_ptr (plus one for
OP_UPTO). */
if (ket != OP_KET && repeat_type != 0)
12136
  {
12137
  /* TMP1 is used in OP_KETRMIN below. */
12138
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12139
  free_stack(common, 1);
12140
  if (repeat_type == OP_UPTO)
12141
    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
12142
  else
12143
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
12144
  }
12145
12146
/* Entry handling that depends on the repeat kind. The brazero jump,
rmin_label and exact_label created here are resolved or targeted at the
end of this function. */
if (ket == OP_KETRMAX)
12147
  {
12148
  if (bra == OP_BRAZERO)
12149
    {
12150
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12151
    free_stack(common, 1);
12152
    brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12153
    }
12154
  }
12155
else if (ket == OP_KETRMIN)
12156
  {
12157
  if (bra != OP_BRAMINZERO)
12158
    {
12159
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12160
    if (repeat_type != 0)
12161
      {
12162
      /* TMP1 was set a few lines above. */
12163
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12164
      /* Drop STR_PTR for non-greedy plus quantifier. */
12165
      if (opcode != OP_ONCE)
12166
        free_stack(common, 1);
12167
      }
12168
    else if (opcode >= OP_SBRA || opcode == OP_ONCE)
12169
      {
12170
      /* Checking zero-length iteration. */
12171
      if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
12172
        CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12173
      else
12174
        {
12175
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12176
        CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 2), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12177
        }
12178
      /* Drop STR_PTR for non-greedy plus quantifier. */
12179
      if (opcode != OP_ONCE)
12180
        free_stack(common, 1);
12181
      }
12182
    else
12183
      JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12184
    }
12185
  rmin_label = LABEL();
12186
  if (repeat_type != 0)
12187
    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
12188
  }
12189
else if (bra == OP_BRAZERO)
12190
  {
12191
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12192
  free_stack(common, 1);
12193
  brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12194
  }
12195
else if (repeat_type == OP_EXACT)
12196
  {
12197
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
12198
  exact_label = LABEL();
12199
  }
12200
12201
/* Capturing group: pop the values saved on the stack back into the
ovector slots (and into capture_last_ptr when that is in use). */
if (offset != 0)
12202
  {
12203
  if (common->capture_last_ptr != 0)
12204
    {
12205
    SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
12206
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12207
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12208
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
12209
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
12210
    free_stack(common, 3);
12211
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
12212
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
12213
    }
12214
  else if (common->optimized_cbracket[offset >> 1] == 0)
12215
    {
12216
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12217
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12218
    free_stack(common, 2);
12219
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
12220
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
12221
    }
12222
  }
12223
else if (SLJIT_UNLIKELY(opcode == OP_ASSERT_SCS))
12224
  {
12225
  /* Swap STR_END with the value kept in the second private data word. */
  OP1(SLJIT_MOV, TMP1, 0, STR_END, 0);
12226
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
12227
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), TMP1, 0);
12228
12229
  /* Nested scs blocks will not update this variable. */
12230
  if (common->restore_end_ptr == 0)
12231
    common->restore_end_ptr = private_data_ptr + sizeof(sljit_sw);
12232
  }
12233
12234
if (SLJIT_UNLIKELY(opcode == OP_ONCE))
12235
  {
12236
  int framesize = CURRENT_AS(bracket_backtrack)->u.framesize;
12237
12238
  SLJIT_ASSERT(framesize != 0);
12239
  if (framesize > 0)
12240
    {
12241
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12242
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12243
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
12244
    }
12245
  /* Skips the code emitted below up to JUMPHERE(once). */
  once = JUMP(SLJIT_JUMP);
12246
  }
12247
else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
12248
  {
12249
  if (has_alternatives)
12250
    {
12251
    /* Always exactly one alternative. */
12252
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12253
    free_stack(common, 1);
12254
12255
    alt_max = 2;
12256
    next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12257
    }
12258
  }
12259
else if (has_alternatives)
12260
  {
12261
  /* Pop the value that selects which backtracking path to run: an index
  compared against constants when alt_max <= 3, otherwise a code address
  that is jumped to indirectly. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12262
  free_stack(common, 1);
12263
12264
  if (alt_max > 3)
12265
    {
12266
    sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
12267
12268
    SLJIT_ASSERT(CURRENT_AS(bracket_backtrack)->matching_mov_addr != NULL);
12269
    sljit_set_label(CURRENT_AS(bracket_backtrack)->matching_mov_addr, LABEL());
12270
    sljit_emit_op0(compiler, SLJIT_ENDBR);
12271
    }
12272
  else
12273
    next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12274
  }
12275
12276
/* Backtracking path of the items collected in current->top so far. */
COMPILE_BACKTRACKINGPATH(current->top);
12277
if (current->own_backtracks)
12278
  set_jumps(current->own_backtracks, LABEL());
12279
12280
if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
12281
  {
12282
  /* Conditional block always has at most one alternative. */
12283
  if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
12284
    {
12285
    SLJIT_ASSERT(has_alternatives);
12286
    assert = CURRENT_AS(bracket_backtrack)->u.assert;
12287
    SLJIT_ASSERT(assert->framesize != 0);
12288
    if (assert->framesize > 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
12289
      {
12290
      /* Revert the frame of the positive assertion used as the condition. */
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
12291
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12292
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
12293
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
12294
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
12295
      }
12296
    cond = JUMP(SLJIT_JUMP);
12297
    set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
12298
    }
12299
  else if (CURRENT_AS(bracket_backtrack)->u.no_capture != NULL)
12300
    {
12301
    SLJIT_ASSERT(has_alternatives);
12302
    cond = JUMP(SLJIT_JUMP);
12303
    set_jumps(CURRENT_AS(bracket_backtrack)->u.no_capture, LABEL());
12304
    }
12305
  else
12306
    SLJIT_ASSERT(!has_alternatives);
12307
  }
12308
12309
if (has_alternatives)
12310
  {
12311
  alt_count = 1;
12312
  /* One iteration per remaining alternative: its matching path is compiled
  here, followed by the code that re-saves the group state and jumps to
  alternative_matchingpath, and then by its backtracking path. */
  do
12313
    {
12314
    current->top = NULL;
12315
    current->own_backtracks = NULL;
12316
    current->simple_backtracks = NULL;
12317
    /* Conditional blocks always have an additional alternative, even if it is empty. */
12318
    if (*cc == OP_ALT)
12319
      {
12320
      ccprev = cc + 1 + LINK_SIZE;
12321
      cc += GET(cc, 1);
12322
12323
      has_vreverse = FALSE;
12324
12325
      /* Reload the subject position the alternative has to start from. */
      switch (opcode)
12326
        {
12327
        case OP_ASSERTBACK:
12328
        case OP_ASSERTBACK_NA:
12329
          SLJIT_ASSERT(private_data_ptr != 0);
12330
          OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12331
12332
          has_vreverse = (*ccprev == OP_VREVERSE);
12333
          if (*ccprev == OP_REVERSE || has_vreverse)
12334
            ccprev = compile_reverse_matchingpath(common, ccprev, current);
12335
          break;
12336
        case OP_ASSERT_SCS:
12337
          OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
12338
          break;
12339
        case OP_ONCE:
12340
          OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
12341
          break;
12342
        case OP_COND:
12343
        case OP_SCOND:
12344
          break;
12345
        default:
12346
          if (private_data_ptr != 0)
12347
            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12348
          else
12349
            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12350
          break;
12351
        }
12352
12353
      compile_matchingpath(common, ccprev, cc, current);
12354
      if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
12355
        return;
12356
12357
      /* Opcode specific work after the alternative has matched. */
      switch (opcode)
12358
        {
12359
        case OP_ASSERTBACK_NA:
12360
          if (has_vreverse)
12361
            {
12362
            SLJIT_ASSERT(current->top != NULL && PRIVATE_DATA(ccbegin + 1));
12363
            add_jump(compiler, &current->top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
12364
            }
12365
12366
          if (PRIVATE_DATA(ccbegin + 1))
12367
            OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
12368
          break;
12369
        case OP_ASSERT_NA:
12370
          OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12371
          break;
12372
        case OP_SCRIPT_RUN:
12373
          match_script_run_common(common, private_data_ptr, current);
12374
          break;
12375
        }
12376
      }
12377
12378
    /* Instructions after the current alternative is successfully matched. */
12379
    /* There is a similar code in compile_bracket_matchingpath. */
12380
    if (opcode == OP_ONCE)
12381
      match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
12382
12383
    /* First pass: count the stack slots needed, then allocate them at once. */
    stacksize = 0;
12384
    if (repeat_type == OP_MINUPTO)
12385
      {
12386
      /* We need to preserve the counter. TMP2 will be used below. */
12387
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
12388
      stacksize++;
12389
      }
12390
    if (ket != OP_KET || bra != OP_BRA)
12391
      stacksize++;
12392
    if (offset != 0)
12393
      {
12394
      if (common->capture_last_ptr != 0)
12395
        stacksize++;
12396
      if (common->optimized_cbracket[offset >> 1] == 0)
12397
        stacksize += 2;
12398
      }
12399
    if (opcode != OP_ONCE)
12400
      stacksize++;
12401
12402
    if (stacksize > 0)
12403
      allocate_stack(common, stacksize);
12404
12405
    /* Second pass: fill the slots; stacksize is now the next slot index. */
    stacksize = 0;
12406
    if (repeat_type == OP_MINUPTO)
12407
      {
12408
      /* TMP2 was set above. */
12409
      OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
12410
      stacksize++;
12411
      }
12412
12413
    if (ket != OP_KET || bra != OP_BRA)
12414
      {
12415
      if (ket != OP_KET)
12416
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
12417
      else
12418
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
12419
      stacksize++;
12420
      }
12421
12422
    if (offset != 0)
12423
      stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
12424
12425
    /* Record which alternative matched: its index when alt_max <= 3,
    otherwise the address of the label set after the jump below. */
    if (opcode != OP_ONCE)
12426
      {
12427
      if (alt_max <= 3)
12428
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);
12429
      else
12430
        mov_addr = sljit_emit_mov_addr(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
12431
      }
12432
12433
    if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
12434
      {
12435
      /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
12436
      SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
12437
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
12438
      }
12439
12440
    JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);
12441
12442
    /* The backtracking path of this alternative starts here. */
    if (opcode != OP_ONCE)
12443
      {
12444
      if (alt_max <= 3)
12445
        {
12446
        JUMPHERE(next_alt);
12447
        alt_count++;
12448
        if (alt_count < alt_max)
12449
          {
12450
          SLJIT_ASSERT(alt_count == 2 && alt_max == 3);
12451
          next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
12452
          }
12453
        }
12454
      else
12455
        {
12456
        sljit_set_label(mov_addr, LABEL());
12457
        sljit_emit_op0(compiler, SLJIT_ENDBR);
12458
        }
12459
      }
12460
12461
    COMPILE_BACKTRACKINGPATH(current->top);
12462
    if (current->own_backtracks)
12463
      set_jumps(current->own_backtracks, LABEL());
12464
    SLJIT_ASSERT(!current->simple_backtracks);
12465
    }
12466
  while (*cc == OP_ALT);
12467
12468
  if (cond != NULL)
12469
    {
12470
    SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
12471
    if (ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT)
12472
      {
12473
      assert = CURRENT_AS(bracket_backtrack)->u.assert;
12474
      SLJIT_ASSERT(assert->framesize != 0);
12475
      if (assert->framesize > 0)
12476
        {
12477
        /* Revert the frame of the negative assertion used as the condition. */
        OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
12478
        add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12479
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
12480
        OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
12481
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
12482
        }
12483
      }
12484
    JUMPHERE(cond);
12485
    }
12486
12487
  /* Free the STR_PTR. */
12488
  if (private_data_ptr == 0)
12489
    free_stack(common, 1);
12490
  }
12491
12492
/* All alternatives are exhausted: restore the group's capture slots or
private data from the stack, depending on the kind of group. */
if (offset != 0)
12493
  {
12494
  /* Using both tmp register is better for instruction scheduling. */
12495
  if (common->optimized_cbracket[offset >> 1] != 0)
12496
    {
12497
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12498
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12499
    free_stack(common, 2);
12500
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
12501
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
12502
    }
12503
  else
12504
    {
12505
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12506
    free_stack(common, 1);
12507
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
12508
    }
12509
  }
12510
else if (opcode == OP_ASSERTBACK_NA && PRIVATE_DATA(ccbegin + 1))
12511
  {
12512
  /* STR_END is reloaded from the second private data word before both
  private data words are overwritten with the values from the stack. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12513
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12514
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
12515
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
12516
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), TMP2, 0);
12517
  free_stack(common, 4);
12518
  }
12519
else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
12520
  {
12521
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
12522
  free_stack(common, 1);
12523
  }
12524
else if (opcode == OP_ASSERT_SCS)
12525
  {
12526
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12527
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12528
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
12529
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
12530
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), TMP2, 0);
12531
  free_stack(common, has_alternatives ? 3 : 2);
12532
12533
  set_jumps(CURRENT_AS(bracket_backtrack)->u.no_capture, LABEL());
12534
12535
  /* Nested scs blocks will not update this variable. */
12536
  if (common->restore_end_ptr == private_data_ptr + SSIZE_OF(sw))
12537
    common->restore_end_ptr = 0;
12538
  }
12539
else if (opcode == OP_ONCE)
12540
  {
12541
  cc = ccbegin + GET(ccbegin, 1);
12542
  stacksize = needs_control_head ? 1 : 0;
12543
12544
  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
12545
    {
12546
    /* Reset head and drop saved frame. */
12547
    stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
12548
    }
12549
  else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
12550
    {
12551
    /* The STR_PTR must be released. */
12552
    stacksize++;
12553
    }
12554
12555
  if (stacksize > 0)
12556
    free_stack(common, stacksize);
12557
12558
  /* Target of the jump emitted for OP_ONCE before the first backtracking path. */
  JUMPHERE(once);
12559
  /* Restore previous private_data_ptr */
12560
  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
12561
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 1));
12562
  else if (ket == OP_KETRMIN)
12563
    {
12564
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12565
    /* See the comment below. */
12566
    free_stack(common, 2);
12567
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
12568
    }
12569
  }
12570
12571
/* Repeat handling: decide whether another iteration is attempted. */
if (repeat_type == OP_EXACT)
12572
  {
12573
  /* Increment the counter and loop back while it does not exceed repeat_count. */
  OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
12574
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
12575
  CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
12576
  }
12577
else if (ket == OP_KETRMAX)
12578
  {
12579
  /* A non-zero saved position re-enters the group via recursive_matchingpath. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12580
  if (bra != OP_BRAZERO)
12581
    free_stack(common, 1);
12582
12583
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12584
  if (bra == OP_BRAZERO)
12585
    {
12586
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12587
    JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
12588
    JUMPHERE(brazero);
12589
    free_stack(common, 1);
12590
    }
12591
  }
12592
else if (ket == OP_KETRMIN)
12593
  {
12594
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12595
12596
  /* OP_ONCE removes everything in case of a backtrack, so we don't
12597
  need to explicitly release the STR_PTR. The extra release would
12598
  affect badly the free_stack(2) above. */
12599
  if (opcode != OP_ONCE)
12600
    free_stack(common, 1);
12601
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
12602
  if (opcode == OP_ONCE)
12603
    free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
12604
  else if (bra == OP_BRAMINZERO)
12605
    free_stack(common, 1);
12606
  }
12607
else if (bra == OP_BRAZERO)
12608
  {
12609
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12610
  JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
12611
  JUMPHERE(brazero);
12612
  }
12613
}
12614
12615
/* Emits the backtracking path for the possessive bracket opcodes
(OP_BRAPOS, OP_CBRAPOS, OP_SBRAPOS, OP_SCBRAPOS, optionally preceded by
OP_BRAPOSZERO). Nothing is retried here: the generated code only restores
saved state and releases the stack words recorded in the backtrack. */
static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
bracketpos_backtrack *backtrack = CURRENT_AS(bracketpos_backtrack);
struct sljit_jump *skip_drop;
PCRE2_SPTR bracket;
int ovector_index;

if (backtrack->framesize >= 0)
  {
  /* Reload STACK_TOP from the private data slot, queue a fast call on the
  revertframes list and step over the remaining frame words. */
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), backtrack->private_data_ptr);
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (backtrack->framesize - 1) * sizeof(sljit_sw));

  if (current->own_backtracks != NULL)
    {
    /* The pending backtracks land on a separate entry that drops the
    stack frame; the straight-line path jumps over it. */
    skip_drop = JUMP(SLJIT_JUMP);
    set_jumps(current->own_backtracks, LABEL());
    free_stack(common, backtrack->stacksize);
    JUMPHERE(skip_drop);
    }

  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), backtrack->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-backtrack->framesize - 1));
  return;
  }

/* Negative framesize: there is no frame to revert, everything is simply
dropped. */
bracket = current->cc;
if (*bracket == OP_BRAPOSZERO)
  bracket++;

if (*bracket == OP_CBRAPOS || *bracket == OP_SCBRAPOS)
  {
  /* Copy the two saved words (and the third one when capture_last_ptr is
  in use) from the stack back into the ovector pair of this group. */
  ovector_index = (GET2(bracket, 1 + LINK_SIZE)) << 1;
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovector_index), TMP1, 0);
  if (common->capture_last_ptr != 0)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovector_index + 1), TMP2, 0);
  if (common->capture_last_ptr != 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
  }

set_jumps(current->own_backtracks, LABEL());
free_stack(common, backtrack->stacksize);
}
12661
12662
/* Emits the backtracking path of OP_BRAMINZERO. The item that follows the
opcode is compiled here by invoking its matching-path compiler by hand:
an assertion (opcode <= OP_ASSERTBACK_NOT) goes through
compile_assert_matchingpath, anything else through the bracket compilers. */
static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
assert_backtrack assert_data;
PCRE2_SPTR cc = current->cc;

/* Start from an empty backtrack record. */
current->simple_backtracks = NULL;
current->own_backtracks = NULL;
current->top = NULL;

if (cc[1] <= OP_ASSERTBACK_NOT)
  {
  /* Assertion: run its matching path against a zeroed, temporary
  backtrack that continues at the saved matchingpath label. */
  memset(&assert_data, 0, sizeof(assert_data));
  assert_data.common.cc = cc;
  assert_data.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
  compile_assert_matchingpath(common, cc, &assert_data, FALSE);
  }
else
  {
  /* Bracket: matching path first, then the backtracking path of whatever
  the matching path pushed onto current->top. */
  compile_bracket_matchingpath(common, cc, current);
  compile_bracket_backtrackingpath(common, current->top);
  }

SLJIT_ASSERT(current->simple_backtracks == NULL && current->own_backtracks == NULL);
}
12685
12686
/* Emits the backtracking path of the control verbs dispatched here by
compile_backtrackingpath: OP_THEN, OP_THEN_ARG, OP_PRUNE, OP_PRUNE_ARG,
OP_SKIP and OP_SKIP_ARG. Depending on the verb and the compile-time context
the generated code jumps to a then-trap, to the positive assertion quit
list, to the quit target or to the reset_match list. */
static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
PCRE2_UCHAR verb = *current->cc;
struct sljit_label *scan_loop;
struct sljit_jump *skip;

if (verb == OP_THEN || verb == OP_THEN_ARG)
  {
  if (common->then_trap != NULL)
    {
    SLJIT_ASSERT(common->control_head_ptr != 0);

    /* Follow the chain that starts at control_head_ptr until an entry is
    found whose word 1 equals type_then_trap and whose word 2 equals the
    start of the active then-trap. */
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
    skip = JUMP(SLJIT_JUMP);

    scan_loop = LABEL();
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    JUMPHERE(skip);
    CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0, scan_loop);
    CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0, scan_loop);
    add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
    return;
    }

  if (!common->local_quit_available && common->in_positive_assertion)
    {
    add_jump(compiler, &common->positive_assertion_quit, JUMP(SLJIT_JUMP));
    return;
    }
  }

/* OP_SKIP_ARG reloads STR_END itself, further down. */
if (verb != OP_SKIP_ARG && common->restore_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->restore_end_ptr);

if (common->local_quit_available)
  {
  /* Fail the match: jump to the quit label, or queue the jump when the
  label does not exist yet. */
  if (common->quit_label != NULL)
    JUMPTO(SLJIT_JUMP, common->quit_label);
  else
    add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
  return;
  }

if (verb == OP_SKIP_ARG)
  {
  SLJIT_ASSERT(common->control_head_ptr != 0 && TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);

  /* Call do_search_mark with the control head and the address two code
  units past the opcode; the result comes back in SLJIT_R0. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_search_mark));

  if (common->restore_end_ptr != 0)
    {
    /* A zero result skips the restart; otherwise STR_PTR and STR_END
    are reloaded before jumping to reset_match. */
    skip = CMP(SLJIT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0);
    OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->restore_end_ptr);
    add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
    JUMPHERE(skip);
    }
  else
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0);
    add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0));
    }
  return;
  }

/* OP_SKIP passes the value saved on the stack, every other verb passes
zero, as the STR_PTR seen by reset_match. */
if (verb != OP_SKIP)
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
else
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
}
12760
12761
/* Emits the backtracking path of OP_VREVERSE. The position kept in stack
word 2 is advanced by one valid character and the matching path is
re-entered; once that position is greater than or equal to stack word 3
the code falls through, together with the pending backtracks. */
static SLJIT_INLINE void compile_vreverse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
struct sljit_jump *limit_reached;
struct sljit_label *give_up;

/* Load the saved position and compare it with the saved limit. */
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
limit_reached = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(3));

/* Advance, store the new position back and try the matching path again. */
skip_valid_char(common);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);
JUMPTO(SLJIT_JUMP, CURRENT_AS(vreverse_backtrack)->matchingpath);

/* Shared exit for the limit check and for the own backtracks. */
give_up = LABEL();
sljit_set_label(limit_reached, give_up);
set_jumps(current->own_backtracks, give_up);
}
12777
12778
/* Emits the backtracking path of the virtual OP_THEN_TRAP opcode. A record
whose then_trap field is set only switches the active compile-time trap.
Otherwise two entries are generated, an ordinary backtrack and the landing
point of the trap's quit list; both end by storing TMP1 into the
control_head_ptr slot. */
static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
then_trap_backtrack *trap = CURRENT_AS(then_trap_backtrack);
struct sljit_jump *join;
int framesize;
int frame_words;

if (trap->then_trap)
  {
  common->then_trap = trap->then_trap;
  return;
  }

/* Ordinary backtrack: the word to restore sits above the frame words (if
any); the frame and the three trap words are released together. */
framesize = trap->framesize;
frame_words = (framesize > 0) ? framesize : 0;

OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(frame_words));
free_stack(common, frame_words + 3);
join = JUMP(SLJIT_JUMP);

/* Landing point of the quit list. STACK_TOP is set by THEN. */
set_jumps(trap->quit, LABEL());

SLJIT_ASSERT(framesize != 0);

if (framesize > 0)
  {
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
  }
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
free_stack(common, 3);

JUMPHERE(join);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
}
12815
12816
/* Emits the backtracking code for a chain of backtrack records. The chain
is walked from `current` along the `prev` links; for each record the
pending simple backtracks are bound to the current position and the
opcode-specific generator is invoked. The value of common->then_trap seen
on entry is put back before returning. */
static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
then_trap_backtrack *outer_then_trap = common->then_trap;

for (; current != NULL; current = current->prev)
  {
  if (current->simple_backtracks != NULL)
    set_jumps(current->simple_backtracks, LABEL());

  switch(*current->cc)
    {
    case OP_SET_SOM:
    /* Pop the saved word and write it back to OVECTOR(0). */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
    break;

    /* Single character repeats. */
    case OP_STAR: case OP_MINSTAR: case OP_PLUS: case OP_MINPLUS:
    case OP_QUERY: case OP_MINQUERY: case OP_UPTO: case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR: case OP_POSPLUS: case OP_POSQUERY: case OP_POSUPTO:
    /* Caseless variants. */
    case OP_STARI: case OP_MINSTARI: case OP_PLUSI: case OP_MINPLUSI:
    case OP_QUERYI: case OP_MINQUERYI: case OP_UPTOI: case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI: case OP_POSPLUSI: case OP_POSQUERYI: case OP_POSUPTOI:
    /* Negated character repeats. */
    case OP_NOTSTAR: case OP_NOTMINSTAR: case OP_NOTPLUS: case OP_NOTMINPLUS:
    case OP_NOTQUERY: case OP_NOTMINQUERY: case OP_NOTUPTO: case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR: case OP_NOTPOSPLUS: case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:
    /* Negated, caseless variants. */
    case OP_NOTSTARI: case OP_NOTMINSTARI: case OP_NOTPLUSI: case OP_NOTMINPLUSI:
    case OP_NOTQUERYI: case OP_NOTMINQUERYI: case OP_NOTUPTOI: case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI: case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:
    /* Character type repeats. */
    case OP_TYPESTAR: case OP_TYPEMINSTAR: case OP_TYPEPLUS: case OP_TYPEMINPLUS:
    case OP_TYPEQUERY: case OP_TYPEMINQUERY: case OP_TYPEUPTO: case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS: case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:
    /* A class has no backtracking path of its own, so a record that
    carries a class opcode was pushed by an iterator. */
    case OP_CLASS: case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS: case OP_ECLASS:
#endif
    compile_iterator_backtrackingpath(common, current);
    break;

    case OP_REF: case OP_REFI: case OP_DNREF: case OP_DNREFI:
    compile_ref_iterator_backtrackingpath(common, current);
    break;

    case OP_RECURSE:
    compile_recurse_backtrackingpath(common, current);
    break;

    case OP_ASSERT: case OP_ASSERT_NOT:
    case OP_ASSERTBACK: case OP_ASSERTBACK_NOT:
    compile_assert_backtrackingpath(common, current);
    break;

    case OP_ASSERT_NA: case OP_ASSERTBACK_NA: case OP_ASSERT_SCS:
    case OP_ONCE: case OP_SCRIPT_RUN:
    case OP_BRA: case OP_CBRA: case OP_COND:
    case OP_SBRA: case OP_SCBRA: case OP_SCOND:
    compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAZERO:
    /* The opcode after OP_BRAZERO selects the generator: assertions up to
    OP_ASSERTBACK_NOT, brackets above it. */
    if (current->cc[1] <= OP_ASSERTBACK_NOT)
      compile_assert_backtrackingpath(common, current);
    else
      compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAPOS: case OP_CBRAPOS: case OP_SBRAPOS: case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    compile_bracketpos_backtrackingpath(common, current);
    break;

    case OP_BRAMINZERO:
    compile_braminzero_backtrackingpath(common, current);
    break;

    case OP_MARK:
    if (common->has_skip_arg)
      {
      /* Five words were saved: word 4 goes back to mark_ptr and word 0 to
      control_head_ptr. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(4));
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 5);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
      }
    else
      {
      /* Only the previous mark_ptr value was saved. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      }
    break;

    case OP_THEN: case OP_THEN_ARG:
    case OP_PRUNE: case OP_PRUNE_ARG:
    case OP_SKIP: case OP_SKIP_ARG:
    compile_control_verb_backtrackingpath(common, current);
    break;

    case OP_COMMIT: case OP_COMMIT_ARG:
    if (common->restore_end_ptr != 0)
      OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->restore_end_ptr);

    /* Without a local quit target the return register is loaded with
    PCRE2_ERROR_NOMATCH before leaving. */
    if (!common->local_quit_available)
      OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);

    if (common->quit_label != NULL)
      JUMPTO(SLJIT_JUMP, common->quit_label);
    else
      add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
    break;

    case OP_CALLOUT: case OP_CALLOUT_STR:
    case OP_FAIL:
    case OP_ACCEPT: case OP_ASSERT_ACCEPT:
    /* Nothing to undo, only the pending jumps are bound. */
    set_jumps(current->own_backtracks, LABEL());
    break;

    case OP_VREVERSE:
    compile_vreverse_backtrackingpath(common, current);
    break;

    case OP_THEN_TRAP:
    /* A virtual opcode for then traps. */
    compile_then_trap_backtrackingpath(common, current);
    break;

    default:
    SLJIT_UNREACHABLE();
    break;
    }
  }

common->then_trap = outer_then_trap;
}
13018
13019
/* Generates the out-of-line routine for the recursion target described by
common->currententry. The routine has two fast-call entry points, recorded
in entry_label (matching path) and backtrack_label (backtracking path), and
returns with TMP1 set to 1 when an alternative matched and to 0 otherwise.
Returns early, leaving the routine unfinished, when the sljit compiler
reports an error. */
static SLJIT_INLINE void compile_recurse(compiler_common *common)
{
DEFINE_COMPILER;
PCRE2_SPTR cc = common->start + common->currententry->start;
PCRE2_SPTR ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
PCRE2_SPTR ccend = bracketend(cc) - (1 + LINK_SIZE);
uint32_t recurse_flags = 0;
int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &recurse_flags);
int accept_found = (recurse_flags & recurse_flag_accept_found) != 0;
int alt_index, alt_total;
int local_words, return_slot, frame_words, alt_record_words;
backtrack_common alt_backtrack;
jump_list *matched = NULL;
struct sljit_jump *next_alt_check = NULL;
struct sljit_jump *accept_backtrack = NULL;
struct sljit_jump *alt_addr = NULL;
struct sljit_label *no_match;

/* Recurse captures then. */
common->then_trap = NULL;

SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);

alt_total = no_alternatives(cc);
alt_index = 0;

/* Matching path entry. */
SLJIT_ASSERT(common->currententry->entry_label == NULL && common->recursive_head_ptr != 0);
common->currententry->entry_label = LABEL();
set_jumps(common->currententry->entry_calls, common->currententry->entry_label);

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, TMP2, 0);
count_match(common);

/* Words kept next to the private data: the return address, plus the
subject pointer when there is more than one alternative. */
local_words = (alt_total > 1) ? 2 : 1;
return_slot = local_words - 1;
frame_words = private_data_size + local_words;
/* Words pushed after each alternative: the recursive head, plus a slot
for the alternative index (or the accept marker) when one is needed. */
alt_record_words = (alt_total > 1 || accept_found) ? 2 : 1;

/* (Reversed) stack layout:
   [private data][return address][optional: str ptr] ... [optional: alternative index][recursive_head_ptr] */

allocate_stack(common, frame_words);
/* Save return address. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(return_slot), TMP2, 0);

copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, local_words, frame_words, recurse_flags);

/* This variable is saved and restored every time a recursive context is
entered or left. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);

if (recurse_flags & recurse_flag_control_head_found)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);

if (alt_total > 1)
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

memset(&alt_backtrack, 0, sizeof(backtrack_common));
common->quit_label = NULL;
common->accept_label = NULL;
common->quit = NULL;
common->accept = NULL;
alt_backtrack.cc = ccbegin;
cc += GET(cc, 1);

/* One iteration per alternative. */
for (;;)
  {
  alt_backtrack.top = NULL;
  alt_backtrack.own_backtracks = NULL;

  /* Every alternative but the first restarts from the saved subject
  pointer. */
  if (alt_backtrack.cc != ccbegin)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  compile_matchingpath(common, alt_backtrack.cc, cc, &alt_backtrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return;

  allocate_stack(common, alt_record_words);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);

  if (alt_record_words == 2)
    {
    /* With more than three alternatives a code address is stored,
    otherwise the index of the alternative. */
    if (alt_total > 3)
      alt_addr = sljit_emit_mov_addr(compiler, SLJIT_MEM1(STACK_TOP), STACK(1));
    else
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_index);
    }

  add_jump(compiler, &matched, JUMP(SLJIT_JUMP));

  if (alt_index == 0)
    {
    /* Backtracking path entry. */
    SLJIT_ASSERT(common->currententry->backtrack_label == NULL);
    common->currententry->backtrack_label = LABEL();
    set_jumps(common->currententry->backtrack_calls, common->currententry->backtrack_label);

    sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, TMP1, 0);

    /* A -1 in the index slot was written by the accept path below. */
    if (accept_found)
      accept_backtrack = CMP(SLJIT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);

    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    /* Save return address. */
    OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(return_slot), TMP1, 0);

    copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_words, frame_words, recurse_flags);

    if (alt_total > 1)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
      free_stack(common, 2);

      if (alt_total > 3)
        {
        /* Indirect jump through the stored address. */
        sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
        sljit_set_label(alt_addr, LABEL());
        sljit_emit_op0(compiler, SLJIT_ENDBR);
        }
      else
        next_alt_check = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
      }
    else
      free_stack(common, alt_record_words);
    }
  else if (alt_total > 3)
    {
    sljit_set_label(alt_addr, LABEL());
    sljit_emit_op0(compiler, SLJIT_ENDBR);
    }
  else
    {
    JUMPHERE(next_alt_check);
    if (alt_index + 1 < alt_total)
      {
      SLJIT_ASSERT(alt_index == 1 && alt_total == 3);
      next_alt_check = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
      }
    }

  alt_index++;

  compile_backtrackingpath(common, alt_backtrack.top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return;
  set_jumps(alt_backtrack.own_backtracks, LABEL());

  if (*cc != OP_ALT)
    break;

  alt_backtrack.cc = cc + 1 + LINK_SIZE;
  cc += GET(cc, 1);
  }

/* No alternative is matched. */

no_match = LABEL();

copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, local_words, frame_words, recurse_flags);

OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(return_slot));
free_stack(common, frame_words);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);

if (common->quit != NULL)
  {
  SLJIT_ASSERT(recurse_flags & recurse_flag_quit_found);

  /* Quit requests reset STACK_TOP to the recursive head and then share
  the no-match exit. */
  set_jumps(common->quit, LABEL());
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
  copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, local_words, frame_words, recurse_flags);
  JUMPTO(SLJIT_JUMP, no_match);
  }

if (accept_found)
  {
  /* Backtracking into a recursion that ended through the accept path. */
  JUMPHERE(accept_backtrack);
  free_stack(common, 2);

  /* Save return address. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(return_slot), TMP1, 0);

  copy_recurse_data(common, ccbegin, ccend, recurse_copy_kept_shared_to_global, local_words, frame_words, recurse_flags);

  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(return_slot));
  free_stack(common, frame_words);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
  OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
  }

if (common->accept != NULL)
  {
  SLJIT_ASSERT(recurse_flags & recurse_flag_accept_found);

  set_jumps(common->accept, LABEL());

  /* Rebuild the two-word record with -1 in the index slot, then fall
  into the common match exit. */
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
  OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);

  allocate_stack(common, 2);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);
  }

/* Common match exit: TMP2 holds the recursive head. */
set_jumps(matched, LABEL());

OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);

copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_words, frame_words, recurse_flags);

OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), STACK(return_slot));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
}
13227
13228
/* CURRENT_AS is the accessor macro used by the backtracking-path compilers
above. Both helper macros are undefined here, so the code that follows
cannot use them. */
#undef COMPILE_BACKTRACKINGPATH
13229
#undef CURRENT_AS
13230
13231
/* Configuration bits accepted in the mode argument of jit_compile().
jit_compile() tests PCRE2_JIT_INVALID_UTF (when SUPPORT_UNICODE is defined)
and then clears every bit of this mask from mode before storing it. */
#define PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS \
13232
  (PCRE2_JIT_INVALID_UTF)
13233
13234
static int jit_compile(pcre2_code *code, sljit_u32 mode)
13235
{
13236
pcre2_real_code *re = (pcre2_real_code *)code;
13237
struct sljit_compiler *compiler;
13238
backtrack_common rootbacktrack;
13239
compiler_common common_data;
13240
compiler_common *common = &common_data;
13241
const sljit_u8 *tables = re->tables;
13242
void *allocator_data = &re->memctl;
13243
int private_data_size;
13244
PCRE2_SPTR ccend;
13245
executable_functions *functions;
13246
void *executable_func;
13247
sljit_uw executable_size, private_data_length, total_length;
13248
struct sljit_label *mainloop_label = NULL;
13249
struct sljit_label *continue_match_label;
13250
struct sljit_label *empty_match_found_label = NULL;
13251
struct sljit_label *empty_match_backtrack_label = NULL;
13252
struct sljit_label *reset_match_label;
13253
struct sljit_label *quit_label;
13254
struct sljit_jump *jump;
13255
struct sljit_jump *minlength_check_failed = NULL;
13256
struct sljit_jump *empty_match = NULL;
13257
struct sljit_jump *end_anchor_failed = NULL;
13258
jump_list *reqcu_not_found = NULL;
13259
13260
SLJIT_ASSERT(tables);
13261
13262
#if HAS_VIRTUAL_REGISTERS == 1
13263
SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) < 0 && sljit_get_register_index(SLJIT_GP_REGISTER, ARGUMENTS) < 0 && sljit_get_register_index(SLJIT_GP_REGISTER, RETURN_ADDR) < 0);
13264
#elif HAS_VIRTUAL_REGISTERS == 0
13265
SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) >= 0 && sljit_get_register_index(SLJIT_GP_REGISTER, ARGUMENTS) >= 0 && sljit_get_register_index(SLJIT_GP_REGISTER, RETURN_ADDR) >= 0);
13266
#else
13267
#error "Invalid value for HAS_VIRTUAL_REGISTERS"
13268
#endif
13269
13270
memset(&rootbacktrack, 0, sizeof(backtrack_common));
13271
memset(common, 0, sizeof(compiler_common));
13272
common->re = re;
13273
common->name_table = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
13274
rootbacktrack.cc = (PCRE2_SPTR)((uint8_t *)re + re->code_start);
13275
13276
#ifdef SUPPORT_UNICODE
13277
common->invalid_utf = (mode & PCRE2_JIT_INVALID_UTF) != 0;
13278
#endif /* SUPPORT_UNICODE */
13279
mode &= ~PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS;
13280
13281
common->start = rootbacktrack.cc;
13282
common->read_only_data_head = NULL;
13283
common->fcc = tables + fcc_offset;
13284
common->lcc = (sljit_sw)(tables + lcc_offset);
13285
common->mode = mode;
13286
common->might_be_empty = (re->minlength == 0) || (re->flags & PCRE2_MATCH_EMPTY);
13287
common->allow_empty_partial = (re->max_lookbehind > 0) || (re->flags & PCRE2_MATCH_EMPTY);
13288
common->nltype = NLTYPE_FIXED;
13289
switch(re->newline_convention)
13290
  {
13291
  case PCRE2_NEWLINE_CR: common->newline = CHAR_CR; break;
13292
  case PCRE2_NEWLINE_LF: common->newline = CHAR_NL; break;
13293
  case PCRE2_NEWLINE_CRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
13294
  case PCRE2_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
13295
  case PCRE2_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
13296
  case PCRE2_NEWLINE_NUL: common->newline = CHAR_NUL; break;
13297
  default: return PCRE2_ERROR_INTERNAL;
13298
  }
13299
common->nlmax = READ_CHAR_MAX;
13300
common->nlmin = 0;
13301
if (re->bsr_convention == PCRE2_BSR_UNICODE)
13302
  common->bsr_nltype = NLTYPE_ANY;
13303
else if (re->bsr_convention == PCRE2_BSR_ANYCRLF)
13304
  common->bsr_nltype = NLTYPE_ANYCRLF;
13305
else
13306
  {
13307
#ifdef BSR_ANYCRLF
13308
  common->bsr_nltype = NLTYPE_ANYCRLF;
13309
#else
13310
  common->bsr_nltype = NLTYPE_ANY;
13311
#endif
13312
  }
13313
common->bsr_nlmax = READ_CHAR_MAX;
13314
common->bsr_nlmin = 0;
13315
common->endonly = (re->overall_options & PCRE2_DOLLAR_ENDONLY) != 0;
13316
common->ctypes = (sljit_sw)(tables + ctypes_offset);
13317
common->name_count = re->name_count;
13318
common->name_entry_size = re->name_entry_size;
13319
common->unset_backref = (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) != 0;
13320
common->alt_circumflex = (re->overall_options & PCRE2_ALT_CIRCUMFLEX) != 0;
13321
#ifdef SUPPORT_UNICODE
13322
/* PCRE2_UTF[16|32] have the same value as PCRE2_UTF8. */
13323
common->utf = (re->overall_options & PCRE2_UTF) != 0;
13324
common->ucp = (re->overall_options & PCRE2_UCP) != 0;
13325
if (common->utf)
13326
  {
13327
  if (common->nltype == NLTYPE_ANY)
13328
    common->nlmax = 0x2029;
13329
  else if (common->nltype == NLTYPE_ANYCRLF)
13330
    common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
13331
  else
13332
    {
13333
    /* We only care about the first newline character. */
13334
    common->nlmax = common->newline & 0xff;
13335
    }
13336
13337
  if (common->nltype == NLTYPE_FIXED)
13338
    common->nlmin = common->newline & 0xff;
13339
  else
13340
    common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
13341
13342
  if (common->bsr_nltype == NLTYPE_ANY)
13343
    common->bsr_nlmax = 0x2029;
13344
  else
13345
    common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
13346
  common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
13347
  }
13348
else
13349
  common->invalid_utf = FALSE;
13350
#endif /* SUPPORT_UNICODE */
13351
ccend = bracketend(common->start);
13352
13353
/* Calculate the local space size on the stack. */
13354
common->ovector_start = LOCAL0;
13355
/* Allocate space for temporary data structures. */
13356
private_data_length = ccend - common->start;
13357
/* The chance of overflow is very low, but might happen on 32 bit. */
13358
if (private_data_length > ~(sljit_uw)0 / sizeof(sljit_s32))
13359
  return PCRE2_ERROR_NOMEMORY;
13360
13361
private_data_length *= sizeof(sljit_s32);
13362
/* Align to 32 bit. */
13363
total_length = ((re->top_bracket + 1) + (sljit_uw)(sizeof(sljit_s32) - 1)) & ~(sljit_uw)(sizeof(sljit_s32) - 1);
13364
if (~(sljit_uw)0 - private_data_length < total_length)
13365
  return PCRE2_ERROR_NOMEMORY;
13366
13367
total_length += private_data_length;
13368
common->private_data_ptrs = (sljit_s32*)SLJIT_MALLOC(total_length, allocator_data);
13369
if (!common->private_data_ptrs)
13370
  return PCRE2_ERROR_NOMEMORY;
13371
13372
memset(common->private_data_ptrs, 0, private_data_length);
13373
common->optimized_cbracket = ((sljit_u8 *)common->private_data_ptrs) + private_data_length;
13374
#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
13375
memset(common->optimized_cbracket, 0, re->top_bracket + 1);
13376
#else
13377
memset(common->optimized_cbracket, 1, re->top_bracket + 1);
13378
#endif
13379
13380
SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
13381
#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
13382
common->capture_last_ptr = common->ovector_start;
13383
common->ovector_start += sizeof(sljit_sw);
13384
#endif
13385
if (!check_opcode_types(common, common->start, ccend))
13386
  {
13387
  SLJIT_FREE(common->private_data_ptrs, allocator_data);
13388
  return PCRE2_ERROR_JIT_UNSUPPORTED;
13389
  }
13390
13391
/* Checking flags and updating ovector_start. */
13392
if (mode == PCRE2_JIT_COMPLETE &&
13393
    (re->flags & PCRE2_LASTSET) != 0 &&
13394
    (re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0)
13395
  {
13396
  common->req_char_ptr = common->ovector_start;
13397
  common->ovector_start += sizeof(sljit_sw);
13398
  }
13399
13400
if (mode != PCRE2_JIT_COMPLETE)
13401
  {
13402
  common->start_used_ptr = common->ovector_start;
13403
  common->ovector_start += sizeof(sljit_sw);
13404
  if (mode == PCRE2_JIT_PARTIAL_SOFT)
13405
    {
13406
    common->hit_start = common->ovector_start;
13407
    common->ovector_start += sizeof(sljit_sw);
13408
    }
13409
  }
13410
13411
if ((re->overall_options & (PCRE2_FIRSTLINE | PCRE2_USE_OFFSET_LIMIT)) != 0)
13412
  {
13413
  common->match_end_ptr = common->ovector_start;
13414
  common->ovector_start += sizeof(sljit_sw);
13415
  }
13416
13417
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
13418
common->control_head_ptr = 1;
13419
#endif
13420
13421
if (common->control_head_ptr != 0)
13422
  {
13423
  common->control_head_ptr = common->ovector_start;
13424
  common->ovector_start += sizeof(sljit_sw);
13425
  }
13426
13427
if (common->has_set_som)
13428
  {
13429
  /* Saving the real start pointer is necessary. */
13430
  common->start_ptr = common->ovector_start;
13431
  common->ovector_start += sizeof(sljit_sw);
13432
  }
13433
13434
/* Aligning ovector to even number of sljit words. */
13435
if ((common->ovector_start & sizeof(sljit_sw)) != 0)
13436
  common->ovector_start += sizeof(sljit_sw);
13437
13438
if (common->start_ptr == 0)
13439
  common->start_ptr = OVECTOR(0);
13440
13441
/* Capturing brackets cannot be optimized if callouts are allowed. */
13442
if (common->capture_last_ptr != 0)
13443
  memset(common->optimized_cbracket, 0, re->top_bracket + 1);
13444
13445
SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
13446
common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
13447
private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
13448
13449
if ((re->overall_options & PCRE2_ANCHORED) == 0 &&
13450
    (re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0 &&
13451
    !common->has_skip_in_assert_back)
13452
  detect_early_fail(common, common->start, &private_data_size, 0, 0);
13453
13454
set_private_data_ptrs(common, &private_data_size, ccend);
13455
13456
SLJIT_ASSERT(common->early_fail_start_ptr <= common->early_fail_end_ptr);
13457
13458
if (private_data_size > 65536)
13459
  {
13460
  SLJIT_FREE(common->private_data_ptrs, allocator_data);
13461
  return PCRE2_ERROR_JIT_UNSUPPORTED;
13462
  }
13463
13464
if (common->has_then)
13465
  {
13466
  total_length = ccend - common->start;
13467
  common->then_offsets = (sljit_u8 *)SLJIT_MALLOC(total_length, allocator_data);
13468
  if (!common->then_offsets)
13469
    {
13470
    SLJIT_FREE(common->private_data_ptrs, allocator_data);
13471
    return PCRE2_ERROR_NOMEMORY;
13472
    }
13473
  memset(common->then_offsets, 0, total_length);
13474
  set_then_offsets(common, common->start, NULL);
13475
  }
13476
13477
compiler = sljit_create_compiler(allocator_data);
13478
if (!compiler)
13479
  {
13480
  SLJIT_FREE(common->private_data_ptrs, allocator_data);
13481
  if (common->has_then)
13482
    SLJIT_FREE(common->then_offsets, allocator_data);
13483
  return PCRE2_ERROR_NOMEMORY;
13484
  }
13485
common->compiler = compiler;
13486
13487
/* Main pcre2_jit_exec entry. */
13488
SLJIT_ASSERT((private_data_size & (sizeof(sljit_sw) - 1)) == 0);
13489
sljit_emit_enter(compiler, 0, SLJIT_ARGS1(W, W), 5 | SLJIT_ENTER_VECTOR(SLJIT_NUMBER_OF_SCRATCH_VECTOR_REGISTERS), 5, private_data_size);
13490
13491
/* Register init. */
13492
reset_ovector(common, (re->top_bracket + 1) * 2);
13493
if (common->req_char_ptr != 0)
13494
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);
13495
13496
OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
13497
OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
13498
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
13499
OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
13500
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
13501
OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
13502
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, end));
13503
OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, start));
13504
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
13505
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);
13506
13507
if (common->early_fail_start_ptr < common->early_fail_end_ptr)
13508
  reset_early_fail(common);
13509
13510
if (mode == PCRE2_JIT_PARTIAL_SOFT)
13511
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
13512
if (common->mark_ptr != 0)
13513
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
13514
if (common->control_head_ptr != 0)
13515
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
13516
13517
/* Main part of the matching */
13518
if ((re->overall_options & PCRE2_ANCHORED) == 0)
13519
  {
13520
  mainloop_label = mainloop_entry(common);
13521
  continue_match_label = LABEL();
13522
  /* Forward search if possible. */
13523
  if ((re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0)
13524
    {
13525
    if (mode == PCRE2_JIT_COMPLETE && fast_forward_first_n_chars(common))
13526
      ;
13527
    else if ((re->flags & PCRE2_FIRSTSET) != 0)
13528
      fast_forward_first_char(common);
13529
    else if ((re->flags & PCRE2_STARTLINE) != 0)
13530
      fast_forward_newline(common);
13531
    else if ((re->flags & PCRE2_FIRSTMAPSET) != 0)
13532
      fast_forward_start_bits(common);
13533
    }
13534
  }
13535
else
13536
  continue_match_label = LABEL();
13537
13538
if (mode == PCRE2_JIT_COMPLETE && re->minlength > 0 &&
13539
    (re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0)
13540
  {
13541
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
13542
  OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(re->minlength));
13543
  minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
13544
  }
13545
if (common->req_char_ptr != 0)
13546
  reqcu_not_found = search_requested_char(common, (PCRE2_UCHAR)(re->last_codeunit), (re->flags & PCRE2_LASTCASELESS) != 0, (re->flags & PCRE2_FIRSTSET) != 0);
13547
13548
/* Store the current STR_PTR in OVECTOR(0). */
13549
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
13550
/* Copy the limit of allowed recursions. */
13551
OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
13552
if (common->capture_last_ptr != 0)
13553
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, 0);
13554
if (common->fast_forward_bc_ptr != NULL)
13555
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3, STR_PTR, 0);
13556
13557
if (common->start_ptr != OVECTOR(0))
13558
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);
13559
13560
/* Copy the beginning of the string. */
13561
if (mode == PCRE2_JIT_PARTIAL_SOFT)
13562
  {
13563
  jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
13564
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
13565
  JUMPHERE(jump);
13566
  }
13567
else if (mode == PCRE2_JIT_PARTIAL_HARD)
13568
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
13569
13570
compile_matchingpath(common, common->start, ccend, &rootbacktrack);
13571
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13572
  {
13573
  sljit_free_compiler(compiler);
13574
  SLJIT_FREE(common->private_data_ptrs, allocator_data);
13575
  if (common->has_then)
13576
    SLJIT_FREE(common->then_offsets, allocator_data);
13577
  PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
13578
  return PCRE2_ERROR_NOMEMORY;
13579
  }
13580
13581
if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
13582
  end_anchor_failed = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0);
13583
13584
if (common->might_be_empty)
13585
  {
13586
  empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
13587
  empty_match_found_label = LABEL();
13588
  }
13589
13590
common->accept_label = LABEL();
13591
if (common->accept != NULL)
13592
  set_jumps(common->accept, common->accept_label);
13593
13594
/* This means we have a match. Update the ovector. */
13595
copy_ovector(common, re->top_bracket + 1);
13596
common->quit_label = common->abort_label = LABEL();
13597
if (common->quit != NULL)
13598
  set_jumps(common->quit, common->quit_label);
13599
if (common->abort != NULL)
13600
  set_jumps(common->abort, common->abort_label);
13601
if (minlength_check_failed != NULL)
13602
  SET_LABEL(minlength_check_failed, common->abort_label);
13603
13604
sljit_emit_op0(compiler, SLJIT_SKIP_FRAMES_BEFORE_RETURN);
13605
sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
13606
13607
if (common->failed_match != NULL)
13608
  {
13609
  SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
13610
  set_jumps(common->failed_match, LABEL());
13611
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
13612
  JUMPTO(SLJIT_JUMP, common->abort_label);
13613
  }
13614
13615
if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
13616
  JUMPHERE(end_anchor_failed);
13617
13618
if (mode != PCRE2_JIT_COMPLETE)
13619
  {
13620
  common->partialmatchlabel = LABEL();
13621
  set_jumps(common->partialmatch, common->partialmatchlabel);
13622
  return_with_partial_match(common, common->quit_label);
13623
  }
13624
13625
if (common->might_be_empty)
13626
  empty_match_backtrack_label = LABEL();
13627
compile_backtrackingpath(common, rootbacktrack.top);
13628
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13629
  {
13630
  sljit_free_compiler(compiler);
13631
  SLJIT_FREE(common->private_data_ptrs, allocator_data);
13632
  if (common->has_then)
13633
    SLJIT_FREE(common->then_offsets, allocator_data);
13634
  PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
13635
  return PCRE2_ERROR_NOMEMORY;
13636
  }
13637
13638
SLJIT_ASSERT(rootbacktrack.prev == NULL);
13639
reset_match_label = LABEL();
13640
13641
if (mode == PCRE2_JIT_PARTIAL_SOFT)
13642
  {
13643
  /* Update hit_start only in the first time. */
13644
  jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
13645
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
13646
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
13647
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
13648
  JUMPHERE(jump);
13649
  }
13650
13651
/* Check we have remaining characters. */
13652
if ((re->overall_options & PCRE2_ANCHORED) == 0 && common->match_end_ptr != 0)
13653
  {
13654
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
13655
  }
13656
13657
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP),
13658
    (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3) : common->start_ptr);
13659
13660
if ((re->overall_options & PCRE2_ANCHORED) == 0)
13661
  {
13662
  if (common->ff_newline_shortcut != NULL)
13663
    {
13664
    /* There cannot be more newlines if PCRE2_FIRSTLINE is set. */
13665
    if ((re->overall_options & PCRE2_FIRSTLINE) == 0)
13666
      {
13667
      if (common->match_end_ptr != 0)
13668
        {
13669
        OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
13670
        OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
13671
        CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, common->ff_newline_shortcut);
13672
        OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
13673
        }
13674
      else
13675
        CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
13676
      }
13677
    }
13678
  else
13679
    CMPTO(SLJIT_LESS, STR_PTR, 0, (common->match_end_ptr == 0) ? STR_END : TMP1, 0, mainloop_label);
13680
  }
13681
13682
/* No more remaining characters. */
13683
if (reqcu_not_found != NULL)
13684
  set_jumps(reqcu_not_found, LABEL());
13685
13686
if (mode == PCRE2_JIT_PARTIAL_SOFT)
13687
  CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
13688
13689
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
13690
JUMPTO(SLJIT_JUMP, common->quit_label);
13691
13692
flush_stubs(common);
13693
13694
if (common->might_be_empty)
13695
  {
13696
  JUMPHERE(empty_match);
13697
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
13698
  OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
13699
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
13700
  JUMPTO(SLJIT_NOT_ZERO, empty_match_backtrack_label);
13701
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
13702
  JUMPTO(SLJIT_ZERO, empty_match_found_label);
13703
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
13704
  CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
13705
  JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
13706
  }
13707
13708
common->fast_forward_bc_ptr = NULL;
13709
common->early_fail_start_ptr = 0;
13710
common->early_fail_end_ptr = 0;
13711
common->currententry = common->entries;
13712
common->local_quit_available = TRUE;
13713
quit_label = common->quit_label;
13714
SLJIT_ASSERT(common->restore_end_ptr == 0);
13715
13716
if (common->currententry != NULL)
13717
  {
13718
  /* A free bit for each private data. */
13719
  common->recurse_bitset_size = ((private_data_size / SSIZE_OF(sw)) + 7) >> 3;
13720
  SLJIT_ASSERT(common->recurse_bitset_size > 0);
13721
  common->recurse_bitset = (sljit_u8*)SLJIT_MALLOC(common->recurse_bitset_size, allocator_data);;
13722
13723
  if (common->recurse_bitset != NULL)
13724
    {
13725
    do
13726
      {
13727
      /* Might add new entries. */
13728
      compile_recurse(common);
13729
      if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13730
        break;
13731
      flush_stubs(common);
13732
      common->currententry = common->currententry->next;
13733
      }
13734
    while (common->currententry != NULL);
13735
13736
    SLJIT_FREE(common->recurse_bitset, allocator_data);
13737
    }
13738
13739
  if (common->currententry != NULL)
13740
    {
13741
    /* The common->recurse_bitset has been freed. */
13742
    SLJIT_ASSERT(sljit_get_compiler_error(compiler) || common->recurse_bitset == NULL);
13743
13744
    sljit_free_compiler(compiler);
13745
    SLJIT_FREE(common->private_data_ptrs, allocator_data);
13746
    if (common->has_then)
13747
      SLJIT_FREE(common->then_offsets, allocator_data);
13748
    PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
13749
    return PCRE2_ERROR_NOMEMORY;
13750
    }
13751
  }
13752
13753
common->local_quit_available = FALSE;
13754
common->quit_label = quit_label;
13755
SLJIT_ASSERT(common->restore_end_ptr == 0);
13756
13757
/* Allocating stack, returns with PCRE2_ERROR_JIT_STACKLIMIT if fails. */
13758
/* This is a (really) rare case. */
13759
set_jumps(common->stackalloc, LABEL());
13760
/* RETURN_ADDR is not a saved register. */
13761
SLJIT_ASSERT(common->locals_size >= 2 * SSIZE_OF(sw));
13762
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCAL0);
13763
13764
SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
13765
13766
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL1, STR_PTR, 0);
13767
OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
13768
OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_LIMIT, 0, SLJIT_IMM, STACK_GROWTH_RATE);
13769
OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, stack));
13770
OP1(SLJIT_MOV, STACK_LIMIT, 0, TMP2, 0);
13771
13772
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(sljit_stack_resize));
13773
13774
jump = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
13775
OP1(SLJIT_MOV, TMP2, 0, STACK_LIMIT, 0);
13776
OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_RETURN_REG, 0);
13777
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
13778
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1);
13779
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
13780
13781
/* Allocation failed. */
13782
JUMPHERE(jump);
13783
/* We break the return address cache here, but this is a really rare case. */
13784
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_JIT_STACKLIMIT);
13785
JUMPTO(SLJIT_JUMP, common->quit_label);
13786
13787
/* Call limit reached. */
13788
set_jumps(common->calllimit, LABEL());
13789
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_MATCHLIMIT);
13790
JUMPTO(SLJIT_JUMP, common->quit_label);
13791
13792
if (common->revertframes != NULL)
13793
  {
13794
  set_jumps(common->revertframes, LABEL());
13795
  do_revertframes(common);
13796
  }
13797
if (common->wordboundary != NULL)
13798
  {
13799
  set_jumps(common->wordboundary, LABEL());
13800
  check_wordboundary(common, FALSE);
13801
  }
13802
if (common->ucp_wordboundary != NULL)
13803
  {
13804
  set_jumps(common->ucp_wordboundary, LABEL());
13805
  check_wordboundary(common, TRUE);
13806
  }
13807
if (common->anynewline != NULL)
13808
  {
13809
  set_jumps(common->anynewline, LABEL());
13810
  check_anynewline(common);
13811
  }
13812
if (common->hspace != NULL)
13813
  {
13814
  set_jumps(common->hspace, LABEL());
13815
  check_hspace(common);
13816
  }
13817
if (common->vspace != NULL)
13818
  {
13819
  set_jumps(common->vspace, LABEL());
13820
  check_vspace(common);
13821
  }
13822
if (common->casefulcmp != NULL)
13823
  {
13824
  set_jumps(common->casefulcmp, LABEL());
13825
  do_casefulcmp(common);
13826
  }
13827
if (common->caselesscmp != NULL)
13828
  {
13829
  set_jumps(common->caselesscmp, LABEL());
13830
  do_caselesscmp(common);
13831
  }
13832
if (common->reset_match != NULL || common->restart_match != NULL)
13833
  {
13834
  if (common->restart_match != NULL)
13835
    {
13836
    set_jumps(common->restart_match, LABEL());
13837
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
13838
    }
13839
13840
  set_jumps(common->reset_match, LABEL());
13841
  do_reset_match(common, (re->top_bracket + 1) * 2);
13842
  /* The value of restart_match is in TMP1. */
13843
  CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
13844
  OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
13845
  JUMPTO(SLJIT_JUMP, reset_match_label);
13846
  }
13847
#ifdef SUPPORT_UNICODE
13848
#if PCRE2_CODE_UNIT_WIDTH == 8
13849
if (common->utfreadchar != NULL)
13850
  {
13851
  set_jumps(common->utfreadchar, LABEL());
13852
  do_utfreadchar(common);
13853
  }
13854
if (common->utfreadtype8 != NULL)
13855
  {
13856
  set_jumps(common->utfreadtype8, LABEL());
13857
  do_utfreadtype8(common);
13858
  }
13859
if (common->utfpeakcharback != NULL)
13860
  {
13861
  set_jumps(common->utfpeakcharback, LABEL());
13862
  do_utfpeakcharback(common);
13863
  }
13864
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
13865
#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
13866
if (common->utfreadchar_invalid != NULL)
13867
  {
13868
  set_jumps(common->utfreadchar_invalid, LABEL());
13869
  do_utfreadchar_invalid(common);
13870
  }
13871
if (common->utfreadnewline_invalid != NULL)
13872
  {
13873
  set_jumps(common->utfreadnewline_invalid, LABEL());
13874
  do_utfreadnewline_invalid(common);
13875
  }
13876
if (common->utfmoveback_invalid)
13877
  {
13878
  set_jumps(common->utfmoveback_invalid, LABEL());
13879
  do_utfmoveback_invalid(common);
13880
  }
13881
if (common->utfpeakcharback_invalid)
13882
  {
13883
  set_jumps(common->utfpeakcharback_invalid, LABEL());
13884
  do_utfpeakcharback_invalid(common);
13885
  }
13886
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16 */
13887
if (common->getucd != NULL)
13888
  {
13889
  set_jumps(common->getucd, LABEL());
13890
  do_getucd(common);
13891
  }
13892
if (common->getucdtype != NULL)
13893
  {
13894
  set_jumps(common->getucdtype, LABEL());
13895
  do_getucdtype(common);
13896
  }
13897
#endif /* SUPPORT_UNICODE */
13898
13899
SLJIT_FREE(common->private_data_ptrs, allocator_data);
13900
if (common->has_then)
13901
  SLJIT_FREE(common->then_offsets, allocator_data);
13902
13903
executable_func = sljit_generate_code(compiler, 0, NULL);
13904
executable_size = sljit_get_generated_code_size(compiler);
13905
sljit_free_compiler(compiler);
13906
13907
if (executable_func == NULL)
13908
  {
13909
  PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
13910
  return PCRE2_ERROR_NOMEMORY;
13911
  }
13912
13913
/* Reuse the function descriptor if possible. */
13914
if (re->executable_jit != NULL)
13915
  functions = (executable_functions *)re->executable_jit;
13916
else
13917
  {
13918
  functions = SLJIT_MALLOC(sizeof(executable_functions), allocator_data);
13919
  if (functions == NULL)
13920
    {
13921
    /* This case is highly unlikely since we just recently
13922
    freed a lot of memory. Not impossible though. */
13923
    sljit_free_code(executable_func, NULL);
13924
    PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
13925
    return PCRE2_ERROR_NOMEMORY;
13926
    }
13927
  memset(functions, 0, sizeof(executable_functions));
13928
  functions->top_bracket = re->top_bracket + 1;
13929
  functions->limit_match = re->limit_match;
13930
  re->executable_jit = functions;
13931
  }
13932
13933
/* Turn mode into an index. */
13934
if (mode == PCRE2_JIT_COMPLETE)
13935
  mode = 0;
13936
else
13937
  mode = (mode == PCRE2_JIT_PARTIAL_SOFT) ? 1 : 2;
13938
13939
SLJIT_ASSERT(mode < JIT_NUMBER_OF_COMPILE_MODES);
13940
functions->executable_funcs[mode] = executable_func;
13941
functions->read_only_data_heads[mode] = common->read_only_data_head;
13942
functions->executable_sizes[mode] = executable_size;
13943
return 0;
13944
}
13945
13946
#endif
13947
13948
/*************************************************
13949
*        JIT compile a Regular Expression        *
13950
*************************************************/
13951
13952
/* This function uses JIT to convert a previously-compiled pattern into machine
13953
code.
13954
13955
Arguments:
13956
  code          a compiled pattern
13957
  options       JIT option bits
13958
13959
Returns:        0: success or (*NOJIT) was used
13960
               <0: an error code
13961
*/
13962
13963
/* The JIT option bits accepted by pcre2_jit_compile(). Any bit outside this
mask makes that function return PCRE2_ERROR_JIT_BADOPTION. */

#define PUBLIC_JIT_COMPILE_OPTIONS \
  (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | \
   PCRE2_JIT_PARTIAL_HARD | PCRE2_JIT_INVALID_UTF)
13965
13966
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
13967
pcre2_jit_compile(pcre2_code *code, uint32_t options)
13968
0
{
13969
0
pcre2_real_code *re = (pcre2_real_code *)code;
13970
#ifdef SUPPORT_JIT
13971
void *exec_memory;
13972
executable_functions *functions;
13973
static int executable_allocator_is_working = -1;
13974
13975
if (executable_allocator_is_working == -1)
13976
  {
13977
  /* Checks whether the executable allocator is working. This check
13978
     might run multiple times in multi-threaded environments, but the
13979
     result should not be affected by it. */
13980
  exec_memory = SLJIT_MALLOC_EXEC(32, NULL);
13981
  if (exec_memory != NULL)
13982
    {
13983
    SLJIT_FREE_EXEC(((sljit_u8*)(exec_memory)) + SLJIT_EXEC_OFFSET(exec_memory), NULL);
13984
    executable_allocator_is_working = 1;
13985
    }
13986
  else executable_allocator_is_working = 0;
13987
  }
13988
#endif
13989
13990
0
if (options & PCRE2_JIT_TEST_ALLOC)
13991
0
  {
13992
0
  if (options != PCRE2_JIT_TEST_ALLOC)
13993
0
    return PCRE2_ERROR_JIT_BADOPTION;
13994
13995
#ifdef SUPPORT_JIT
13996
  return executable_allocator_is_working ? 0 : PCRE2_ERROR_NOMEMORY;
13997
#else
13998
0
  return PCRE2_ERROR_JIT_UNSUPPORTED;
13999
0
#endif
14000
0
  }
14001
14002
0
if (code == NULL)
14003
0
  return PCRE2_ERROR_NULL;
14004
14005
0
if ((options & ~PUBLIC_JIT_COMPILE_OPTIONS) != 0)
14006
0
  return PCRE2_ERROR_JIT_BADOPTION;
14007
14008
/* Support for invalid UTF was first introduced in JIT, with the option
14009
PCRE2_JIT_INVALID_UTF. Later, support was added to the interpreter, and the
14010
compile-time option PCRE2_MATCH_INVALID_UTF was created. This is now the
14011
preferred feature, with the earlier option deprecated. However, for backward
14012
compatibility, if the earlier option is set, it forces the new option so that
14013
if JIT matching falls back to the interpreter, there is still support for
14014
invalid UTF. However, if this function has already been successfully called
14015
without PCRE2_JIT_INVALID_UTF and without PCRE2_MATCH_INVALID_UTF (meaning that
14016
non-invalid-supporting JIT code was compiled), give an error.
14017
14018
If in the future support for PCRE2_JIT_INVALID_UTF is withdrawn, the following
14019
actions are needed:
14020
14021
  1. Remove the definition from pcre2.h.in and from the list in
14022
     PUBLIC_JIT_COMPILE_OPTIONS above.
14023
14024
  2. Replace PCRE2_JIT_INVALID_UTF with a local flag in this module.
14025
14026
  3. Replace PCRE2_JIT_INVALID_UTF in pcre2_jit_test.c.
14027
14028
  4. Delete the following short block of code. The setting of "re" and
14029
     "functions" can be moved into the JIT-only block below, but if that is
14030
     done, (void)re and (void)functions will be needed in the non-JIT case, to
14031
     avoid compiler warnings.
14032
*/
14033
14034
#ifdef SUPPORT_JIT
14035
functions = (executable_functions *)re->executable_jit;
14036
#endif
14037
14038
0
if ((options & PCRE2_JIT_INVALID_UTF) != 0)
14039
0
  {
14040
0
  if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) == 0)
14041
0
    {
14042
#ifdef SUPPORT_JIT
14043
    if (functions != NULL) return PCRE2_ERROR_JIT_BADOPTION;
14044
#endif
14045
0
    re->overall_options |= PCRE2_MATCH_INVALID_UTF;
14046
0
    }
14047
0
  }
14048
14049
/* The above tests are run with and without JIT support. This means that
14050
PCRE2_JIT_INVALID_UTF propagates back into the regex options (ensuring
14051
interpreter support) even in the absence of JIT. But now, if there is no JIT
14052
support, give an error return. */
14053
14054
0
#ifndef SUPPORT_JIT
14055
0
return PCRE2_ERROR_JIT_BADOPTION;
14056
#else  /* SUPPORT_JIT */
14057
14058
/* There is JIT support. Do the necessary. */
14059
14060
if ((re->flags & PCRE2_NOJIT) != 0) return 0;
14061
14062
if (!executable_allocator_is_working)
14063
  return PCRE2_ERROR_NOMEMORY;
14064
14065
if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0)
14066
  options |= PCRE2_JIT_INVALID_UTF;
14067
14068
if ((options & PCRE2_JIT_COMPLETE) != 0 && (functions == NULL
14069
    || functions->executable_funcs[0] == NULL)) {
14070
  uint32_t excluded_options = (PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD);
14071
  int result = jit_compile(code, options & ~excluded_options);
14072
  if (result != 0)
14073
    return result;
14074
  }
14075
14076
if ((options & PCRE2_JIT_PARTIAL_SOFT) != 0 && (functions == NULL
14077
    || functions->executable_funcs[1] == NULL)) {
14078
  uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_HARD);
14079
  int result = jit_compile(code, options & ~excluded_options);
14080
  if (result != 0)
14081
    return result;
14082
  }
14083
14084
if ((options & PCRE2_JIT_PARTIAL_HARD) != 0 && (functions == NULL
14085
    || functions->executable_funcs[2] == NULL)) {
14086
  uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT);
14087
  int result = jit_compile(code, options & ~excluded_options);
14088
  if (result != 0)
14089
    return result;
14090
  }
14091
14092
return 0;
14093
14094
#endif  /* SUPPORT_JIT */
14095
0
}
14096
14097
/* JIT compiler uses an all-in-one approach. This improves security,
14098
   since the code generator functions are not exported. */
14099
14100
#define INCLUDED_FROM_PCRE2_JIT_COMPILE
14101
14102
#include "pcre2_jit_match.c"
14103
#include "pcre2_jit_misc.c"
14104
14105
/* End of pcre2_jit_compile.c */