Coverage Report

Created: 2025-11-16 06:23

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/php-src/ext/pcre/pcre2lib/pcre2_jit_compile.c
Line
Count
Source
1
/*************************************************
2
*      Perl-Compatible Regular Expressions       *
3
*************************************************/
4
5
/* PCRE is a library of functions to support regular expressions whose syntax
6
and semantics are as close as possible to those of the Perl 5 language.
7
8
                       Written by Philip Hazel
9
                    This module by Zoltan Herczeg
10
     Original API code Copyright (c) 1997-2012 University of Cambridge
11
          New API code Copyright (c) 2016-2024 University of Cambridge
12
13
-----------------------------------------------------------------------------
14
Redistribution and use in source and binary forms, with or without
15
modification, are permitted provided that the following conditions are met:
16
17
    * Redistributions of source code must retain the above copyright notice,
18
      this list of conditions and the following disclaimer.
19
20
    * Redistributions in binary form must reproduce the above copyright
21
      notice, this list of conditions and the following disclaimer in the
22
      documentation and/or other materials provided with the distribution.
23
24
    * Neither the name of the University of Cambridge nor the names of its
25
      contributors may be used to endorse or promote products derived from
26
      this software without specific prior written permission.
27
28
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
-----------------------------------------------------------------------------
40
*/
41
42
#ifdef HAVE_CONFIG_H
43
#include "config.h"
44
#endif
45
46
#if defined(__has_feature)
47
#if __has_feature(memory_sanitizer)
48
#include <sanitizer/msan_interface.h>
49
#endif /* __has_feature(memory_sanitizer) */
50
#endif /* defined(__has_feature) */
51
52
#include "pcre2_internal.h"
53
54
#ifdef SUPPORT_JIT
55
56
/* All-in-one: Since we use the JIT compiler only from here,
57
we just include it. This way we don't need to touch the build
58
system files. */
59
60
#define SLJIT_CONFIG_AUTO 1
61
#define SLJIT_CONFIG_STATIC 1
62
#define SLJIT_VERBOSE 0
63
64
#ifdef PCRE2_DEBUG
65
#define SLJIT_DEBUG 1
66
#else
67
#define SLJIT_DEBUG 0
68
#endif
69
70
#define SLJIT_MALLOC(size, allocator_data) pcre2_jit_malloc(size, allocator_data)
71
#define SLJIT_FREE(ptr, allocator_data) pcre2_jit_free(ptr, allocator_data)
72
73
static void * pcre2_jit_malloc(size_t size, void *allocator_data)
74
{
75
pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
76
return allocator->malloc(size, allocator->memory_data);
77
}
78
79
static void pcre2_jit_free(void *ptr, void *allocator_data)
80
{
81
pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
82
allocator->free(ptr, allocator->memory_data);
83
}
84
85
#include "sljit/sljitLir.c"
86
87
#if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
88
#error Unsupported architecture
89
#endif
90
91
/* Defines for debugging purposes. */
92
93
/* 1 - Use unoptimized capturing brackets.
94
   2 - Enable capture_last_ptr (includes option 1). */
95
/* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
96
97
/* 1 - Always have a control head. */
98
/* #define DEBUG_FORCE_CONTROL_HEAD 1 */
99
100
/* Allocate memory for the regex stack on the real machine stack.
101
Fast, but limited size. */
102
#define MACHINE_STACK_SIZE 32768
103
104
/* Growth rate for stack allocated by the OS. Should be a multiple
105
of page size. */
106
#define STACK_GROWTH_RATE 8192
107
108
/* Enable to check that the allocation could destroy temporaries. */
109
#if defined SLJIT_DEBUG && SLJIT_DEBUG
110
#define DESTROY_REGISTERS 1
111
#endif
112
113
/*
114
Short summary about the backtracking mechanism employed by the jit code generator:
115
116
The code generator follows the recursive nature of the PERL compatible regular
117
expressions. The basic blocks of regular expressions are condition checkers
118
which execute different commands depending on the result of the condition check.
119
The relationship between the operators can be horizontal (concatenation) and
120
vertical (sub-expression) (See struct backtrack_common for more details).
121
122
  'ab' - 'a' and 'b' regexps are concatenated
123
  'a+' - 'a' is the sub-expression of the '+' operator
124
125
The condition checkers are boolean (true/false) checkers. Machine code is generated
126
for the checker itself and for the actions depending on the result of the checker.
127
The 'true' case is called the matching path (expected path), and the other is called
128
the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
129
branches on the matching path.
130
131
 Greedy star operator (*) :
132
   Matching path: match happens.
133
   Backtrack path: match failed.
134
 Non-greedy star operator (*?) :
135
   Matching path: no need to perform a match.
136
   Backtrack path: match is required.
137
138
The following example shows the code generated for a capturing bracket
139
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
140
we have the following regular expression:
141
142
   A(B|C)D
143
144
The generated code will be the following:
145
146
 A matching path
147
 '(' matching path (pushing arguments to the stack)
148
 B matching path
149
 ')' matching path (pushing arguments to the stack)
150
 D matching path
151
 return with successful match
152
153
 D backtrack path
154
 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
155
 B backtrack path
156
 C expected path
157
 jump to D matching path
158
 C backtrack path
159
 A backtrack path
160
161
 Note that the order of the backtrack code paths is the opposite of the fast
162
 code paths. In this way the topmost value on the stack always belongs
163
 to the current backtrack code path. The backtrack path must check
164
 whether there is a next alternative. If so, it needs to jump back to
165
 the matching path eventually. Otherwise it needs to clear out its own stack
166
 frame and continue the execution on the backtrack code paths.
167
*/
168
169
/*
170
Saved stack frames:
171
172
Atomic blocks and asserts require reloading the values of private data
173
when the backtrack mechanism is performed. Because of OP_RECURSE, the data
174
are not necessarily known at compile time, thus we need a dynamic restore
175
mechanism.
176
177
The stack frames are stored in a chain list, and have the following format:
178
([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
179
180
Thus we can restore the private data to a particular point in the stack.
181
*/
182
183
typedef struct jit_arguments {
184
  /* Pointers first. */
185
  struct sljit_stack *stack;
186
  PCRE2_SPTR str;
187
  PCRE2_SPTR begin;
188
  PCRE2_SPTR end;
189
  pcre2_match_data *match_data;
190
  PCRE2_SPTR startchar_ptr;
191
  PCRE2_UCHAR *mark_ptr;
192
  int (*callout)(pcre2_callout_block *, void *);
193
  void *callout_data;
194
  /* Everything else after. */
195
  sljit_uw offset_limit;
196
  sljit_u32 limit_match;
197
  sljit_u32 oveccount;
198
  sljit_u32 options;
199
} jit_arguments;
200
201
#define JIT_NUMBER_OF_COMPILE_MODES 3
202
203
typedef struct executable_functions {
204
  void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
205
  void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
206
  sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
207
  sljit_u32 top_bracket;
208
  sljit_u32 limit_match;
209
} executable_functions;
210
211
typedef struct jump_list {
212
  struct sljit_jump *jump;
213
  struct jump_list *next;
214
} jump_list;
215
216
typedef struct stub_list {
217
  struct sljit_jump *start;
218
  struct sljit_label *quit;
219
  struct stub_list *next;
220
} stub_list;
221
222
enum frame_types {
223
  no_frame = -1,
224
  no_stack = -2
225
};
226
227
enum control_types {
228
  type_mark = 0,
229
  type_then_trap = 1
230
};
231
232
enum  early_fail_types {
233
  type_skip = 0,
234
  type_fail = 1,
235
  type_fail_range = 2
236
};
237
238
typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args);
239
240
/* The following structure is the key data type for the recursive
241
code generator. It is allocated by compile_matchingpath, and contains
242
the arguments for compile_backtrackingpath. Must be the first member
243
of its descendants. */
244
typedef struct backtrack_common {
245
  /* Backtracking path of an opcode, which falls back
246
     to our opcode, if it cannot resume matching. */
247
  struct backtrack_common *prev;
248
  /* Backtracks for opcodes without backtracking path.
249
     These opcodes are between 'prev' and the current
250
     opcode, and they never resume the match. */
251
  jump_list *simple_backtracks;
252
  /* Internal backtracking list for block constructs
253
     which contains other opcodes, such as brackets,
254
     asserts, conditionals, etc. */
255
  struct backtrack_common *top;
256
  /* Backtracks used internally by the opcode. For component
257
     opcodes, this list is also used by those opcodes without
258
     backtracking path which follows the 'top' backtrack. */
259
  jump_list *own_backtracks;
260
  /* Opcode pointer. */
261
  PCRE2_SPTR cc;
262
} backtrack_common;
263
264
typedef struct assert_backtrack {
265
  backtrack_common common;
266
  jump_list *condfailed;
267
  /* Less than 0 if a frame is not needed. */
268
  int framesize;
269
  /* Points to our private memory word on the stack. */
270
  int private_data_ptr;
271
  /* For iterators. */
272
  struct sljit_label *matchingpath;
273
} assert_backtrack;
274
275
typedef struct bracket_backtrack {
276
  backtrack_common common;
277
  /* Where to continue if an alternative is successfully matched. */
278
  struct sljit_label *alternative_matchingpath;
279
  /* For rmin and rmax iterators. */
280
  struct sljit_label *recursive_matchingpath;
281
  /* For greedy ? operator. */
282
  struct sljit_label *zero_matchingpath;
283
  /* Contains the branches of a failed condition. */
284
  union {
285
    /* Both for OP_COND, OP_SCOND. */
286
    jump_list *condfailed;
287
    assert_backtrack *assert;
288
    /* For OP_ONCE. Less than 0 if not needed. */
289
    int framesize;
290
    /* For brackets with >3 alternatives. */
291
    struct sljit_jump *matching_mov_addr;
292
  } u;
293
  /* Points to our private memory word on the stack. */
294
  int private_data_ptr;
295
} bracket_backtrack;
296
297
typedef struct bracketpos_backtrack {
298
  backtrack_common common;
299
  /* Points to our private memory word on the stack. */
300
  int private_data_ptr;
301
  /* Reverting stack is needed. */
302
  int framesize;
303
  /* Allocated stack size. */
304
  int stacksize;
305
} bracketpos_backtrack;
306
307
typedef struct braminzero_backtrack {
308
  backtrack_common common;
309
  struct sljit_label *matchingpath;
310
} braminzero_backtrack;
311
312
typedef struct char_iterator_backtrack {
313
  backtrack_common common;
314
  /* Next iteration. */
315
  struct sljit_label *matchingpath;
316
  union {
317
    jump_list *backtracks;
318
    struct {
319
      unsigned int othercasebit;
320
      PCRE2_UCHAR chr;
321
      BOOL enabled;
322
    } charpos;
323
  } u;
324
} char_iterator_backtrack;
325
326
typedef struct ref_iterator_backtrack {
327
  backtrack_common common;
328
  /* Next iteration. */
329
  struct sljit_label *matchingpath;
330
} ref_iterator_backtrack;
331
332
typedef struct recurse_entry {
333
  struct recurse_entry *next;
334
  /* Contains the function entry label. */
335
  struct sljit_label *entry_label;
336
  /* Contains the function backtrack label. */
337
  struct sljit_label *backtrack_label;
338
  /* Collects the entry calls until the function is created. */
339
  jump_list *entry_calls;
340
  /* Collects the backtrack calls until the function is created. */
341
  jump_list *backtrack_calls;
342
  /* Points to the starting opcode. */
343
  sljit_sw start;
344
} recurse_entry;
345
346
typedef struct recurse_backtrack {
347
  backtrack_common common;
348
  /* Return to the matching path. */
349
  struct sljit_label *matchingpath;
350
  /* Recursive pattern. */
351
  recurse_entry *entry;
352
  /* Pattern is inlined. */
353
  BOOL inlined_pattern;
354
} recurse_backtrack;
355
356
typedef struct vreverse_backtrack {
357
  backtrack_common common;
358
  /* Return to the matching path. */
359
  struct sljit_label *matchingpath;
360
} vreverse_backtrack;
361
362
#define OP_THEN_TRAP OP_TABLE_LENGTH
363
364
typedef struct then_trap_backtrack {
365
  backtrack_common common;
366
  /* If then_trap is not NULL, this structure contains the real
367
  then_trap for the backtracking path. */
368
  struct then_trap_backtrack *then_trap;
369
  /* Points to the starting opcode. */
370
  sljit_sw start;
371
  /* Exit point for the then opcodes of this alternative. */
372
  jump_list *quit;
373
  /* Frame size of the current alternative. */
374
  int framesize;
375
} then_trap_backtrack;
376
377
#define MAX_N_CHARS 12
378
#define MAX_DIFF_CHARS 5
379
380
typedef struct fast_forward_char_data {
381
  /* Number of characters in the chars array, 255 for any character. */
382
  sljit_u8 count;
383
  /* Number of last UTF-8 characters in the chars array. */
384
  sljit_u8 last_count;
385
  /* Available characters in the current position. */
386
  PCRE2_UCHAR chars[MAX_DIFF_CHARS];
387
} fast_forward_char_data;
388
389
#define MAX_CLASS_RANGE_SIZE 4
390
#define MAX_CLASS_CHARS_SIZE 3
391
392
typedef struct compiler_common {
393
  /* The sljit generic compiler. */
394
  struct sljit_compiler *compiler;
395
  /* Compiled regular expression. */
396
  pcre2_real_code *re;
397
  /* First byte code. */
398
  PCRE2_SPTR start;
399
  /* Maps private data offset to each opcode. */
400
  sljit_s32 *private_data_ptrs;
401
  /* Chain list of read-only data ptrs. */
402
  void *read_only_data_head;
403
  /* Tells whether the capturing bracket is optimized. */
404
  sljit_u8 *optimized_cbracket;
405
  /* Tells whether the starting offset is a target of then. */
406
  sljit_u8 *then_offsets;
407
  /* Current position where a THEN must jump. */
408
  then_trap_backtrack *then_trap;
409
  /* Starting offset of private data for capturing brackets. */
410
  sljit_s32 cbra_ptr;
411
  /* Output vector starting point. Must be divisible by 2. */
412
  sljit_s32 ovector_start;
413
  /* Points to the starting character of the current match. */
414
  sljit_s32 start_ptr;
415
  /* Last known position of the requested byte. */
416
  sljit_s32 req_char_ptr;
417
  /* Head of the last recursion. */
418
  sljit_s32 recursive_head_ptr;
419
  /* First inspected character for partial matching.
420
     (Needed for avoiding zero length partial matches.) */
421
  sljit_s32 start_used_ptr;
422
  /* Starting pointer for partial soft matches. */
423
  sljit_s32 hit_start;
424
  /* Pointer of the match end position. */
425
  sljit_s32 match_end_ptr;
426
  /* Points to the marked string. */
427
  sljit_s32 mark_ptr;
428
  /* Head of the recursive control verb management chain.
429
     Each item must have a previous offset and type
430
     (see control_types) values. See do_search_mark. */
431
  sljit_s32 control_head_ptr;
432
  /* Points to the last matched capture block index. */
433
  sljit_s32 capture_last_ptr;
434
  /* Fast forward skipping byte code pointer. */
435
  PCRE2_SPTR fast_forward_bc_ptr;
436
  /* Locals used by fast fail optimization. */
437
  sljit_s32 early_fail_start_ptr;
438
  sljit_s32 early_fail_end_ptr;
439
  /* Variables used by recursive call generator. */
440
  sljit_s32 recurse_bitset_size;
441
  uint8_t *recurse_bitset;
442
443
  /* Flipped and lower case tables. */
444
  const sljit_u8 *fcc;
445
  sljit_sw lcc;
446
  /* Mode can be PCRE2_JIT_COMPLETE and others. */
447
  int mode;
448
  /* TRUE, when empty match is accepted for partial matching. */
449
  BOOL allow_empty_partial;
450
  /* TRUE, when minlength is greater than 0. */
451
  BOOL might_be_empty;
452
  /* \K is found in the pattern. */
453
  BOOL has_set_som;
454
  /* (*SKIP:arg) is found in the pattern. */
455
  BOOL has_skip_arg;
456
  /* (*THEN) is found in the pattern. */
457
  BOOL has_then;
458
  /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
459
  BOOL has_skip_in_assert_back;
460
  /* Quit is redirected by recurse, negative assertion, or positive assertion in conditional block. */
461
  BOOL local_quit_available;
462
  /* Currently in a positive assertion. */
463
  BOOL in_positive_assertion;
464
  /* Newline control. */
465
  int nltype;
466
  sljit_u32 nlmax;
467
  sljit_u32 nlmin;
468
  int newline;
469
  int bsr_nltype;
470
  sljit_u32 bsr_nlmax;
471
  sljit_u32 bsr_nlmin;
472
  /* Dollar endonly. */
473
  int endonly;
474
  /* Tables. */
475
  sljit_sw ctypes;
476
  /* Named capturing brackets. */
477
  PCRE2_SPTR name_table;
478
  sljit_sw name_count;
479
  sljit_sw name_entry_size;
480
481
  /* Labels and jump lists. */
482
  struct sljit_label *partialmatchlabel;
483
  struct sljit_label *quit_label;
484
  struct sljit_label *abort_label;
485
  struct sljit_label *accept_label;
486
  struct sljit_label *ff_newline_shortcut;
487
  stub_list *stubs;
488
  recurse_entry *entries;
489
  recurse_entry *currententry;
490
  jump_list *partialmatch;
491
  jump_list *quit;
492
  jump_list *positive_assertion_quit;
493
  jump_list *abort;
494
  jump_list *failed_match;
495
  jump_list *accept;
496
  jump_list *calllimit;
497
  jump_list *stackalloc;
498
  jump_list *revertframes;
499
  jump_list *wordboundary;
500
  jump_list *ucp_wordboundary;
501
  jump_list *anynewline;
502
  jump_list *hspace;
503
  jump_list *vspace;
504
  jump_list *casefulcmp;
505
  jump_list *caselesscmp;
506
  jump_list *reset_match;
507
  /* Same as reset_match, but resets the STR_PTR as well. */
508
  jump_list *restart_match;
509
  BOOL unset_backref;
510
  BOOL alt_circumflex;
511
#ifdef SUPPORT_UNICODE
512
  BOOL utf;
513
  BOOL invalid_utf;
514
  BOOL ucp;
515
  /* Points to saving area for iref. */
516
  sljit_s32 iref_ptr;
517
  jump_list *getucd;
518
  jump_list *getucdtype;
519
#if PCRE2_CODE_UNIT_WIDTH == 8
520
  jump_list *utfreadchar;
521
  jump_list *utfreadtype8;
522
  jump_list *utfpeakcharback;
523
#endif
524
#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
525
  jump_list *utfreadchar_invalid;
526
  jump_list *utfreadnewline_invalid;
527
  jump_list *utfmoveback_invalid;
528
  jump_list *utfpeakcharback_invalid;
529
#endif
530
#endif /* SUPPORT_UNICODE */
531
} compiler_common;
532
533
/* For byte_sequence_compare. */
534
535
typedef struct compare_context {
536
  int length;
537
  int sourcereg;
538
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
539
  int ucharptr;
540
  union {
541
    sljit_s32 asint;
542
    sljit_u16 asushort;
543
#if PCRE2_CODE_UNIT_WIDTH == 8
544
    sljit_u8 asbyte;
545
    sljit_u8 asuchars[4];
546
#elif PCRE2_CODE_UNIT_WIDTH == 16
547
    sljit_u16 asuchars[2];
548
#elif PCRE2_CODE_UNIT_WIDTH == 32
549
    sljit_u32 asuchars[1];
550
#endif
551
  } c;
552
  union {
553
    sljit_s32 asint;
554
    sljit_u16 asushort;
555
#if PCRE2_CODE_UNIT_WIDTH == 8
556
    sljit_u8 asbyte;
557
    sljit_u8 asuchars[4];
558
#elif PCRE2_CODE_UNIT_WIDTH == 16
559
    sljit_u16 asuchars[2];
560
#elif PCRE2_CODE_UNIT_WIDTH == 32
561
    sljit_u32 asuchars[1];
562
#endif
563
  } oc;
564
#endif
565
} compare_context;
566
567
/* Undefine sljit macros. */
568
#undef CMP
569
570
/* Used for accessing the elements of the stack. */
571
#define STACK(i)      ((i) * SSIZE_OF(sw))
572
573
#ifdef SLJIT_PREF_SHIFT_REG
574
#if SLJIT_PREF_SHIFT_REG == SLJIT_R2
575
/* Nothing. */
576
#elif SLJIT_PREF_SHIFT_REG == SLJIT_R3
577
#define SHIFT_REG_IS_R3
578
#else
579
#error "Unsupported shift register"
580
#endif
581
#endif
582
583
#define TMP1          SLJIT_R0
584
#ifdef SHIFT_REG_IS_R3
585
#define TMP2          SLJIT_R3
586
#define TMP3          SLJIT_R2
587
#else
588
#define TMP2          SLJIT_R2
589
#define TMP3          SLJIT_R3
590
#endif
591
#define STR_PTR       SLJIT_R1
592
#define STR_END       SLJIT_S0
593
#define STACK_TOP     SLJIT_S1
594
#define STACK_LIMIT   SLJIT_S2
595
#define COUNT_MATCH   SLJIT_S3
596
#define ARGUMENTS     SLJIT_S4
597
#define RETURN_ADDR   SLJIT_R4
598
599
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
600
#define HAS_VIRTUAL_REGISTERS 1
601
#else
602
#define HAS_VIRTUAL_REGISTERS 0
603
#endif
604
605
/* Local space layout. */
606
/* These two locals can be used by the current opcode. */
607
#define LOCALS0          (0 * sizeof(sljit_sw))
608
#define LOCALS1          (1 * sizeof(sljit_sw))
609
/* Two local variables for possessive quantifiers (char1 cannot use them). */
610
#define POSSESSIVE0      (2 * sizeof(sljit_sw))
611
#define POSSESSIVE1      (3 * sizeof(sljit_sw))
612
/* Max limit of recursions. */
613
#define LIMIT_MATCH      (4 * sizeof(sljit_sw))
614
/* The output vector is stored on the stack, and contains pointers
615
to characters. The vector data is divided into two groups: the first
616
group contains the start / end character pointers, and the second is
617
the start pointers when the end of the capturing group has not yet been reached. */
618
#define OVECTOR_START    (common->ovector_start)
619
#define OVECTOR(i)       (OVECTOR_START + (i) * SSIZE_OF(sw))
620
#define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * SSIZE_OF(sw))
621
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
622
623
#if PCRE2_CODE_UNIT_WIDTH == 8
624
#define MOV_UCHAR  SLJIT_MOV_U8
625
#define IN_UCHARS(x) (x)
626
#elif PCRE2_CODE_UNIT_WIDTH == 16
627
#define MOV_UCHAR  SLJIT_MOV_U16
628
#define UCHAR_SHIFT (1)
629
#define IN_UCHARS(x) ((x) * 2)
630
#elif PCRE2_CODE_UNIT_WIDTH == 32
631
#define MOV_UCHAR  SLJIT_MOV_U32
632
#define UCHAR_SHIFT (2)
633
#define IN_UCHARS(x) ((x) * 4)
634
#else
635
#error Unsupported compiling mode
636
#endif
637
638
/* Shortcuts. */
639
#define DEFINE_COMPILER \
640
  struct sljit_compiler *compiler = common->compiler
641
#define OP1(op, dst, dstw, src, srcw) \
642
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
643
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
644
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
645
#define OP2U(op, src1, src1w, src2, src2w) \
646
  sljit_emit_op2u(compiler, (op), (src1), (src1w), (src2), (src2w))
647
#define OP_SRC(op, src, srcw) \
648
  sljit_emit_op_src(compiler, (op), (src), (srcw))
649
#define LABEL() \
650
  sljit_emit_label(compiler)
651
#define JUMP(type) \
652
  sljit_emit_jump(compiler, (type))
653
#define JUMPTO(type, label) \
654
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
655
#define JUMPHERE(jump) \
656
  sljit_set_label((jump), sljit_emit_label(compiler))
657
#define SET_LABEL(jump, label) \
658
  sljit_set_label((jump), (label))
659
#define CMP(type, src1, src1w, src2, src2w) \
660
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
661
#define CMPTO(type, src1, src1w, src2, src2w, label) \
662
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
663
#define OP_FLAGS(op, dst, dstw, type) \
664
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))
665
#define SELECT(type, dst_reg, src1, src1w, src2_reg) \
666
  sljit_emit_select(compiler, (type), (dst_reg), (src1), (src1w), (src2_reg))
667
#define GET_LOCAL_BASE(dst, dstw, offset) \
668
  sljit_get_local_base(compiler, (dst), (dstw), (offset))
669
670
#define READ_CHAR_MAX 0x7fffffff
671
672
#define INVALID_UTF_CHAR -1
673
#define UNASSIGNED_UTF_CHAR 888
674
675
#if defined SUPPORT_UNICODE
676
#if PCRE2_CODE_UNIT_WIDTH == 8
677
678
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
679
  { \
680
  if (ptr[0] <= 0x7f) \
681
    c = *ptr++; \
682
  else if (ptr + 1 < end && ptr[1] >= 0x80 && ptr[1] < 0xc0) \
683
    { \
684
    c = ptr[1] - 0x80; \
685
    \
686
    if (ptr[0] >= 0xc2 && ptr[0] <= 0xdf) \
687
      { \
688
      c |= (ptr[0] - 0xc0) << 6; \
689
      ptr += 2; \
690
      } \
691
    else if (ptr + 2 < end && ptr[2] >= 0x80 && ptr[2] < 0xc0) \
692
      { \
693
      c = c << 6 | (ptr[2] - 0x80); \
694
      \
695
      if (ptr[0] >= 0xe0 && ptr[0] <= 0xef) \
696
        { \
697
        c |= (ptr[0] - 0xe0) << 12; \
698
        ptr += 3; \
699
        \
700
        if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
701
          { \
702
          invalid_action; \
703
          } \
704
        } \
705
      else if (ptr + 3 < end && ptr[3] >= 0x80 && ptr[3] < 0xc0) \
706
        { \
707
        c = c << 6 | (ptr[3] - 0x80); \
708
        \
709
        if (ptr[0] >= 0xf0 && ptr[0] <= 0xf4) \
710
          { \
711
          c |= (ptr[0] - 0xf0) << 18; \
712
          ptr += 4; \
713
          \
714
          if (c >= 0x110000 || c < 0x10000) \
715
            { \
716
            invalid_action; \
717
            } \
718
          } \
719
        else \
720
          { \
721
          invalid_action; \
722
          } \
723
        } \
724
      else \
725
        { \
726
        invalid_action; \
727
        } \
728
      } \
729
    else \
730
      { \
731
      invalid_action; \
732
      } \
733
    } \
734
  else \
735
    { \
736
    invalid_action; \
737
    } \
738
  }
739
740
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
741
  { \
742
  c = ptr[-1]; \
743
  if (c <= 0x7f) \
744
    ptr--; \
745
  else if (ptr - 1 > start && ptr[-1] >= 0x80 && ptr[-1] < 0xc0) \
746
    { \
747
    c -= 0x80; \
748
    \
749
    if (ptr[-2] >= 0xc2 && ptr[-2] <= 0xdf) \
750
      { \
751
      c |= (ptr[-2] - 0xc0) << 6; \
752
      ptr -= 2; \
753
      } \
754
    else if (ptr - 2 > start && ptr[-2] >= 0x80 && ptr[-2] < 0xc0) \
755
      { \
756
      c = c << 6 | (ptr[-2] - 0x80); \
757
      \
758
      if (ptr[-3] >= 0xe0 && ptr[-3] <= 0xef) \
759
        { \
760
        c |= (ptr[-3] - 0xe0) << 12; \
761
        ptr -= 3; \
762
        \
763
        if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
764
          { \
765
          invalid_action; \
766
          } \
767
        } \
768
      else if (ptr - 3 > start && ptr[-3] >= 0x80 && ptr[-3] < 0xc0) \
769
        { \
770
        c = c << 6 | (ptr[-3] - 0x80); \
771
        \
772
        if (ptr[-4] >= 0xf0 && ptr[-4] <= 0xf4) \
773
          { \
774
          c |= (ptr[-4] - 0xf0) << 18; \
775
          ptr -= 4; \
776
          \
777
          if (c >= 0x110000 || c < 0x10000) \
778
            { \
779
            invalid_action; \
780
            } \
781
          } \
782
        else \
783
          { \
784
          invalid_action; \
785
          } \
786
        } \
787
      else \
788
        { \
789
        invalid_action; \
790
        } \
791
      } \
792
    else \
793
      { \
794
      invalid_action; \
795
      } \
796
    } \
797
  else \
798
    { \
799
    invalid_action; \
800
    } \
801
  }
802
803
#elif PCRE2_CODE_UNIT_WIDTH == 16
804
805
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
806
  { \
807
  if (ptr[0] < 0xd800 || ptr[0] >= 0xe000) \
808
    c = *ptr++; \
809
  else if (ptr[0] < 0xdc00 && ptr + 1 < end && ptr[1] >= 0xdc00 && ptr[1] < 0xe000) \
810
    { \
811
    c = (((ptr[0] - 0xd800) << 10) | (ptr[1] - 0xdc00)) + 0x10000; \
812
    ptr += 2; \
813
    } \
814
  else \
815
    { \
816
    invalid_action; \
817
    } \
818
  }
819
820
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
821
  { \
822
  c = ptr[-1]; \
823
  if (c < 0xd800 || c >= 0xe000) \
824
    ptr--; \
825
  else if (c >= 0xdc00 && ptr - 1 > start && ptr[-2] >= 0xd800 && ptr[-2] < 0xdc00) \
826
    { \
827
    c = (((ptr[-2] - 0xd800) << 10) | (c - 0xdc00)) + 0x10000; \
828
    ptr -= 2; \
829
    } \
830
  else \
831
    { \
832
    invalid_action; \
833
    } \
834
  }
835
836
837
#elif PCRE2_CODE_UNIT_WIDTH == 32
838
839
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
840
  { \
841
  if (ptr[0] < 0xd800 || (ptr[0] >= 0xe000 && ptr[0] < 0x110000)) \
842
    c = *ptr++; \
843
  else \
844
    { \
845
    invalid_action; \
846
    } \
847
  }
848
849
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
850
  { \
851
  c = ptr[-1]; \
852
  if (ptr[-1] < 0xd800 || (ptr[-1] >= 0xe000 && ptr[-1] < 0x110000)) \
853
    ptr--; \
854
  else \
855
    { \
856
    invalid_action; \
857
    } \
858
  }
859
860
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
861
#endif /* SUPPORT_UNICODE */
862
863
static PCRE2_SPTR bracketend(PCRE2_SPTR cc)
864
{
865
SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
866
do cc += GET(cc, 1); while (*cc == OP_ALT);
867
SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
868
cc += 1 + LINK_SIZE;
869
return cc;
870
}
871
872
static int no_alternatives(PCRE2_SPTR cc)
873
{
874
int count = 0;
875
SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
876
do
877
  {
878
  cc += GET(cc, 1);
879
  count++;
880
  }
881
while (*cc == OP_ALT);
882
SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
883
return count;
884
}
885
886
static BOOL find_vreverse(PCRE2_SPTR cc)
887
{
888
  SLJIT_ASSERT(*cc == OP_ASSERTBACK || *cc == OP_ASSERTBACK_NOT ||  *cc == OP_ASSERTBACK_NA);
889
890
  do
891
    {
892
    if (cc[1 + LINK_SIZE] == OP_VREVERSE)
893
      return TRUE;
894
    cc += GET(cc, 1);
895
    }
896
  while (*cc == OP_ALT);
897
898
  return FALSE;
899
}
900
901
/* Functions which might need modification for all newly supported opcodes:
902
 next_opcode
903
 check_opcode_types
904
 set_private_data_ptrs
905
 get_framesize
906
 init_frame
907
 get_recurse_data_length
908
 copy_recurse_data
909
 compile_matchingpath
910
 compile_backtrackingpath
911
*/
912
913
/* Returns the position of the opcode that follows the opcode at cc.
Returns NULL for OP_ANYBYTE in UTF mode and for any opcode which is not
listed below (the latter also triggers SLJIT_UNREACHABLE). */
static PCRE2_SPTR next_opcode(compiler_common *common, PCRE2_SPTR cc)
{
SLJIT_UNUSED_ARG(common);

switch(*cc)
  {
  /* Opcodes whose length is fully described by the OP_lengths table. */
  case OP_SOD: case OP_SOM: case OP_SET_SOM:
  case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY:
  case OP_NOT_DIGIT: case OP_DIGIT:
  case OP_NOT_WHITESPACE: case OP_WHITESPACE:
  case OP_NOT_WORDCHAR: case OP_WORDCHAR:
  case OP_ANY: case OP_ALLANY:
  case OP_NOTPROP: case OP_PROP:
  case OP_ANYNL:
  case OP_NOT_HSPACE: case OP_HSPACE:
  case OP_NOT_VSPACE: case OP_VSPACE:
  case OP_EXTUNI:
  case OP_EODN: case OP_EOD:
  case OP_CIRC: case OP_CIRCM:
  case OP_DOLL: case OP_DOLLM:
  case OP_CRSTAR: case OP_CRMINSTAR:
  case OP_CRPLUS: case OP_CRMINPLUS:
  case OP_CRQUERY: case OP_CRMINQUERY:
  case OP_CRRANGE: case OP_CRMINRANGE:
  case OP_CRPOSSTAR: case OP_CRPOSPLUS:
  case OP_CRPOSQUERY: case OP_CRPOSRANGE:
  case OP_CLASS: case OP_NCLASS:
  case OP_REF: case OP_REFI:
  case OP_DNREF: case OP_DNREFI:
  case OP_RECURSE:
  case OP_CALLOUT:
  case OP_ALT:
  case OP_KET: case OP_KETRMAX: case OP_KETRMIN: case OP_KETRPOS:
  case OP_REVERSE: case OP_VREVERSE:
  case OP_ASSERT: case OP_ASSERT_NOT:
  case OP_ASSERTBACK: case OP_ASSERTBACK_NOT:
  case OP_ASSERT_NA: case OP_ASSERTBACK_NA:
  case OP_ONCE:
  case OP_SCRIPT_RUN:
  case OP_BRA: case OP_BRAPOS:
  case OP_CBRA: case OP_CBRAPOS:
  case OP_COND:
  case OP_SBRA: case OP_SBRAPOS:
  case OP_SCBRA: case OP_SCBRAPOS:
  case OP_SCOND:
  case OP_CREF: case OP_DNCREF:
  case OP_RREF: case OP_DNRREF:
  case OP_FALSE: case OP_TRUE:
  case OP_BRAZERO: case OP_BRAMINZERO: case OP_BRAPOSZERO:
  case OP_PRUNE: case OP_SKIP: case OP_THEN: case OP_COMMIT:
  case OP_FAIL: case OP_ACCEPT: case OP_ASSERT_ACCEPT:
  case OP_CLOSE:
  case OP_SKIPZERO:
  case OP_NOT_UCP_WORD_BOUNDARY: case OP_UCP_WORD_BOUNDARY:
  return cc + PRIV(OP_lengths)[*cc];

  /* Opcodes which end with a character: in UTF mode the character
  may occupy additional code units. */
  case OP_CHAR: case OP_CHARI:
  case OP_NOT: case OP_NOTI:
  case OP_STAR: case OP_MINSTAR:
  case OP_PLUS: case OP_MINPLUS:
  case OP_QUERY: case OP_MINQUERY:
  case OP_UPTO: case OP_MINUPTO:
  case OP_EXACT:
  case OP_POSSTAR: case OP_POSPLUS:
  case OP_POSQUERY: case OP_POSUPTO:
  case OP_STARI: case OP_MINSTARI:
  case OP_PLUSI: case OP_MINPLUSI:
  case OP_QUERYI: case OP_MINQUERYI:
  case OP_UPTOI: case OP_MINUPTOI:
  case OP_EXACTI:
  case OP_POSSTARI: case OP_POSPLUSI:
  case OP_POSQUERYI: case OP_POSUPTOI:
  case OP_NOTSTAR: case OP_NOTMINSTAR:
  case OP_NOTPLUS: case OP_NOTMINPLUS:
  case OP_NOTQUERY: case OP_NOTMINQUERY:
  case OP_NOTUPTO: case OP_NOTMINUPTO:
  case OP_NOTEXACT:
  case OP_NOTPOSSTAR: case OP_NOTPOSPLUS:
  case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:
  case OP_NOTSTARI: case OP_NOTMINSTARI:
  case OP_NOTPLUSI: case OP_NOTMINPLUSI:
  case OP_NOTQUERYI: case OP_NOTMINQUERYI:
  case OP_NOTUPTOI: case OP_NOTMINUPTOI:
  case OP_NOTEXACTI:
  case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI:
  case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:
  cc += PRIV(OP_lengths)[*cc];
#ifdef SUPPORT_UNICODE
  if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
  return cc;

  /* Type iterators: the result is one code unit before the table
  length, so the caller processes the type opcode next. */
  case OP_TYPESTAR: case OP_TYPEMINSTAR:
  case OP_TYPEPLUS: case OP_TYPEMINPLUS:
  case OP_TYPEQUERY: case OP_TYPEMINQUERY:
  case OP_TYPEUPTO: case OP_TYPEMINUPTO:
  case OP_TYPEEXACT:
  case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS:
  case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:
  return cc + PRIV(OP_lengths)[*cc] - 1;

  case OP_ANYBYTE:
#ifdef SUPPORT_UNICODE
  /* Rejected in UTF mode. */
  if (common->utf) return NULL;
#endif
  return cc + 1;

  /* The total length is stored inside the opcode. */
  case OP_CALLOUT_STR:
  return cc + GET(cc, 1 + 2*LINK_SIZE);

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
  case OP_XCLASS:
  return cc + GET(cc, 1);
#endif

  /* Verbs with an argument: cc[1] is added to a fixed size of 1 + 2. */
  case OP_MARK:
  case OP_COMMIT_ARG:
  case OP_PRUNE_ARG:
  case OP_SKIP_ARG:
  case OP_THEN_ARG:
  return cc + 1 + 2 + cc[1];

  default:
  SLJIT_UNREACHABLE();
  return NULL;
  }
}
1116
1117
/* Walks the opcodes in [cc, ccend) once. While doing so it records in
common which optional features the pattern uses (space for them is reserved
by advancing common->ovector_start), and clears optimized_cbracket[] for
capture groups which are referenced by number or name, or which are opened
by OP_CBRAPOS / OP_SCBRAPOS.

Returns FALSE when the pattern contains a construct that is rejected here
(see the individual cases) or when next_opcode() returns NULL; returns
TRUE otherwise. */
static BOOL check_opcode_types(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend)
{
int entries;
PCRE2_SPTR ptr;
/* End of the outermost OP_ASSERTBACK / non-atomic assertion seen so far.
Both start before the pattern, so initially no opcode is inside them. */
PCRE2_SPTR assert_back_end = cc - 1;
PCRE2_SPTR assert_na_end = cc - 1;

while (cc < ccend)
  {
  switch(*cc)
    {
    case OP_SET_SOM:
    common->has_set_som = TRUE;
    common->might_be_empty = TRUE;
    cc += 1;
    break;

    case OP_REFI:
#ifdef SUPPORT_UNICODE
    /* Reserved only once: three words for caseless references. */
    if (common->iref_ptr == 0)
      {
      common->iref_ptr = common->ovector_start;
      common->ovector_start += 3 * sizeof(sljit_sw);
      }
#endif /* SUPPORT_UNICODE */
    /* Fall through. */
    case OP_REF:
    common->optimized_cbracket[GET2(cc, 1)] = 0;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    ptr = bracketend(cc);
    if (ptr > assert_na_end)
      assert_na_end = ptr;
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    case OP_SCOND:
    /* A callout as the condition can only be inserted by
       AUTO_CALLOUT. This case is intentionally not supported. */
    if (cc[1 + LINK_SIZE] == OP_CALLOUT || cc[1 + LINK_SIZE] == OP_CALLOUT_STR)
      return FALSE;
    cc += 1 + LINK_SIZE;
    break;

    case OP_CREF:
    common->optimized_cbracket[GET2(cc, 1)] = 0;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_DNREF:
    case OP_DNREFI:
    case OP_DNCREF:
    /* Clear every group listed in the referenced name table entries. */
    ptr = common->name_table + GET2(cc, 1) * common->name_entry_size;
    for (entries = GET2(cc, 1 + IMM2_SIZE); entries > 0; entries--)
      {
      common->optimized_cbracket[GET2(ptr, 0)] = 0;
      ptr += common->name_entry_size;
      }
    cc += 1 + 2 * IMM2_SIZE;
    break;

    case OP_RECURSE:
    /* The slot is allocated when the first recursion is found. */
    if (common->recursive_head_ptr == 0)
      {
      common->recursive_head_ptr = common->ovector_start;
      common->ovector_start += sizeof(sljit_sw);
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_CALLOUT:
    case OP_CALLOUT_STR:
    if (common->capture_last_ptr == 0)
      {
      common->capture_last_ptr = common->ovector_start;
      common->ovector_start += sizeof(sljit_sw);
      }
    if (*cc == OP_CALLOUT)
      cc += PRIV(OP_lengths)[OP_CALLOUT];
    else
      cc += GET(cc, 1 + 2*LINK_SIZE);
    break;

    case OP_ASSERTBACK:
    ptr = bracketend(cc);
    if (ptr > assert_back_end)
      assert_back_end = ptr;
    cc += 1 + LINK_SIZE;
    break;

    case OP_THEN_ARG:
    common->has_then = TRUE;
    common->control_head_ptr = 1;
    /* Fall through. */

    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    /* Rejected inside a non-atomic assertion. */
    if (cc < assert_na_end)
      return FALSE;
    /* Fall through */
    case OP_MARK:
    if (common->mark_ptr == 0)
      {
      common->mark_ptr = common->ovector_start;
      common->ovector_start += sizeof(sljit_sw);
      }
    cc += 1 + 2 + cc[1];
    break;

    case OP_THEN:
    common->has_then = TRUE;
    common->control_head_ptr = 1;
    cc += 1;
    break;

    case OP_SKIP:
    if (cc < assert_back_end)
      common->has_skip_in_assert_back = TRUE;
    /* Rejected inside a non-atomic assertion. */
    if (cc < assert_na_end)
      return FALSE;
    cc += 1;
    break;

    case OP_SKIP_ARG:
    common->control_head_ptr = 1;
    common->has_skip_arg = TRUE;
    if (cc < assert_back_end)
      common->has_skip_in_assert_back = TRUE;
    /* Rejected inside a non-atomic assertion. */
    if (cc < assert_na_end)
      return FALSE;
    cc += 1 + 2 + cc[1];
    break;

    case OP_PRUNE:
    case OP_COMMIT:
    case OP_ASSERT_ACCEPT:
    /* Rejected inside a non-atomic assertion. */
    if (cc < assert_na_end)
      return FALSE;
    cc++;
    break;

    default:
    cc = next_opcode(common, cc);
    if (cc == NULL)
      return FALSE;
    break;
    }
  }

return TRUE;
}
1277
1278
/* Limit of the early fail state: detect_early_fail() asserts that its
start argument is below this value, stops scanning an alternative when
its counter reaches it, and returns it when no more private data can
be allocated. */
#define EARLY_FAIL_ENHANCE_MAX (3 + 3)
1279
1280
/*
1281
  Start represents the number of allowed early fail enhancements
1282
1283
  The 0-3 values have a special meaning:
1284
    0 - skip is allowed for all iterators
1285
    1 - fail is allowed for all iterators
1286
    2 - fail is allowed for greedy iterators
1287
    3 - only ranged early fail is allowed
1288
  >3 - (start - 3) number of remaining ranged early fails allowed
1289
1290
return: the updated value of start
1291
*/
1292
/* Scans every alternative of the bracket at cc and marks the single
character / class iterators found at their beginning for the early fail
(or, when the state is still 0, the skip) optimization. Marking means
storing an encoded private data offset for the iterator and advancing
*private_data_start.

  common              compiler state, updated in place
  cc                  an OP_ONCE, OP_BRA or OP_CBRA opcode
  private_data_start  next free private data offset, updated in place
  depth               nesting level of this call (limits recursion)
  start               incoming state, must be below EARLY_FAIL_ENHANCE_MAX

Returns the largest state reached by any alternative, or
EARLY_FAIL_ENHANCE_MAX when scanning stopped somewhere else than on an
OP_ALT / OP_KET or the private data area is exhausted. */
static int detect_early_fail(compiler_common *common, PCRE2_SPTR cc,
   int *private_data_start, sljit_s32 depth, int start)
{
PCRE2_SPTR bracket_start = cc;
PCRE2_SPTR next_alt;
PCRE2_SPTR bracket_end;
PCRE2_SPTR iter_start;
int result = 0;
int state, inner_state;
BOOL ranged;

SLJIT_ASSERT(*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA);
SLJIT_ASSERT(*cc != OP_CBRA || common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] != 0);
SLJIT_ASSERT(start < EARLY_FAIL_ENHANCE_MAX);

/* With more than one alternative the "skip" state (0) is not allowed. */
next_alt = cc + GET(cc, 1);
if (*next_alt == OP_ALT && start < 1)
  start = 1;

do
  {
  state = start;
  cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);

  for (;;)
    {
    /* Set only by those cases which found an iterator to be marked.
    Inside the switch: "continue" proceeds with the next opcode,
    "break" goes to the marking code after the switch. */
    iter_start = NULL;

    switch(*cc)
      {
      case OP_SOD:
      case OP_SOM:
      case OP_SET_SOM:
      case OP_NOT_WORD_BOUNDARY:
      case OP_WORD_BOUNDARY:
      case OP_EODN:
      case OP_EOD:
      case OP_CIRC:
      case OP_CIRCM:
      case OP_DOLL:
      case OP_DOLLM:
      case OP_NOT_UCP_WORD_BOUNDARY:
      case OP_UCP_WORD_BOUNDARY:
      /* Zero width assertions do not change the state. */
      cc++;
      continue;

      case OP_NOT_DIGIT:
      case OP_DIGIT:
      case OP_NOT_WHITESPACE:
      case OP_WHITESPACE:
      case OP_NOT_WORDCHAR:
      case OP_WORDCHAR:
      case OP_ANY:
      case OP_ALLANY:
      case OP_ANYBYTE:
      case OP_NOT_HSPACE:
      case OP_HSPACE:
      case OP_NOT_VSPACE:
      case OP_VSPACE:
      if (state < 1)
        state = 1;
      cc++;
      continue;

      case OP_ANYNL:
      case OP_EXTUNI:
      if (state < 3)
        state = 3;
      cc++;
      continue;

      case OP_NOTPROP:
      case OP_PROP:
      if (state < 1)
        state = 1;
      cc += 1 + 2;
      continue;

      case OP_CHAR:
      case OP_CHARI:
      case OP_NOT:
      case OP_NOTI:
      if (state < 1)
        state = 1;
      cc += 2;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      continue;

      case OP_TYPEMINSTAR:
      case OP_TYPEMINPLUS:
      /* State 2 allows greedy iterators only. */
      if (state == 2)
        state = 3;
      /* Fall through */

      case OP_TYPESTAR:
      case OP_TYPEPLUS:
      case OP_TYPEPOSSTAR:
      case OP_TYPEPOSPLUS:
      /* Only the iterator opcode is skipped here, the type or prop
      opcode after it is processed by the next iteration. */
      cc += 1;

      if (cc[0] != OP_ANYNL && cc[0] != OP_EXTUNI)
        {
        iter_start = cc - 1;
        break;
        }

      if (state < 3)
        state = 3;
      continue;

      case OP_TYPEEXACT:
      if (state < 1)
        state = 1;
      cc += 1 + IMM2_SIZE;
      continue;

      case OP_TYPEUPTO:
      case OP_TYPEMINUPTO:
      case OP_TYPEPOSUPTO:
      cc += IMM2_SIZE;
      /* Fall through */

      case OP_TYPEQUERY:
      case OP_TYPEMINQUERY:
      case OP_TYPEPOSQUERY:
      /* Only the iterator opcode is skipped here, the type or prop
      opcode after it is processed by the next iteration. */
      if (state < 3)
        state = 3;
      cc += 1;
      continue;

      case OP_MINSTAR:
      case OP_MINPLUS:
      case OP_MINSTARI:
      case OP_MINPLUSI:
      case OP_NOTMINSTAR:
      case OP_NOTMINPLUS:
      case OP_NOTMINSTARI:
      case OP_NOTMINPLUSI:
      /* State 2 allows greedy iterators only. */
      if (state == 2)
        state = 3;
      /* Fall through */

      case OP_STAR:
      case OP_PLUS:
      case OP_POSSTAR:
      case OP_POSPLUS:

      case OP_STARI:
      case OP_PLUSI:
      case OP_POSSTARI:
      case OP_POSPLUSI:

      case OP_NOTSTAR:
      case OP_NOTPLUS:
      case OP_NOTPOSSTAR:
      case OP_NOTPOSPLUS:

      case OP_NOTSTARI:
      case OP_NOTPLUSI:
      case OP_NOTPOSSTARI:
      case OP_NOTPOSPLUSI:
      iter_start = cc;
      cc += 2;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      break;

      case OP_EXACT:
      if (state < 1)
        state = 1;
      cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      continue;

      case OP_UPTO:
      case OP_MINUPTO:
      case OP_POSUPTO:
      case OP_UPTOI:
      case OP_MINUPTOI:
      case OP_EXACTI:
      case OP_POSUPTOI:
      case OP_NOTUPTO:
      case OP_NOTMINUPTO:
      case OP_NOTEXACT:
      case OP_NOTPOSUPTO:
      case OP_NOTUPTOI:
      case OP_NOTMINUPTOI:
      case OP_NOTEXACTI:
      case OP_NOTPOSUPTOI:
      cc += IMM2_SIZE;
      /* Fall through */

      case OP_QUERY:
      case OP_MINQUERY:
      case OP_POSQUERY:
      case OP_QUERYI:
      case OP_MINQUERYI:
      case OP_POSQUERYI:
      case OP_NOTQUERY:
      case OP_NOTMINQUERY:
      case OP_NOTPOSQUERY:
      case OP_NOTQUERYI:
      case OP_NOTMINQUERYI:
      case OP_NOTPOSQUERYI:
      if (state < 3)
        state = 3;
      cc += 2;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      continue;

      case OP_CLASS:
      case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
      case OP_XCLASS:
#endif
      iter_start = cc;
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
      cc += ((*cc == OP_XCLASS) ? GET(cc, 1) : (unsigned int)(1 + (32 / sizeof(PCRE2_UCHAR))));
#else
      cc += (1 + (32 / sizeof(PCRE2_UCHAR)));
#endif

      /* The repeat opcode (if any) follows the class. */
      switch (*cc)
        {
        case OP_CRMINSTAR:
        case OP_CRMINPLUS:
        if (state == 2)
          state = 3;
        /* Fall through */

        case OP_CRSTAR:
        case OP_CRPLUS:
        case OP_CRPOSSTAR:
        case OP_CRPOSPLUS:
        cc++;
        break;

        case OP_CRRANGE:
        case OP_CRMINRANGE:
        case OP_CRPOSRANGE:
        if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
          {
          /* Minimum equals maximum: exact repeat. */
          cc += 1 + 2 * IMM2_SIZE;
          if (state < 1)
            state = 1;
          continue;
          }

        cc += 2 * IMM2_SIZE;
        /* Fall through */
        case OP_CRQUERY:
        case OP_CRMINQUERY:
        case OP_CRPOSQUERY:
        cc++;
        if (state < 3)
          state = 3;
        continue;

        default:
        /* The class is not repeated. */
        if (state < 1)
          state = 1;
        continue;
        }
      break;

      case OP_BRA:
      case OP_CBRA:
      inner_state = state;
      if (state < 1)
        state = 1;

      /* Nested brackets are followed only to a limited depth. */
      if (depth >= 4)
        break;

      if (state < 3 && cc[GET(cc, 1)] == OP_ALT)
        state = 3;

      bracket_end = bracketend(cc);
      if (bracket_end[-1 - LINK_SIZE] != OP_KET || (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0))
        break;

      inner_state = detect_early_fail(common, cc, private_data_start, depth + 1, inner_state);

      if (inner_state > state)
        state = inner_state;

      if (PRIVATE_DATA(cc) != 0)
        common->private_data_ptrs[bracket_start - common->start] = 1;

      if (state < EARLY_FAIL_ENHANCE_MAX)
        {
        cc = bracket_end;
        continue;
        }
      break;

      case OP_KET:
      SLJIT_ASSERT(PRIVATE_DATA(cc) == 0);
      /* Stop at the OP_KET which closes the current bracket. */
      if (cc >= next_alt)
        break;
      cc += 1 + LINK_SIZE;
      continue;
      }

    if (iter_start == NULL)
      break;

    if (state == 0)
      {
      /* Skip: one word of private data. */
      common->fast_forward_bc_ptr = iter_start;
      common->private_data_ptrs[(iter_start + 1) - common->start] = ((*private_data_start) << 3) | type_skip;
      *private_data_start += sizeof(sljit_sw);
      state = 4;
      }
    else
      {
      /* Fail: one word of private data, ranged fail: two words. */
      ranged = (state >= 3);

      common->private_data_ptrs[(iter_start + 1) - common->start] =
        ((*private_data_start) << 3) | (ranged ? type_fail_range : type_fail);

      if (common->early_fail_start_ptr == 0)
        common->early_fail_start_ptr = *private_data_start;

      *private_data_start += (ranged ? 2 : 1) * sizeof(sljit_sw);
      common->early_fail_end_ptr = *private_data_start;

      if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
        return EARLY_FAIL_ENHANCE_MAX;

      if (ranged)
        state++;
      else
        state = 4;
      }

    /* Cannot be part of a repeat. */
    common->private_data_ptrs[bracket_start - common->start] = 1;

    if (state >= EARLY_FAIL_ENHANCE_MAX)
      break;
    }

  if (*cc != OP_ALT && *cc != OP_KET)
    result = EARLY_FAIL_ENHANCE_MAX;
  else if (result < state)
    result = state;

  cc = next_alt;
  next_alt = cc + GET(cc, 1);
  }
while (*cc == OP_ALT);

return result;
}
1666
1667
/* Returns the number of private data words (0, 1 or 2) to reserve for the
class repeat opcode at cc. The result is 0 when cc is not one of the repeat
opcodes handled below. */
static int get_class_iterator_size(PCRE2_SPTR cc)
{
sljit_u32 lower;
sljit_u32 upper;
sljit_u32 span;

switch(*cc)
  {
  case OP_CRSTAR: case OP_CRPLUS:
  return 2;

  case OP_CRMINSTAR: case OP_CRMINPLUS:
  case OP_CRQUERY: case OP_CRMINQUERY:
  return 1;

  case OP_CRRANGE: case OP_CRMINRANGE:
  lower = GET2(cc, 1);
  upper = GET2(cc, 1 + IMM2_SIZE);

  /* A zero upper limit is sized like the star variants above. */
  if (upper == 0)
    return (*cc == OP_CRRANGE) ? 2 : 1;

  /* Otherwise the unsigned difference of the limits, capped at 2. */
  span = upper - lower;
  return (span > 2) ? 2 : span;

  default:
  return 0;
  }
}
1698
1699
/* Checks whether the bracket at begin is followed by identical copies of
itself, i.e. whether it is the first item of an unrolled repeat. When such
a sequence is found, three consecutive private_data_ptrs[] entries are
written, starting LINK_SIZE units before the end of the relevant bracket:
the length of the repeated code, the iterator kind (OP_EXACT, OP_UPTO or
OP_MINUPTO) and the repeat count.

Returns TRUE when the bracket is (or was already detected as) part of a
repeat, FALSE otherwise. */
static BOOL detect_repeat(compiler_common *common, PCRE2_SPTR begin)
{
PCRE2_SPTR end = bracketend(begin);
PCRE2_SPTR scan;
PCRE2_SPTR scan_end;
PCRE2_SPTR max_end;
PCRE2_UCHAR zero_op;
sljit_sw length = end - begin;
sljit_s32 fixed, optional, k;

/* Only brackets closed by a plain OP_KET and without private data
are candidates. */
if (end[-(1 + LINK_SIZE)] != OP_KET || PRIVATE_DATA(begin) != 0)
  return FALSE;

/* /(?:AB){4,6}/ is currently converted to /(?:AB){3}(?:AB){1,3}/ and
the data for the second part was set when the first part was processed,
so the second part is not checked again. */
if (PRIVATE_DATA(end - LINK_SIZE) != 0)
  return TRUE;

/* Count the identical copies which directly follow each other. */
fixed = 1;
scan = end;
while (*scan == *begin)
  {
  scan_end = bracketend(scan);
  if (scan_end - scan != length || memcmp(begin, scan, IN_UCHARS(length)) != 0)
    break;
  scan = scan_end;
  fixed++;
  }

if (fixed == 2)
  return FALSE;

/* Count the optional copies: nested BRAZERO/BRAMINZERO + OP_BRA wrappers,
closed by one last optional copy without the OP_BRA wrapper. */
optional = 0;
max_end = scan;
if (*scan == OP_BRAZERO || *scan == OP_BRAMINZERO)
  {
  zero_op = *scan;

  while (scan[0] == zero_op && scan[1] == OP_BRA && scan[2 + LINK_SIZE] == *begin)
    {
    scan_end = bracketend(scan + 2 + LINK_SIZE);
    if (scan_end - scan != (length + 2 + LINK_SIZE) || memcmp(begin, scan + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
      break;
    scan = scan_end;
    optional++;
    }

  if (scan[0] == zero_op && scan[1] == *begin && optional >= 1)
    {
    scan_end = bracketend(scan + 1);
    if (scan_end - scan == (length + 1) && memcmp(begin, scan + 1, IN_UCHARS(length)) == 0)
      {
      /* Every wrapper must be closed by a plain OP_KET. */
      k = 0;
      while (k < optional && *scan_end == OP_KET)
        {
        k++;
        scan_end += 1 + LINK_SIZE;
        }

      if (k == optional)
        {
        common->private_data_ptrs[max_end - common->start - LINK_SIZE] = scan_end - max_end;
        common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (zero_op == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
        /* +2 the original and the last. */
        common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = optional + 2;
        if (fixed == 1)
          return TRUE;

        /* The last fixed copy became part of the optional repeat. */
        fixed--;
        max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
        }
      }
    }
  }

if (fixed < 3)
  return FALSE;

common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = fixed;
return TRUE;
}
1784
1785
/* Case label lists for switch statements which group the single character
and character type iterators. In set_private_data_ptrs() the "_1" lists
get one word of private data, while the "_2A" and "_2B" character lists
get two words (the "_2B" opcodes also carry an IMM2_SIZE count). */

/* Character iterators: minimizing star / plus and all query forms. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: case OP_MINPLUS: \
    case OP_QUERY: case OP_MINQUERY: \
    case OP_MINSTARI: case OP_MINPLUSI: \
    case OP_QUERYI: case OP_MINQUERYI: \
    case OP_NOTMINSTAR: case OP_NOTMINPLUS: \
    case OP_NOTQUERY: case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: case OP_NOTMINQUERYI:

/* Character iterators: greedy star / plus. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: case OP_PLUS: \
    case OP_STARI: case OP_PLUSI: \
    case OP_NOTSTAR: case OP_NOTPLUS: \
    case OP_NOTSTARI: case OP_NOTPLUSI:

/* Character iterators: greedy and minimizing upto. */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: case OP_MINUPTO: \
    case OP_UPTOI: case OP_MINUPTOI: \
    case OP_NOTUPTO: case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: case OP_NOTMINUPTOI:

/* Type iterators: minimizing star / plus and both query forms. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: case OP_TYPEMINQUERY:

/* Type iterators: greedy star / plus. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: case OP_TYPEPLUS:

/* Type iterators: greedy and minimizing upto. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: case OP_TYPEMINUPTO:
1836
1837
/* Assigns private data offsets to the opcodes between common->start and
ccend which need them (several bracket types, and character / class
iterators which are not inside a repeated bracket). The offsets are stored
in common->private_data_ptrs[], indexed by the opcode position.

  common              compiler state, updated in place
  private_data_start  in: first free offset, out: first free offset after
                      the allocations made here
  ccend               end of the scanned code

The scan stops early once the next offset exceeds SLJIT_MAX_LOCAL_SIZE. */
static void set_private_data_ptrs(compiler_common *common, int *private_data_start, PCRE2_SPTR ccend)
{
PCRE2_SPTR cc = common->start;
PCRE2_SPTR alt;
/* End of the bracket inside which character iterators get no slot. */
PCRE2_SPTR repeat_end = NULL;
int next_ptr = *private_data_start;
int slots, skip, header_len;
BOOL repeat_check = TRUE;

while (cc < ccend)
  {
  slots = 0;       /* Words requested by a character / class iterator. */
  skip = 0;        /* Units to step over; negative: a character follows. */
  header_len = 0;  /* Length of a bracket header, when cc is a bracket. */

  if (next_ptr > SLJIT_MAX_LOCAL_SIZE)
    break;

  /* When the bracket is prefixed by a zero iteration, the repeat check
  is skipped (at this point). Brackets converted to repeats do not allow
  global based single character repeats inside them. */
  if (repeat_check
      && (*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)
      && detect_repeat(common, cc)
      && cc >= repeat_end)
    repeat_end = bracketend(cc);

  repeat_check = TRUE;

  switch(*cc)
    {
    case OP_KET:
    /* A non-zero entry after the OP_KET is the length of the repeated
    code stored by detect_repeat(): it is skipped as a whole. */
    if (common->private_data_ptrs[cc + 1 - common->start] != 0)
      {
      common->private_data_ptrs[cc - common->start] = next_ptr;
      next_ptr += sizeof(sljit_sw);
      cc += common->private_data_ptrs[cc + 1 - common->start];
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    common->private_data_ptrs[cc - common->start] = next_ptr;
    next_ptr += sizeof(sljit_sw);
    header_len = 1 + LINK_SIZE;
    break;

    case OP_ASSERTBACK_NA:
    common->private_data_ptrs[cc - common->start] = next_ptr;
    next_ptr += sizeof(sljit_sw);

    /* One more word when an alternative starts with OP_VREVERSE. */
    if (find_vreverse(cc))
      {
      common->private_data_ptrs[cc + 1 - common->start] = 1;
      next_ptr += sizeof(sljit_sw);
      }

    header_len = 1 + LINK_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    common->private_data_ptrs[cc - common->start] = next_ptr;
    next_ptr += sizeof(sljit_sw);
    header_len = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    common->private_data_ptrs[cc - common->start] = 0;
    alt = cc + GET(cc, 1);
    if (*alt == OP_KETRMAX || *alt == OP_KETRMIN)
      {
      common->private_data_ptrs[cc - common->start] = next_ptr;
      next_ptr += sizeof(sljit_sw);
      }
    header_len = 1 + LINK_SIZE;
    break;

    case OP_BRA:
    header_len = 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    header_len = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_BRAZERO:
    case OP_BRAMINZERO:
    case OP_BRAPOSZERO:
    skip = 1;
    repeat_check = FALSE;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    slots = 1;
    skip = -2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    slots = 2;
    skip = -2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    slots = 2;
    skip = -(2 + IMM2_SIZE);
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    slots = 1;
    skip = 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    skip = 1;
    if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
      slots = 2;
    break;

    case OP_TYPEUPTO:
    skip = 1 + IMM2_SIZE;
    if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
      slots = 2;
    break;

    case OP_TYPEMINUPTO:
    skip = 1 + IMM2_SIZE;
    slots = 2;
    break;

    case OP_CLASS:
    case OP_NCLASS:
    skip = 1 + 32 / sizeof(PCRE2_UCHAR);
    slots = get_class_iterator_size(cc + skip);
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    skip = GET(cc, 1);
    slots = get_class_iterator_size(cc + skip);
    break;
#endif

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

  /* Character iterators, which are not inside a repeated bracket,
     get a private slot instead of allocating it on the stack. */
  if (slots > 0 && cc >= repeat_end)
    {
    common->private_data_ptrs[cc - common->start] = next_ptr;
    next_ptr += sizeof(sljit_sw) * slots;
    }

  if (skip < 0)
    {
    cc += -skip;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    }
  else if (skip > 0)
    cc += skip;

  if (header_len > 0)
    {
    if (cc >= repeat_end)
      {
      /* Only brackets which are not closed by a plain OP_KET
      restrict the iterators inside them. */
      repeat_end = bracketend(cc);
      if (repeat_end[-1 - LINK_SIZE] == OP_KET)
        repeat_end = NULL;
      }
    cc += header_len;
    }
  }

*private_data_start = next_ptr;
}
2033
2034
/* Returns with a frame_types (always < 0) if no need for frame. */
2035
static int get_framesize(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, BOOL recursive, BOOL *needs_control_head)
2036
{
2037
int length = 0;
2038
int possessive = 0;
2039
BOOL stack_restore = FALSE;
2040
BOOL setsom_found = recursive;
2041
BOOL setmark_found = recursive;
2042
/* The last capture is a local variable even for recursions. */
2043
BOOL capture_last_found = FALSE;
2044
2045
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2046
SLJIT_ASSERT(common->control_head_ptr != 0);
2047
*needs_control_head = TRUE;
2048
#else
2049
*needs_control_head = FALSE;
2050
#endif
2051
2052
if (ccend == NULL)
2053
  {
2054
  ccend = bracketend(cc) - (1 + LINK_SIZE);
2055
  if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
2056
    {
2057
    possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
2058
    /* This is correct regardless of common->capture_last_ptr. */
2059
    capture_last_found = TRUE;
2060
    }
2061
  cc = next_opcode(common, cc);
2062
  }
2063
2064
SLJIT_ASSERT(cc != NULL);
2065
while (cc < ccend)
2066
  switch(*cc)
2067
    {
2068
    case OP_SET_SOM:
2069
    SLJIT_ASSERT(common->has_set_som);
2070
    stack_restore = TRUE;
2071
    if (!setsom_found)
2072
      {
2073
      length += 2;
2074
      setsom_found = TRUE;
2075
      }
2076
    cc += 1;
2077
    break;
2078
2079
    case OP_MARK:
2080
    case OP_COMMIT_ARG:
2081
    case OP_PRUNE_ARG:
2082
    case OP_THEN_ARG:
2083
    SLJIT_ASSERT(common->mark_ptr != 0);
2084
    stack_restore = TRUE;
2085
    if (!setmark_found)
2086
      {
2087
      length += 2;
2088
      setmark_found = TRUE;
2089
      }
2090
    if (common->control_head_ptr != 0)
2091
      *needs_control_head = TRUE;
2092
    cc += 1 + 2 + cc[1];
2093
    break;
2094
2095
    case OP_RECURSE:
2096
    stack_restore = TRUE;
2097
    if (common->has_set_som && !setsom_found)
2098
      {
2099
      length += 2;
2100
      setsom_found = TRUE;
2101
      }
2102
    if (common->mark_ptr != 0 && !setmark_found)
2103
      {
2104
      length += 2;
2105
      setmark_found = TRUE;
2106
      }
2107
    if (common->capture_last_ptr != 0 && !capture_last_found)
2108
      {
2109
      length += 2;
2110
      capture_last_found = TRUE;
2111
      }
2112
    cc += 1 + LINK_SIZE;
2113
    break;
2114
2115
    case OP_CBRA:
2116
    case OP_CBRAPOS:
2117
    case OP_SCBRA:
2118
    case OP_SCBRAPOS:
2119
    stack_restore = TRUE;
2120
    if (common->capture_last_ptr != 0 && !capture_last_found)
2121
      {
2122
      length += 2;
2123
      capture_last_found = TRUE;
2124
      }
2125
    length += 3;
2126
    cc += 1 + LINK_SIZE + IMM2_SIZE;
2127
    break;
2128
2129
    case OP_THEN:
2130
    stack_restore = TRUE;
2131
    if (common->control_head_ptr != 0)
2132
      *needs_control_head = TRUE;
2133
    cc ++;
2134
    break;
2135
2136
    default:
2137
    stack_restore = TRUE;
2138
    /* Fall through. */
2139
2140
    case OP_NOT_WORD_BOUNDARY:
2141
    case OP_WORD_BOUNDARY:
2142
    case OP_NOT_DIGIT:
2143
    case OP_DIGIT:
2144
    case OP_NOT_WHITESPACE:
2145
    case OP_WHITESPACE:
2146
    case OP_NOT_WORDCHAR:
2147
    case OP_WORDCHAR:
2148
    case OP_ANY:
2149
    case OP_ALLANY:
2150
    case OP_ANYBYTE:
2151
    case OP_NOTPROP:
2152
    case OP_PROP:
2153
    case OP_ANYNL:
2154
    case OP_NOT_HSPACE:
2155
    case OP_HSPACE:
2156
    case OP_NOT_VSPACE:
2157
    case OP_VSPACE:
2158
    case OP_EXTUNI:
2159
    case OP_EODN:
2160
    case OP_EOD:
2161
    case OP_CIRC:
2162
    case OP_CIRCM:
2163
    case OP_DOLL:
2164
    case OP_DOLLM:
2165
    case OP_CHAR:
2166
    case OP_CHARI:
2167
    case OP_NOT:
2168
    case OP_NOTI:
2169
2170
    case OP_EXACT:
2171
    case OP_POSSTAR:
2172
    case OP_POSPLUS:
2173
    case OP_POSQUERY:
2174
    case OP_POSUPTO:
2175
2176
    case OP_EXACTI:
2177
    case OP_POSSTARI:
2178
    case OP_POSPLUSI:
2179
    case OP_POSQUERYI:
2180
    case OP_POSUPTOI:
2181
2182
    case OP_NOTEXACT:
2183
    case OP_NOTPOSSTAR:
2184
    case OP_NOTPOSPLUS:
2185
    case OP_NOTPOSQUERY:
2186
    case OP_NOTPOSUPTO:
2187
2188
    case OP_NOTEXACTI:
2189
    case OP_NOTPOSSTARI:
2190
    case OP_NOTPOSPLUSI:
2191
    case OP_NOTPOSQUERYI:
2192
    case OP_NOTPOSUPTOI:
2193
2194
    case OP_TYPEEXACT:
2195
    case OP_TYPEPOSSTAR:
2196
    case OP_TYPEPOSPLUS:
2197
    case OP_TYPEPOSQUERY:
2198
    case OP_TYPEPOSUPTO:
2199
2200
    case OP_CLASS:
2201
    case OP_NCLASS:
2202
    case OP_XCLASS:
2203
2204
    case OP_CALLOUT:
2205
    case OP_CALLOUT_STR:
2206
2207
    case OP_NOT_UCP_WORD_BOUNDARY:
2208
    case OP_UCP_WORD_BOUNDARY:
2209
2210
    cc = next_opcode(common, cc);
2211
    SLJIT_ASSERT(cc != NULL);
2212
    break;
2213
    }
2214
2215
/* Possessive quantifiers can use a special case. */
2216
if (SLJIT_UNLIKELY(possessive == length))
2217
  return stack_restore ? no_frame : no_stack;
2218
2219
if (length > 0)
2220
  return length + 1;
2221
return stack_restore ? no_frame : no_stack;
2222
}
2223
2224
/* Emits the code that fills a frame on the stack for the opcodes in
[cc, ccend), or for the bracket at cc when ccend is NULL. Items are written
through STACK_TOP starting at offset STACK(stackpos); the offset decreases
by one word after every store. A terminating 0 word is written last, and at
that point the offset must equal STACK(stacktop). Saved items: the start of
match (OVECTOR(0)), the mark pointer and the last-capture value (each as a
negated offset followed by the current value), and for every capturing
bracket its OVECTOR offset followed by both of its current values.
NOTE(review): the words written here look like the ones counted by
get_framesize() - confirm the two stay in sync when changing either. */
static void init_frame(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, int stackpos, int stacktop)
2225
{
2226
DEFINE_COMPILER;
2227
/* Each of these values is saved at most once per frame. */
BOOL setsom_found = FALSE;
2228
BOOL setmark_found = FALSE;
2229
/* The last capture is a local variable even for recursions. */
2230
BOOL capture_last_found = FALSE;
2231
int offset;
2232
2233
/* >= 1 + shortest item size (2) */
2234
SLJIT_UNUSED_ARG(stacktop);
2235
SLJIT_ASSERT(stackpos >= stacktop + 2);
2236
2237
/* From here on stackpos is the value of STACK(stackpos), not an index. */
stackpos = STACK(stackpos);
2238
if (ccend == NULL)
2239
  {
2240
  ccend = bracketend(cc) - (1 + LINK_SIZE);
2241
  /* A possessive capturing bracket is not skipped, so the loop below
  saves its own capture as well. */
  if (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS)
2242
    cc = next_opcode(common, cc);
2243
  }
2244
2245
SLJIT_ASSERT(cc != NULL);
2246
while (cc < ccend)
2247
  switch(*cc)
2248
    {
2249
    case OP_SET_SOM:
2250
    SLJIT_ASSERT(common->has_set_som);
2251
    if (!setsom_found)
2252
      {
2253
      /* Store -OVECTOR(0) first, then the current value of OVECTOR(0). */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
2254
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
2255
      stackpos -= SSIZE_OF(sw);
2256
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2257
      stackpos -= SSIZE_OF(sw);
2258
      setsom_found = TRUE;
2259
      }
2260
    cc += 1;
2261
    break;
2262
2263
    case OP_MARK:
2264
    case OP_COMMIT_ARG:
2265
    case OP_PRUNE_ARG:
2266
    case OP_THEN_ARG:
2267
    SLJIT_ASSERT(common->mark_ptr != 0);
2268
    if (!setmark_found)
2269
      {
2270
      /* Store -mark_ptr first, then the current mark value. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2271
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
2272
      stackpos -= SSIZE_OF(sw);
2273
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2274
      stackpos -= SSIZE_OF(sw);
2275
      setmark_found = TRUE;
2276
      }
2277
    cc += 1 + 2 + cc[1];
2278
    break;
2279
2280
    case OP_RECURSE:
2281
    /* Saves every tracked value that is in use and has not been saved yet. */
    if (common->has_set_som && !setsom_found)
2282
      {
2283
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
2284
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
2285
      stackpos -= SSIZE_OF(sw);
2286
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2287
      stackpos -= SSIZE_OF(sw);
2288
      setsom_found = TRUE;
2289
      }
2290
    if (common->mark_ptr != 0 && !setmark_found)
2291
      {
2292
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2293
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
2294
      stackpos -= SSIZE_OF(sw);
2295
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2296
      stackpos -= SSIZE_OF(sw);
2297
      setmark_found = TRUE;
2298
      }
2299
    if (common->capture_last_ptr != 0 && !capture_last_found)
2300
      {
2301
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
2302
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
2303
      stackpos -= SSIZE_OF(sw);
2304
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2305
      stackpos -= SSIZE_OF(sw);
2306
      capture_last_found = TRUE;
2307
      }
2308
    cc += 1 + LINK_SIZE;
2309
    break;
2310
2311
    case OP_CBRA:
2312
    case OP_CBRAPOS:
2313
    case OP_SCBRA:
2314
    case OP_SCBRAPOS:
2315
    if (common->capture_last_ptr != 0 && !capture_last_found)
2316
      {
2317
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
2318
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
2319
      stackpos -= SSIZE_OF(sw);
2320
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2321
      stackpos -= SSIZE_OF(sw);
2322
      capture_last_found = TRUE;
2323
      }
2324
    /* offset is twice the number read from the opcode; three words follow:
    OVECTOR(offset) as an immediate (not negated, unlike the items above),
    then the values of OVECTOR(offset) and OVECTOR(offset + 1). */
    offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
2325
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
2326
    stackpos -= SSIZE_OF(sw);
2327
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
2328
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
2329
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2330
    stackpos -= SSIZE_OF(sw);
2331
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
2332
    stackpos -= SSIZE_OF(sw);
2333
2334
    cc += 1 + LINK_SIZE + IMM2_SIZE;
2335
    break;
2336
2337
    default:
2338
    /* Nothing to save for any other opcode. */
    cc = next_opcode(common, cc);
2339
    SLJIT_ASSERT(cc != NULL);
2340
    break;
2341
    }
2342
2343
/* Terminating zero word; it must land exactly on STACK(stacktop). */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
2344
SLJIT_ASSERT(stackpos == STACK(stacktop));
2345
}
2346
2347
/* Number of temporary registers rotated by the delayed memory copy helpers. */
#define RECURSE_TMP_REG_COUNT 3
2348
2349
/* State of a memory-to-memory copy sequence in which each loaded value is
kept in a temporary register and its store is emitted later, when that
register is needed again or when the sequence is finished. */
typedef struct delayed_mem_copy_status {
2350
  /* Compiler used for emitting the instructions. */
  struct sljit_compiler *compiler;
2351
  /* Base register of the pending store of each slot; -1 means no store is
  pending for that slot. */
  int store_bases[RECURSE_TMP_REG_COUNT];
2352
  /* Offset of the pending store of each slot. */
  int store_offsets[RECURSE_TMP_REG_COUNT];
2353
  /* Register that holds the loaded value of each slot. */
  int tmp_regs[RECURSE_TMP_REG_COUNT];
2354
  /* Where the original content of tmp_regs[i] is kept while the slot is in
  use; only used when sljit_get_register_index() reports a negative index
  for it, otherwise it must be the same register as tmp_regs[i]. */
  int saved_tmp_regs[RECURSE_TMP_REG_COUNT];
2355
  /* Index of the slot used by the next move; advances round-robin. */
  int next_tmp_reg;
2356
} delayed_mem_copy_status;
2357
2358
/* Prepares a delayed copy sequence: binds the status to the compiler of
common, selects slot 0 as the next slot and marks every slot as having no
pending store. The caller must fill tmp_regs[] and saved_tmp_regs[] before
calling this function. */
static void delayed_mem_copy_init(delayed_mem_copy_status *status, compiler_common *common)
{
int slot = 0;

status->compiler = common->compiler;
status->next_tmp_reg = 0;

while (slot < RECURSE_TMP_REG_COUNT)
  {
  SLJIT_ASSERT(status->tmp_regs[slot] >= 0);
  /* A separate save location is only allowed when it has a negative
  register index. */
  SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, status->saved_tmp_regs[slot]) < 0 || status->tmp_regs[slot] == status->saved_tmp_regs[slot]);

  status->store_bases[slot] = -1;
  slot++;
  }
}
2372
2373
/* Queues the copy of one word from [load_base + load_offset] to
[store_base + store_offset]. The load is emitted immediately into the
temporary register of the current slot, while the store is only recorded;
it is emitted when the slot is reused or by delayed_mem_copy_finish(). */
static void delayed_mem_copy_move(delayed_mem_copy_status *status, int load_base, sljit_sw load_offset,
  int store_base, sljit_sw store_offset)
{
struct sljit_compiler *compiler = status->compiler;
const int slot = status->next_tmp_reg;
const int reg = status->tmp_regs[slot];
const int pending_base = status->store_bases[slot];

SLJIT_ASSERT(load_base > 0 && store_base > 0);

if (pending_base != -1)
  {
  /* The slot still holds an earlier value: write it out first. */
  OP1(SLJIT_MOV, SLJIT_MEM1(pending_base), status->store_offsets[slot], reg, 0);
  }
else if (sljit_get_register_index(SLJIT_GP_REGISTER, status->saved_tmp_regs[slot]) < 0)
  {
  /* First use of the slot: preserve virtual registers. */
  OP1(SLJIT_MOV, status->saved_tmp_regs[slot], 0, reg, 0);
  }

OP1(SLJIT_MOV, reg, 0, SLJIT_MEM1(load_base), load_offset);

status->store_offsets[slot] = store_offset;
status->store_bases[slot] = store_base;
status->next_tmp_reg = (slot + 1) % RECURSE_TMP_REG_COUNT;
}
2397
2398
/* Emits every store that is still pending. The slots are visited in
rotation order starting from the slot that would be used next, and the
original content of a temporary register is restored after its store when
it was preserved in a separate location. */
static void delayed_mem_copy_finish(delayed_mem_copy_status *status)
{
struct sljit_compiler *compiler = status->compiler;
int slot = status->next_tmp_reg;
int remaining, reg, backup;

for (remaining = RECURSE_TMP_REG_COUNT; remaining > 0; remaining--, slot = (slot + 1) % RECURSE_TMP_REG_COUNT)
  {
  if (status->store_bases[slot] == -1)
    continue;

  reg = status->tmp_regs[slot];
  backup = status->saved_tmp_regs[slot];

  OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[slot]), status->store_offsets[slot], reg, 0);

  /* Restore virtual registers. */
  if (sljit_get_register_index(SLJIT_GP_REGISTER, backup) < 0)
    OP1(SLJIT_MOV, reg, 0, backup, 0);
  }
}
2421
2422
#undef RECURSE_TMP_REG_COUNT
2423
2424
/* Marks the word at byte offset bit_index in common->recurse_bitset.
bit_index must be a multiple of sizeof(sljit_sw); it is converted to a word
index before use. Returns TRUE when the word is marked for the first time
and FALSE when it was already marked. */
static BOOL recurse_check_bit(compiler_common *common, sljit_sw bit_index)
{
uint8_t *slot;
uint8_t bit;
BOOL first_time;

SLJIT_ASSERT((bit_index & (sizeof(sljit_sw) - 1)) == 0);

bit_index >>= SLJIT_WORD_SHIFT;

SLJIT_ASSERT((bit_index >> 3) < common->recurse_bitset_size);

slot = common->recurse_bitset + (bit_index >> 3);
bit = (uint8_t)(1 << (bit_index & 0x7));

first_time = (*slot & bit) == 0;
/* Setting a bit that is already set leaves the byte unchanged. */
*slot |= bit;
return first_time ? TRUE : FALSE;
}
2444
2445
/* Bits collected by get_recurse_data_length() while it scans the opcodes
of a recursion. */
enum get_recurse_flags {
2446
  /* OP_PRUNE, OP_SKIP, OP_COMMIT, OP_SKIP_ARG, OP_THEN or an *_ARG verb
  other than OP_MARK was seen. */
  recurse_flag_quit_found = (1 << 0),
2447
  /* OP_ACCEPT or OP_ASSERT_ACCEPT was seen. */
  recurse_flag_accept_found = (1 << 1),
2448
  /* OP_SET_SOM was seen, or OP_RECURSE while common->has_set_som is set. */
  recurse_flag_setsom_found = (1 << 2),
2449
  /* A mark-type verb was seen, or OP_RECURSE while common->mark_ptr != 0. */
  recurse_flag_setmark_found = (1 << 3),
2450
  /* OP_THEN was seen, or a mark-type verb while common->control_head_ptr
  != 0; always set when DEBUG_FORCE_CONTROL_HEAD is enabled. */
  recurse_flag_control_head_found = (1 << 4),
2451
};
2452
2453
/* Counts the stack words needed for the data used by the opcodes in
[cc, ccend) of a recursion and stores the collected recurse_flag_* bits in
*result_flags. common->recurse_bitset is cleared first, and every slot is
counted only the first time recurse_check_bit() reports it. The count
starts at 1 (NOTE(review): presumably the word copy_recurse_data() uses for
common->recursive_head_ptr - confirm). Returns the number of words. */
static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, uint32_t *result_flags)
2454
{
2455
int length = 1;
2456
int size, offset;
2457
PCRE2_SPTR alternative;
2458
uint32_t recurse_flags = 0;
2459
2460
memset(common->recurse_bitset, 0, common->recurse_bitset_size);
2461
2462
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2463
SLJIT_ASSERT(common->control_head_ptr != 0);
2464
recurse_flags |= recurse_flag_control_head_found;
2465
#endif
2466
2467
/* Calculate the sum of the private machine words. */
2468
while (cc < ccend)
2469
  {
2470
  size = 0;
2471
  switch(*cc)
2472
    {
2473
    case OP_SET_SOM:
2474
    SLJIT_ASSERT(common->has_set_som);
2475
    recurse_flags |= recurse_flag_setsom_found;
2476
    cc += 1;
2477
    break;
2478
2479
    case OP_RECURSE:
2480
    if (common->has_set_som)
2481
      recurse_flags |= recurse_flag_setsom_found;
2482
    if (common->mark_ptr != 0)
2483
      recurse_flags |= recurse_flag_setmark_found;
2484
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2485
      length++;
2486
    cc += 1 + LINK_SIZE;
2487
    break;
2488
2489
    case OP_KET:
2490
    offset = PRIVATE_DATA(cc);
2491
    if (offset != 0)
2492
      {
2493
      if (recurse_check_bit(common, offset))
2494
        length++;
2495
      SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
2496
      /* The private data of cc + 1 is an extra distance to skip. */
      cc += PRIVATE_DATA(cc + 1);
2497
      }
2498
    cc += 1 + LINK_SIZE;
2499
    break;
2500
2501
    case OP_ASSERT:
2502
    case OP_ASSERT_NOT:
2503
    case OP_ASSERTBACK:
2504
    case OP_ASSERTBACK_NOT:
2505
    case OP_ASSERT_NA:
2506
    case OP_ASSERTBACK_NA:
2507
    case OP_ONCE:
2508
    case OP_SCRIPT_RUN:
2509
    case OP_BRAPOS:
2510
    case OP_SBRA:
2511
    case OP_SBRAPOS:
2512
    case OP_SCOND:
2513
    SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
2514
    if (recurse_check_bit(common, PRIVATE_DATA(cc)))
2515
      length++;
2516
    cc += 1 + LINK_SIZE;
2517
    break;
2518
2519
    case OP_CBRA:
2520
    case OP_SCBRA:
2521
    offset = GET2(cc, 1 + LINK_SIZE);
2522
    if (recurse_check_bit(common, OVECTOR(offset << 1)))
2523
      {
2524
      /* NOTE(review): here and in the similar asserts below the
      recurse_check_bit() call has a side effect (it marks the second
      word). If SLJIT_ASSERT compiles to nothing in some builds, that word
      is not marked there - confirm this is intended. */
      SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((offset << 1) + 1)));
2525
      length += 2;
2526
      }
2527
    if (common->optimized_cbracket[offset] == 0 && recurse_check_bit(common, OVECTOR_PRIV(offset)))
2528
      length++;
2529
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2530
      length++;
2531
    cc += 1 + LINK_SIZE + IMM2_SIZE;
2532
    break;
2533
2534
    case OP_CBRAPOS:
2535
    case OP_SCBRAPOS:
2536
    offset = GET2(cc, 1 + LINK_SIZE);
2537
    if (recurse_check_bit(common, OVECTOR(offset << 1)))
2538
      {
2539
      SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((offset << 1) + 1)));
2540
      length += 2;
2541
      }
2542
    if (recurse_check_bit(common, OVECTOR_PRIV(offset)))
2543
      length++;
2544
    if (recurse_check_bit(common, PRIVATE_DATA(cc)))
2545
      length++;
2546
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2547
      length++;
2548
    cc += 1 + LINK_SIZE + IMM2_SIZE;
2549
    break;
2550
2551
    case OP_COND:
2552
    /* Might be a hidden SCOND. */
2553
    alternative = cc + GET(cc, 1);
2554
    if ((*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) && recurse_check_bit(common, PRIVATE_DATA(cc)))
2555
      length++;
2556
    cc += 1 + LINK_SIZE;
2557
    break;
2558
2559
    /* Iterators: one word is counted for the *_1 groups and two adjacent
    words for the *_2A / *_2B groups, only when the iterator has private
    data. */
    CASE_ITERATOR_PRIVATE_DATA_1
2560
    offset = PRIVATE_DATA(cc);
2561
    if (offset != 0 && recurse_check_bit(common, offset))
2562
      length++;
2563
    cc += 2;
2564
#ifdef SUPPORT_UNICODE
2565
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2566
#endif
2567
    break;
2568
2569
    CASE_ITERATOR_PRIVATE_DATA_2A
2570
    offset = PRIVATE_DATA(cc);
2571
    if (offset != 0 && recurse_check_bit(common, offset))
2572
      {
2573
      SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
2574
      length += 2;
2575
      }
2576
    cc += 2;
2577
#ifdef SUPPORT_UNICODE
2578
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2579
#endif
2580
    break;
2581
2582
    CASE_ITERATOR_PRIVATE_DATA_2B
2583
    offset = PRIVATE_DATA(cc);
2584
    if (offset != 0 && recurse_check_bit(common, offset))
2585
      {
2586
      SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
2587
      length += 2;
2588
      }
2589
    cc += 2 + IMM2_SIZE;
2590
#ifdef SUPPORT_UNICODE
2591
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2592
#endif
2593
    break;
2594
2595
    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2596
    offset = PRIVATE_DATA(cc);
2597
    if (offset != 0 && recurse_check_bit(common, offset))
2598
      length++;
2599
    cc += 1;
2600
    break;
2601
2602
    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2603
    offset = PRIVATE_DATA(cc);
2604
    if (offset != 0 && recurse_check_bit(common, offset))
2605
      {
2606
      SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
2607
      length += 2;
2608
      }
2609
    cc += 1;
2610
    break;
2611
2612
    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2613
    offset = PRIVATE_DATA(cc);
2614
    if (offset != 0 && recurse_check_bit(common, offset))
2615
      {
2616
      SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
2617
      length += 2;
2618
      }
2619
    cc += 1 + IMM2_SIZE;
2620
    break;
2621
2622
    case OP_CLASS:
2623
    case OP_NCLASS:
2624
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
2625
    case OP_XCLASS:
2626
    size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
2627
#else
2628
    size = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
2629
#endif
2630
2631
    /* The word count of a class depends on the opcode that follows it. */
    offset = PRIVATE_DATA(cc);
2632
    if (offset != 0 && recurse_check_bit(common, offset))
2633
      length += get_class_iterator_size(cc + size);
2634
    cc += size;
2635
    break;
2636
2637
    case OP_MARK:
2638
    case OP_COMMIT_ARG:
2639
    case OP_PRUNE_ARG:
2640
    case OP_THEN_ARG:
2641
    SLJIT_ASSERT(common->mark_ptr != 0);
2642
    recurse_flags |= recurse_flag_setmark_found;
2643
    if (common->control_head_ptr != 0)
2644
      recurse_flags |= recurse_flag_control_head_found;
2645
    /* Every verb of this group except OP_MARK counts as a quit. */
    if (*cc != OP_MARK)
2646
      recurse_flags |= recurse_flag_quit_found;
2647
2648
    cc += 1 + 2 + cc[1];
2649
    break;
2650
2651
    case OP_PRUNE:
2652
    case OP_SKIP:
2653
    case OP_COMMIT:
2654
    recurse_flags |= recurse_flag_quit_found;
2655
    cc++;
2656
    break;
2657
2658
    case OP_SKIP_ARG:
2659
    recurse_flags |= recurse_flag_quit_found;
2660
    cc += 1 + 2 + cc[1];
2661
    break;
2662
2663
    case OP_THEN:
2664
    SLJIT_ASSERT(common->control_head_ptr != 0);
2665
    recurse_flags |= recurse_flag_quit_found | recurse_flag_control_head_found;
2666
    cc++;
2667
    break;
2668
2669
    case OP_ACCEPT:
2670
    case OP_ASSERT_ACCEPT:
2671
    recurse_flags |= recurse_flag_accept_found;
2672
    cc++;
2673
    break;
2674
2675
    default:
2676
    cc = next_opcode(common, cc);
2677
    SLJIT_ASSERT(cc != NULL);
2678
    break;
2679
    }
2680
  }
2681
SLJIT_ASSERT(cc == ccend);
2682
2683
/* One extra word for the control head, and one each for the start of match
and the mark when a quit verb was found. */
if (recurse_flags & recurse_flag_control_head_found)
2684
  length++;
2685
if (recurse_flags & recurse_flag_quit_found)
2686
  {
2687
  if (recurse_flags & recurse_flag_setsom_found)
2688
    length++;
2689
  if (recurse_flags & recurse_flag_setmark_found)
2690
    length++;
2691
  }
2692
2693
*result_flags = recurse_flags;
2694
return length;
2695
}
2696
2697
/* Copy modes accepted by the type argument of copy_recurse_data().
NOTE(review): the per-mode notes below describe only the setup code of
copy_recurse_data(); confirm the full meaning against its copy loops. */
enum copy_recurse_data_types {
2698
  /* Selects from_sp = TRUE with STACK_TOP as the base register. */
  recurse_copy_from_global,
2699
  /* Selects from_sp = FALSE with STACK_TOP as the base register. */
  recurse_copy_private_to_global,
2700
  /* Same setup as above; recursive_head_ptr is not copied in this mode. */
  recurse_copy_shared_to_global,
2701
  /* Same setup as above; recursive_head_ptr is not copied in this mode. */
  recurse_copy_kept_shared_to_global,
2702
  /* Selects from_sp = FALSE with TMP2 as the base register; values are
  moved in both directions. */
  recurse_swap_global
2703
};
2704
2705
static void copy_recurse_data(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
2706
  int type, int stackptr, int stacktop, uint32_t recurse_flags)
2707
{
2708
delayed_mem_copy_status status;
2709
PCRE2_SPTR alternative;
2710
sljit_sw private_srcw[2];
2711
sljit_sw shared_srcw[3];
2712
sljit_sw kept_shared_srcw[2];
2713
int private_count, shared_count, kept_shared_count;
2714
int from_sp, base_reg, offset, i;
2715
2716
memset(common->recurse_bitset, 0, common->recurse_bitset_size);
2717
2718
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2719
SLJIT_ASSERT(common->control_head_ptr != 0);
2720
recurse_check_bit(common, common->control_head_ptr);
2721
#endif
2722
2723
switch (type)
2724
  {
2725
  case recurse_copy_from_global:
2726
  from_sp = TRUE;
2727
  base_reg = STACK_TOP;
2728
  break;
2729
2730
  case recurse_copy_private_to_global:
2731
  case recurse_copy_shared_to_global:
2732
  case recurse_copy_kept_shared_to_global:
2733
  from_sp = FALSE;
2734
  base_reg = STACK_TOP;
2735
  break;
2736
2737
  default:
2738
  SLJIT_ASSERT(type == recurse_swap_global);
2739
  from_sp = FALSE;
2740
  base_reg = TMP2;
2741
  break;
2742
  }
2743
2744
stackptr = STACK(stackptr);
2745
stacktop = STACK(stacktop);
2746
2747
status.tmp_regs[0] = TMP1;
2748
status.saved_tmp_regs[0] = TMP1;
2749
2750
if (base_reg != TMP2)
2751
  {
2752
  status.tmp_regs[1] = TMP2;
2753
  status.saved_tmp_regs[1] = TMP2;
2754
  }
2755
else
2756
  {
2757
  status.saved_tmp_regs[1] = RETURN_ADDR;
2758
  if (HAS_VIRTUAL_REGISTERS)
2759
    status.tmp_regs[1] = STR_PTR;
2760
  else
2761
    status.tmp_regs[1] = RETURN_ADDR;
2762
  }
2763
2764
status.saved_tmp_regs[2] = TMP3;
2765
if (HAS_VIRTUAL_REGISTERS)
2766
  status.tmp_regs[2] = STR_END;
2767
else
2768
  status.tmp_regs[2] = TMP3;
2769
2770
delayed_mem_copy_init(&status, common);
2771
2772
if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
2773
  {
2774
  SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
2775
2776
  if (!from_sp)
2777
    delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->recursive_head_ptr);
2778
2779
  if (from_sp || type == recurse_swap_global)
2780
    delayed_mem_copy_move(&status, SLJIT_SP, common->recursive_head_ptr, base_reg, stackptr);
2781
  }
2782
2783
stackptr += sizeof(sljit_sw);
2784
2785
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2786
if (type != recurse_copy_shared_to_global)
2787
  {
2788
  if (!from_sp)
2789
    delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->control_head_ptr);
2790
2791
  if (from_sp || type == recurse_swap_global)
2792
    delayed_mem_copy_move(&status, SLJIT_SP, common->control_head_ptr, base_reg, stackptr);
2793
  }
2794
2795
stackptr += sizeof(sljit_sw);
2796
#endif
2797
2798
while (cc < ccend)
2799
  {
2800
  private_count = 0;
2801
  shared_count = 0;
2802
  kept_shared_count = 0;
2803
2804
  switch(*cc)
2805
    {
2806
    case OP_SET_SOM:
2807
    SLJIT_ASSERT(common->has_set_som);
2808
    if ((recurse_flags & recurse_flag_quit_found) && recurse_check_bit(common, OVECTOR(0)))
2809
      {
2810
      kept_shared_srcw[0] = OVECTOR(0);
2811
      kept_shared_count = 1;
2812
      }
2813
    cc += 1;
2814
    break;
2815
2816
    case OP_RECURSE:
2817
    if (recurse_flags & recurse_flag_quit_found)
2818
      {
2819
      if (common->has_set_som && recurse_check_bit(common, OVECTOR(0)))
2820
        {
2821
        kept_shared_srcw[0] = OVECTOR(0);
2822
        kept_shared_count = 1;
2823
        }
2824
      if (common->mark_ptr != 0 && recurse_check_bit(common, common->mark_ptr))
2825
        {
2826
        kept_shared_srcw[kept_shared_count] = common->mark_ptr;
2827
        kept_shared_count++;
2828
        }
2829
      }
2830
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2831
      {
2832
      shared_srcw[0] = common->capture_last_ptr;
2833
      shared_count = 1;
2834
      }
2835
    cc += 1 + LINK_SIZE;
2836
    break;
2837
2838
    case OP_KET:
2839
    private_srcw[0] = PRIVATE_DATA(cc);
2840
    if (private_srcw[0] != 0)
2841
      {
2842
      if (recurse_check_bit(common, private_srcw[0]))
2843
        private_count = 1;
2844
      SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
2845
      cc += PRIVATE_DATA(cc + 1);
2846
      }
2847
    cc += 1 + LINK_SIZE;
2848
    break;
2849
2850
    case OP_ASSERT:
2851
    case OP_ASSERT_NOT:
2852
    case OP_ASSERTBACK:
2853
    case OP_ASSERTBACK_NOT:
2854
    case OP_ASSERT_NA:
2855
    case OP_ASSERTBACK_NA:
2856
    case OP_ONCE:
2857
    case OP_SCRIPT_RUN:
2858
    case OP_BRAPOS:
2859
    case OP_SBRA:
2860
    case OP_SBRAPOS:
2861
    case OP_SCOND:
2862
    private_srcw[0] = PRIVATE_DATA(cc);
2863
    if (recurse_check_bit(common, private_srcw[0]))
2864
      private_count = 1;
2865
    cc += 1 + LINK_SIZE;
2866
    break;
2867
2868
    case OP_CBRA:
2869
    case OP_SCBRA:
2870
    offset = GET2(cc, 1 + LINK_SIZE);
2871
    shared_srcw[0] = OVECTOR(offset << 1);
2872
    if (recurse_check_bit(common, shared_srcw[0]))
2873
      {
2874
      shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw);
2875
      SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1]));
2876
      shared_count = 2;
2877
      }
2878
2879
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2880
      {
2881
      shared_srcw[shared_count] = common->capture_last_ptr;
2882
      shared_count++;
2883
      }
2884
2885
    if (common->optimized_cbracket[offset] == 0)
2886
      {
2887
      private_srcw[0] = OVECTOR_PRIV(offset);
2888
      if (recurse_check_bit(common, private_srcw[0]))
2889
        private_count = 1;
2890
      }
2891
2892
    cc += 1 + LINK_SIZE + IMM2_SIZE;
2893
    break;
2894
2895
    case OP_CBRAPOS:
2896
    case OP_SCBRAPOS:
2897
    offset = GET2(cc, 1 + LINK_SIZE);
2898
    shared_srcw[0] = OVECTOR(offset << 1);
2899
    if (recurse_check_bit(common, shared_srcw[0]))
2900
      {
2901
      shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw);
2902
      SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1]));
2903
      shared_count = 2;
2904
      }
2905
2906
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2907
      {
2908
      shared_srcw[shared_count] = common->capture_last_ptr;
2909
      shared_count++;
2910
      }
2911
2912
    private_srcw[0] = PRIVATE_DATA(cc);
2913
    if (recurse_check_bit(common, private_srcw[0]))
2914
      private_count = 1;
2915
2916
    offset = OVECTOR_PRIV(offset);
2917
    if (recurse_check_bit(common, offset))
2918
      {
2919
      private_srcw[private_count] = offset;
2920
      private_count++;
2921
      }
2922
    cc += 1 + LINK_SIZE + IMM2_SIZE;
2923
    break;
2924
2925
    case OP_COND:
2926
    /* Might be a hidden SCOND. */
2927
    alternative = cc + GET(cc, 1);
2928
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
2929
      {
2930
      private_srcw[0] = PRIVATE_DATA(cc);
2931
      if (recurse_check_bit(common, private_srcw[0]))
2932
        private_count = 1;
2933
      }
2934
    cc += 1 + LINK_SIZE;
2935
    break;
2936
2937
    CASE_ITERATOR_PRIVATE_DATA_1
2938
    private_srcw[0] = PRIVATE_DATA(cc);
2939
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
2940
      private_count = 1;
2941
    cc += 2;
2942
#ifdef SUPPORT_UNICODE
2943
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2944
#endif
2945
    break;
2946
2947
    CASE_ITERATOR_PRIVATE_DATA_2A
2948
    private_srcw[0] = PRIVATE_DATA(cc);
2949
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
2950
      {
2951
      private_count = 2;
2952
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2953
      SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
2954
      }
2955
    cc += 2;
2956
#ifdef SUPPORT_UNICODE
2957
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2958
#endif
2959
    break;
2960
2961
    CASE_ITERATOR_PRIVATE_DATA_2B
2962
    private_srcw[0] = PRIVATE_DATA(cc);
2963
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
2964
      {
2965
      private_count = 2;
2966
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2967
      SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
2968
      }
2969
    cc += 2 + IMM2_SIZE;
2970
#ifdef SUPPORT_UNICODE
2971
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2972
#endif
2973
    break;
2974
2975
    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2976
    private_srcw[0] = PRIVATE_DATA(cc);
2977
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
2978
      private_count = 1;
2979
    cc += 1;
2980
    break;
2981
2982
    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2983
    private_srcw[0] = PRIVATE_DATA(cc);
2984
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
2985
      {
2986
      private_count = 2;
2987
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2988
      SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
2989
      }
2990
    cc += 1;
2991
    break;
2992
2993
    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2994
    private_srcw[0] = PRIVATE_DATA(cc);
2995
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
2996
      {
2997
      private_count = 2;
2998
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2999
      SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
3000
      }
3001
    cc += 1 + IMM2_SIZE;
3002
    break;
3003
3004
    case OP_CLASS:
3005
    case OP_NCLASS:
3006
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
3007
    case OP_XCLASS:
3008
    i = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
3009
#else
3010
    i = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
3011
#endif
3012
    if (PRIVATE_DATA(cc) != 0)
3013
      {
3014
      private_count = 1;
3015
      private_srcw[0] = PRIVATE_DATA(cc);
3016
      switch(get_class_iterator_size(cc + i))
3017
        {
3018
        case 1:
3019
        break;
3020
3021
        case 2:
3022
        if (recurse_check_bit(common, private_srcw[0]))
3023
          {
3024
          private_count = 2;
3025
          private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
3026
          SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
3027
          }
3028
        break;
3029
3030
        default:
3031
        SLJIT_UNREACHABLE();
3032
        break;
3033
        }
3034
      }
3035
    cc += i;
3036
    break;
3037
3038
    case OP_MARK:
3039
    case OP_COMMIT_ARG:
3040
    case OP_PRUNE_ARG:
3041
    case OP_THEN_ARG:
3042
    SLJIT_ASSERT(common->mark_ptr != 0);
3043
    if ((recurse_flags & recurse_flag_quit_found) && recurse_check_bit(common, common->mark_ptr))
3044
      {
3045
      kept_shared_srcw[0] = common->mark_ptr;
3046
      kept_shared_count = 1;
3047
      }
3048
    if (common->control_head_ptr != 0 && recurse_check_bit(common, common->control_head_ptr))
3049
      {
3050
      private_srcw[0] = common->control_head_ptr;
3051
      private_count = 1;
3052
      }
3053
    cc += 1 + 2 + cc[1];
3054
    break;
3055
3056
    case OP_THEN:
3057
    SLJIT_ASSERT(common->control_head_ptr != 0);
3058
    if (recurse_check_bit(common, common->control_head_ptr))
3059
      {
3060
      private_srcw[0] = common->control_head_ptr;
3061
      private_count = 1;
3062
      }
3063
    cc++;
3064
    break;
3065
3066
    default:
3067
    cc = next_opcode(common, cc);
3068
    SLJIT_ASSERT(cc != NULL);
3069
    continue;
3070
    }
3071
3072
  if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
3073
    {
3074
    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
3075
3076
    for (i = 0; i < private_count; i++)
3077
      {
3078
      SLJIT_ASSERT(private_srcw[i] != 0);
3079
3080
      if (!from_sp)
3081
        delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, private_srcw[i]);
3082
3083
      if (from_sp || type == recurse_swap_global)
3084
        delayed_mem_copy_move(&status, SLJIT_SP, private_srcw[i], base_reg, stackptr);
3085
3086
      stackptr += sizeof(sljit_sw);
3087
      }
3088
    }
3089
  else
3090
    stackptr += sizeof(sljit_sw) * private_count;
3091
3092
  if (type != recurse_copy_private_to_global && type != recurse_copy_kept_shared_to_global)
3093
    {
3094
    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_swap_global);
3095
3096
    for (i = 0; i < shared_count; i++)
3097
      {
3098
      SLJIT_ASSERT(shared_srcw[i] != 0);
3099
3100
      if (!from_sp)
3101
        delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, shared_srcw[i]);
3102
3103
      if (from_sp || type == recurse_swap_global)
3104
        delayed_mem_copy_move(&status, SLJIT_SP, shared_srcw[i], base_reg, stackptr);
3105
3106
      stackptr += sizeof(sljit_sw);
3107
      }
3108
    }
3109
  else
3110
    stackptr += sizeof(sljit_sw) * shared_count;
3111
3112
  if (type != recurse_copy_private_to_global && type != recurse_swap_global)
3113
    {
3114
    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_copy_kept_shared_to_global);
3115
3116
    for (i = 0; i < kept_shared_count; i++)
3117
      {
3118
      SLJIT_ASSERT(kept_shared_srcw[i] != 0);
3119
3120
      if (!from_sp)
3121
        delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, kept_shared_srcw[i]);
3122
3123
      if (from_sp || type == recurse_swap_global)
3124
        delayed_mem_copy_move(&status, SLJIT_SP, kept_shared_srcw[i], base_reg, stackptr);
3125
3126
      stackptr += sizeof(sljit_sw);
3127
      }
3128
    }
3129
  else
3130
    stackptr += sizeof(sljit_sw) * kept_shared_count;
3131
  }
3132
3133
SLJIT_ASSERT(cc == ccend && stackptr == stacktop);
3134
3135
delayed_mem_copy_finish(&status);
3136
}
3137
3138
static SLJIT_INLINE PCRE2_SPTR set_then_offsets(compiler_common *common, PCRE2_SPTR cc, sljit_u8 *current_offset)
{
/* Walks the bracket starting at cc, recursing into nested assertions and
brackets, and stores 1 into the currently selected then_offsets slot for
every opcode in the OP_THEN .. OP_THEN_ARG range. current_offset is the slot
inherited from the caller, or NULL when nothing must be recorded. Returns
the bracketend() of the opcode this call started at. */
PCRE2_SPTR block_end = bracketend(cc);
/* A slot is selected per alternative only when the first branch is followed
by OP_ALT. Conditional blocks never select their own slots. */
BOOL track_alts = (cc[GET(cc, 1)] == OP_ALT) && *cc != OP_COND && *cc != OP_SCOND;

/* Assert captures then: inside an assertion nothing is recorded unless an
inner alternative selects a slot of its own. */
if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA)
  current_offset = NULL;

cc = next_opcode(common, cc);

if (track_alts)
  {
  /* The slot of the first alternative is indexed after a leading
  OP_REVERSE / OP_VREVERSE. */
  if (*cc == OP_REVERSE)
    cc += 1 + IMM2_SIZE;
  else if (*cc == OP_VREVERSE)
    cc += 1 + 2 * IMM2_SIZE;

  current_offset = common->then_offsets + (cc - common->start);
  }

while (cc < block_end)
  {
  /* Nested assertions and brackets are processed recursively. */
  if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
    {
    cc = set_then_offsets(common, cc, current_offset);
    continue;
    }

  /* A new alternative of this bracket selects a new slot. */
  if (track_alts && *cc == OP_ALT)
    {
    cc += 1 + LINK_SIZE;

    if (*cc == OP_REVERSE)
      cc += 1 + IMM2_SIZE;
    else if (*cc == OP_VREVERSE)
      cc += 1 + 2 * IMM2_SIZE;

    current_offset = common->then_offsets + (cc - common->start);
    continue;
    }

  if (current_offset != NULL && *cc >= OP_THEN && *cc <= OP_THEN_ARG)
    *current_offset = 1;
  cc = next_opcode(common, cc);
  }

return block_end;
}
3189
3190
/* Retire the CASE_ITERATOR_* helper macros, which are used as switch case
labels by the code above. */
#undef CASE_ITERATOR_PRIVATE_DATA_1
3191
#undef CASE_ITERATOR_PRIVATE_DATA_2A
3192
#undef CASE_ITERATOR_PRIVATE_DATA_2B
3193
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
3194
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
3195
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
3196
3197
static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
{
/* value & (value - 1) clears the lowest set bit, so the result is zero only
when at most one bit was set. Note that zero itself is therefore reported
as TRUE as well. */
unsigned int remaining_bits = value & (value - 1);

return remaining_bits == 0;
}
3201
3202
static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
{
/* Binds every jump stored in the list to the given label. */
jump_list *item;

for (item = list; item != NULL; item = item->next)
  {
  /* sljit_set_label is clever enough to do nothing
  if either the jump or the label is NULL. */
  SET_LABEL(item->jump, label);
  }
}
3212
3213
static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
{
/* Pushes the jump onto the front of *list. The list item is obtained from
sljit_alloc_memory; when that returns NULL the list is left unchanged and
the jump is not recorded. */
jump_list *node = sljit_alloc_memory(compiler, sizeof(jump_list));

if (node == NULL)
  return;

node->jump = jump;
node->next = *list;
*list = node;
}
3223
3224
static void add_stub(compiler_common *common, struct sljit_jump *start)
{
/* Records a stub on the front of common->stubs: start is the jump that
enters the stub, and a label emitted at the current position is stored as
the place to continue at (quit). Nothing is emitted or recorded when the
list item cannot be allocated. */
DEFINE_COMPILER;
stub_list *stub = sljit_alloc_memory(compiler, sizeof(stub_list));

if (stub == NULL)
  return;

stub->start = start;
stub->quit = LABEL();
stub->next = common->stubs;
common->stubs = stub;
}
3237
3238
static void flush_stubs(compiler_common *common)
{
/* Emits the body of every recorded stub and then empties the stub list.
Each body binds the stub's start jump to the current position, adds a
fast call to the common->stackalloc jump list, and jumps back to the
stub's quit label. */
DEFINE_COMPILER;
stub_list *stub;

for (stub = common->stubs; stub != NULL; stub = stub->next)
  {
  JUMPHERE(stub->start);
  add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
  JUMPTO(SLJIT_JUMP, stub->quit);
  }

common->stubs = NULL;
}
3252
3253
static SLJIT_INLINE void count_match(compiler_common *common)
{
/* Emits code that decrements COUNT_MATCH by one and, when the result is
zero, takes a jump that is collected in common->calllimit. */
DEFINE_COMPILER;
struct sljit_jump *limit_reached;

OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
limit_reached = JUMP(SLJIT_ZERO);
add_jump(compiler, &common->calllimit, limit_reached);
}
3260
3261
static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
{
/* Emits code that lowers STACK_TOP by size machine words and registers a
stub that is entered when the new STACK_TOP is below STACK_LIMIT.
May destroy all locals and registers except TMP2. */
DEFINE_COMPILER;
struct sljit_jump *below_limit;

SLJIT_ASSERT(size > 0);
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * SSIZE_OF(sw));
#ifdef DESTROY_REGISTERS
/* Overwrite everything that may be destroyed with a recognizable value. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
#endif
below_limit = CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0);
add_stub(common, below_limit);
}
3277
3278
static SLJIT_INLINE void free_stack(compiler_common *common, int size)
{
/* Emits code that raises STACK_TOP by size machine words, the inverse of
the adjustment emitted by allocate_stack. */
DEFINE_COMPILER;
sljit_sw released_bytes;

SLJIT_ASSERT(size > 0);
released_bytes = size * SSIZE_OF(sw);
OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, released_bytes);
}
3285
3286
static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
{
/* Allocates size bytes plus one leading sljit_uw with SLJIT_MALLOC. The
leading word stores the previous common->read_only_data_head, so all
blocks form a singly linked list, and the returned pointer addresses the
memory right after that word. Returns NULL when the compiler is already
in an error state, or when the allocation fails (in which case the
compiler memory error is set as well). */
DEFINE_COMPILER;
sljit_uw *block;

if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  return NULL;

block = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);

if (SLJIT_UNLIKELY(block == NULL))
  {
  sljit_set_compiler_memory_error(compiler);
  return NULL;
  }

/* Link the new block in front of the existing ones. */
*(void**)block = common->read_only_data_head;
common->read_only_data_head = (void *)block;

return block + 1;
}
3305
3306
static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
{
/* Emits code that stores "begin - 1" (begin is read from the jit_arguments
structure addressed by SLJIT_S0) into OVECTOR(1) .. OVECTOR(length - 1). */
DEFINE_COMPILER;
struct sljit_label *fill_loop;
sljit_s32 slot;
BOOL pre_store;

/* At this point we can freely use all temporary registers. */
SLJIT_ASSERT(length > 1);
/* SLJIT_R0 returns with begin - 1 (the original note calls this register TMP1). */
OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));

/* Short vectors are filled by straight-line stores. */
if (length < 8)
  {
  for (slot = 1; slot < length; slot++)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(slot), SLJIT_R0, 0);
  return;
  }

/* The SLJIT_MEM_SUPP query decides whether the loop uses a pre-update store
or a plain store followed by an explicit pointer increment. */
pre_store = sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS;

/* With the pre-update form the pointer starts one word earlier. */
GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START + (pre_store ? 0 : sizeof(sljit_sw)));
OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
fill_loop = LABEL();

if (pre_store)
  sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
else
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0);
  OP2(SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw));
  }

/* SLJIT_R2 counts the remaining stores. */
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, fill_loop);
}
3344
3345
static SLJIT_INLINE void reset_early_fail(compiler_common *common)
{
/* Emits code that writes zero into every sljit_sw sized local slot in the
range [early_fail_start_ptr, early_fail_end_ptr). */
DEFINE_COMPILER;
sljit_u32 total_bytes = (sljit_u32)(common->early_fail_end_ptr - common->early_fail_start_ptr);
sljit_u32 tail_bytes;
sljit_s32 zero_src = SLJIT_IMM;
sljit_s32 offset;
struct sljit_label *clear_loop;

SLJIT_ASSERT(common->early_fail_start_ptr < common->early_fail_end_ptr);

/* A single slot is cleared by one store of an immediate zero. */
if (total_bytes == sizeof(sljit_sw))
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->early_fail_start_ptr, SLJIT_IMM, 0);
  return;
  }

/* The zero is kept in TMP3 when sljit reports a non-negative register
index for TMP3 and the CPU has no zero register. Otherwise every store
uses an immediate zero. */
if (sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) >= 0 && !sljit_has_cpu_feature(SLJIT_HAS_ZERO_REGISTER))
  {
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
  zero_src = TMP3;
  }

/* Up to six slots are cleared by straight-line stores. */
if (total_bytes <= 6 * sizeof(sljit_sw))
  {
  for (offset = common->early_fail_start_ptr; offset < common->early_fail_end_ptr; offset += sizeof(sljit_sw))
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), offset, zero_src, 0);
  return;
  }

GET_LOCAL_BASE(TMP1, 0, common->early_fail_start_ptr);

/* The loop clears three slots per iteration. The one or two slots that do
not fill a whole iteration are cleared after the loop. */
tail_bytes = ((total_bytes / sizeof(sljit_sw)) % 3) * sizeof(sljit_sw);

/* TMP2 is the address where the loop must stop. */
OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, total_bytes - tail_bytes);

clear_loop = LABEL();
OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, zero_src, 0);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -2 * SSIZE_OF(sw), zero_src, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -1 * SSIZE_OF(sw), zero_src, 0);
CMPTO(SLJIT_LESS, TMP1, 0, TMP2, 0, clear_loop);

if (tail_bytes >= sizeof(sljit_sw))
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, zero_src, 0);

  if (tail_bytes >= 2 * sizeof(sljit_sw))
    OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), sizeof(sljit_sw), zero_src, 0);
  }
}
3394
3395
static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
{
/* Emits code that copies the value of OVECTOR(1) into OVECTOR(2) ..
OVECTOR(length - 1), clears the mark_ptr and control_head_ptr locals when
they are in use, loads the start_ptr local into TMP1, and reloads STACK_TOP
from the "end" member of the sljit_stack referenced by the jit_arguments
structure. */
DEFINE_COMPILER;
struct sljit_label *fill_loop;
int slot;
BOOL pre_store;

SLJIT_ASSERT(length > 1);
/* OVECTOR(1) contains the "string begin - 1" constant. */
if (length > 2)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));

if (length >= 8)
  {
  /* The SLJIT_MEM_SUPP query decides whether the loop uses a pre-update
  store or a plain store followed by an explicit pointer increment. */
  pre_store = sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS;

  /* With the pre-update form the pointer starts one word earlier. */
  GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + (pre_store ? 1 : 2) * sizeof(sljit_sw));
  /* STACK_TOP is used as the loop counter here; it is reloaded below. */
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
  fill_loop = LABEL();

  if (pre_store)
    sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
  else
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(sljit_sw));
    }

  OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
  JUMPTO(SLJIT_NOT_ZERO, fill_loop);
  }
else
  {
  /* Short vectors are filled by straight-line stores. */
  for (slot = 2; slot < length; slot++)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(slot), TMP1, 0);
  }

/* With virtual registers the arguments pointer is first copied into
STACK_TOP and dereferenced later; otherwise the stack member is loaded
directly. */
if (HAS_VIRTUAL_REGISTERS)
  OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
else
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, stack));

if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
if (common->control_head_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
if (HAS_VIRTUAL_REGISTERS)
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));

OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, end));
}
3448
3449
static sljit_sw SLJIT_FUNC do_search_mark(sljit_sw *current, PCRE2_SPTR skip_arg)
{
/* Follows the chain of records starting at current. In each record word 0
links to the next record (0 ends the chain) and word 1 is the record type.
For a type_mark record word 2 points to the mark name, and word 3 is
returned when that name equals skip_arg. Returns 0 when no mark matches. */
while (current != NULL)
  {
  if (current[1] == type_mark)
    {
    if (PRIV(strcmp)(skip_arg, (PCRE2_SPTR)current[2]) == 0)
      return current[3];
    }
  else if (current[1] != type_then_trap)
    {
    /* Only the two record types above are expected in the chain. */
    SLJIT_UNREACHABLE();
    }

  /* Every link must point to a higher address. */
  SLJIT_ASSERT(current[0] == 0 || current < (sljit_sw*)current[0]);
  current = (sljit_sw*)current[0];
  }

return 0;
}
3472
3473
static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
{
/* Emits the code that publishes a match: the start_ptr local is stored in
the startchar_ptr argument, the mark (if used) in the mark_ptr argument,
and oveccount local ovector slots are converted to code unit offsets
relative to "begin" and written to the ovector of the match data. The
emitted code leaves the return value in SLJIT_RETURN_REG. */
DEFINE_COMPILER;
struct sljit_label *copy_loop;
struct sljit_label *scan_loop;
BOOL uses_mark = common->mark_ptr != 0;
BOOL pre_load;
BOOL pre_scan;
sljit_s32 args_reg = HAS_VIRTUAL_REGISTERS ? SLJIT_R0 : ARGUMENTS;
sljit_s32 size_mov = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;

/* At this point we can freely use all registers. */
/* SLJIT_S2 keeps the old OVECTOR(1) value, which is compared against the
slots further below; OVECTOR(1) itself becomes the current STR_PTR. */
OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);

/* Both variants leave oveccount in SLJIT_R1 and the address one PCRE2_SIZE
before the output ovector in SLJIT_R2. */
if (!HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, match_data));
  if (uses_mark)
    OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
  OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, oveccount));
  OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
  if (uses_mark)
    OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R0, 0);
  OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
  }
else
  {
  OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
  if (uses_mark)
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
  OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, oveccount));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
  if (uses_mark)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
  OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, match_data),
    SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
  }

/* The SLJIT_MEM_SUPP query decides whether the copy loop uses a pre-update
load or a plain load followed by an explicit pointer increment. */
pre_load = sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;

GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (pre_load ? sizeof(sljit_sw) : 0));
OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, begin));

copy_loop = LABEL();

if (!pre_load)
  {
  OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0);
  OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
  }
else
  sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));

OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, sizeof(PCRE2_SIZE));
/* Convert the stored pointer to an offset from "begin". */
OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_R0, 0);
/* Copy the integer value to the output buffer */
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif

SLJIT_ASSERT(sizeof(PCRE2_SIZE) == 4 || sizeof(PCRE2_SIZE) == 8);
OP1(size_mov, SLJIT_MEM1(SLJIT_R2), 0, SLJIT_S1, 0);

OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, copy_loop);

/* Calculate the return value, which is the maximum ovector value. */
if (topbracket <= 1)
  {
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
  return;
  }

/* The slots are scanned downwards, two words at a time, until one differs
from the value saved in SLJIT_S2. The same query as above selects the
load form. */
pre_scan = sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * SSIZE_OF(sw))) == SLJIT_SUCCESS;

GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + (pre_scan ? topbracket : topbracket - 1) * 2 * sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);

/* OVECTOR(0) is never equal to SLJIT_S2. */
scan_loop = LABEL();

if (pre_scan)
  sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * SSIZE_OF(sw)));
else
  {
  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0);
  OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
  }

OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, scan_loop);
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
}
3569
3570
static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
{
/* Emits the code that reports a partial match: PCRE2_ERROR_PARTIAL is put
in SLJIT_RETURN_REG, the start of the partial match is stored in the
startchar_ptr argument, ovector[0] and ovector[1] of the match data receive
the offsets of that start and of STR_END relative to "begin", and control
is transferred to the quit label. */
DEFINE_COMPILER;
sljit_s32 size_mov = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
sljit_s32 args_reg = HAS_VIRTUAL_REGISTERS ? SLJIT_R1 : ARGUMENTS;
/* Soft partial matching reports hit_start, hard partial matching reports
start_ptr. */
sljit_sw start_local = common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start : common->start_ptr;

SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S0, str_end_must_be_saved_reg0);
SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
  && (common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start != 0 : common->hit_start == 0));

if (args_reg != ARGUMENTS)
  OP1(SLJIT_MOV, args_reg, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), start_local);
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_PARTIAL);

/* Store match begin and end. */
OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_R2, 0);
OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, match_data));

/* ovector[0]: offset of the partial match start. */
OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S1, 0);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(size_mov, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector), SLJIT_R2, 0);

/* ovector[1]: offset of STR_END. */
OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_S1, 0);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
OP2(SLJIT_ASHR, STR_END, 0, STR_END, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(size_mov, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector) + sizeof(PCRE2_SIZE), STR_END, 0);

JUMPTO(SLJIT_JUMP, quit);
}
3607
3608
static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
{
/* Emits code that lowers the start_used_ptr local to STR_PTR when STR_PTR
is below it. Nothing is emitted unless a partial matching mode is active.
May destroy TMP1. */
DEFINE_COMPILER;
struct sljit_jump *keep_value;

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  /* The value of -1 must be kept for start_used_ptr! */
  OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
  /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although
  overwriting is not necessary if start_used_ptr == STR_PTR, it does no
  harm either. */
  keep_value = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  JUMPHERE(keep_value);
  return;
  }

if (common->mode != PCRE2_JIT_PARTIAL_HARD)
  return;

keep_value = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
JUMPHERE(keep_value);
}
3631
3632
static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, PCRE2_SPTR cc)
{
/* Detects if the character at cc has an othercase. Characters above 127
are looked up with UCD_OTHERCASE when utf or ucp is set; everything else
uses the common->fcc table (only for values accepted by MAX_255). */
unsigned int chr;

#ifdef SUPPORT_UNICODE
if (common->utf || common->ucp)
  {
  if (!common->utf)
    chr = *cc;
  else
    {
    GETCHAR(chr, cc);
    }

  return (chr > 127) ? (chr != UCD_OTHERCASE(chr)) : (common->fcc[chr] != chr);
  }
#endif

chr = *cc;
if (!MAX_255(chr))
  return FALSE;
return common->fcc[chr] != chr;
}
3657
3658
static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
{
/* Returns with the othercase of c: UCD_OTHERCASE is used for characters
above 127 when utf or ucp is set, the common->fcc table (through
TABLE_GET, with c itself as the default) otherwise. */
#ifdef SUPPORT_UNICODE
if (c > 127 && (common->utf || common->ucp))
  return UCD_OTHERCASE(c);
#endif
return TABLE_GET(c, common->fcc, c);
}
3667
3668
static unsigned int char_get_othercase_bit(compiler_common *common, PCRE2_SPTR cc)
{
/* Detects if the character and its othercase has only 1 bit difference.
Returns 0 when they differ in more than one bit. Otherwise the low 8 bits
of the result hold the differing bit and the bits above them hold an index
(the callers presumably use it to locate the affected byte or code unit -
confirm against the call sites). The character must have an othercase. */
unsigned int chr, other, diff;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
int extra;
#endif

#ifdef SUPPORT_UNICODE
if (common->utf || common->ucp)
  {
  if (!common->utf)
    chr = *cc;
  else
    {
    GETCHAR(chr, cc);
    }

  other = (chr <= 127) ? common->fcc[chr] : UCD_OTHERCASE(chr);
  }
else
#endif
  {
  chr = *cc;
  other = TABLE_GET(chr, common->fcc, chr);
  }

SLJIT_ASSERT(chr != other);

diff = chr ^ other;
/* Optimized for English alphabet. */
if (chr <= 127 && diff == 0x20)
  return (0 << 8) | 0x20;

/* Since chr != other, they must have at least 1 bit difference. */
if (!is_powerof2(diff))
  return 0;

#if PCRE2_CODE_UNIT_WIDTH == 8

#ifdef SUPPORT_UNICODE
if (common->utf && chr > 127)
  {
  /* Each step moves the differing bit down by 6 bits and the index one
  position back from GET_EXTRALEN(*cc). */
  extra = GET_EXTRALEN(*cc);
  while ((diff & 0x3f) == 0)
    {
    extra--;
    diff >>= 6;
    }
  return (extra << 8) | diff;
  }
#endif /* SUPPORT_UNICODE */
return (0 << 8) | diff;

#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32

#ifdef SUPPORT_UNICODE
if (common->utf && chr > 65535)
  {
  if (diff < (1u << 10))
    return (diff < 256) ? ((2 << 8) | diff) : ((3 << 8) | (diff >> 8));

  diff >>= 10;
  }
#endif /* SUPPORT_UNICODE */
return (diff < 256) ? ((0u << 8) | diff) : ((1u << 8) | (diff >> 8));

#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
}
3743
3744
static void check_partial(compiler_common *common, BOOL force)
{
/* Checks whether a partial match has occurred. Does not modify registers.
Nothing is emitted in PCRE2_JIT_COMPLETE mode, and force must not be set
in that mode. */
DEFINE_COMPILER;
struct sljit_jump *skip = NULL;

SLJIT_ASSERT(!force || common->mode != PCRE2_JIT_COMPLETE);

if (common->mode == PCRE2_JIT_COMPLETE)
  return;

/* Decide which condition skips the partial match handling below. */
if (force || common->allow_empty_partial)
  {
  if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
    skip = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
  }
else
  skip = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);

/* Soft mode only clears the hit_start local; hard mode leaves through the
partial match exit. */
if (common->mode != PCRE2_JIT_PARTIAL_SOFT)
  {
  if (common->partialmatchlabel == NULL)
    add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
  else
    JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
  }
else
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);

if (skip != NULL)
  JUMPHERE(skip);
}
3773
3774
static void check_str_end(compiler_common *common, jump_list **end_reached)
{
/* Emits a check for STR_PTR >= STR_END. The jumps taken when the end is
reached are collected in end_reached, except that hard partial matching
leaves through the partial match exit when start_used_ptr is below STR_PTR.
Does not affect registers. Usually used in a tight spot. */
DEFINE_COMPILER;
struct sljit_jump *not_at_end;

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

/* Both partial modes first test start_used_ptr >= STR_PTR. */
add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

if (common->mode != PCRE2_JIT_PARTIAL_SOFT)
  {
  if (common->partialmatchlabel == NULL)
    add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
  else
    JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
  }
else
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
  }

JUMPHERE(not_at_end);
}
3803
3804
static void detect_partial_match(compiler_common *common, jump_list **backtracks)
{
/* Emits a check for STR_PTR >= STR_END. In PCRE2_JIT_COMPLETE mode reaching
the end simply adds a jump to backtracks. In the partial modes the end of
the subject is additionally examined for a partial match: soft mode clears
the hit_start local and then backtracks, hard mode leaves through the
partial match exit. */
DEFINE_COMPILER;
struct sljit_jump *not_at_end;

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

/* Partial matching mode. */
not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

if (common->allow_empty_partial)
  {
  if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1));
  }
else
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

if (common->mode != PCRE2_JIT_PARTIAL_SOFT)
  {
  if (common->partialmatchlabel == NULL)
    add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
  else
    JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
  }
else
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  }

JUMPHERE(not_at_end);
}
3836
3837
static void process_partial_match(compiler_common *common)
{
/* Emits the partial match handling that depends on start_used_ptr. In soft
mode the hit_start local is cleared when start_used_ptr is below STR_PTR.
In hard mode the same condition leaves through the partial match exit.
Nothing is emitted in any other mode. */
DEFINE_COMPILER;
struct sljit_jump *skip;

/* Partial matching mode. */
if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  skip = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  JUMPHERE(skip);
  return;
  }

if (common->mode != PCRE2_JIT_PARTIAL_HARD)
  return;

if (common->partialmatchlabel == NULL)
  add_jump(compiler, &common->partialmatch, CMP(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
else
  CMPTO(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0, common->partialmatchlabel);
}
3857
3858
static void detect_partial_match_to(compiler_common *common, struct sljit_label *label)
{
/* Emits a jump to label that is taken while STR_PTR is below STR_END. The
fall-through path (end of subject reached) continues with the code emitted
by process_partial_match. */
DEFINE_COMPILER;

CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, label);

process_partial_match(common);
}
3865
3866
static void peek_char(compiler_common *common, sljit_u32 max, sljit_s32 dst, sljit_sw dstw, jump_list **backtracks)
{
/* Loads the character at STR_PTR into TMP1 without moving STR_PTR.
STR_END is not checked. TMP2, dst and RETURN_ADDR may be overwritten.
A multi-unit character is only decoded when max allows values which
need more than one code unit. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *single_unit;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */

SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(dst);
SLJIT_UNUSED_ARG(dstw);
SLJIT_UNUSED_ARG(backtracks);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (!common->utf || max < 128)
  return;

single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);

/* STR_PTR is parked in dst while the decoding helper runs, and it is
restored afterwards. */
OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
if (common->invalid_utf)
  add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
else
  add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);

if (backtracks != NULL && common->invalid_utf)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));

JUMPHERE(single_unit);
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (!common->utf || max < 0xd800)
  return;

OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

if (common->invalid_utf)
  {
  /* Anything outside the surrogate range is complete already. */
  single_unit = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
  OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
  if (backtracks != NULL)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  }
else
  {
  /* Here TMP2 is the high surrogate with 0xd800 subtracted. */
  single_unit = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  }

JUMPHERE(single_unit);
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (!common->invalid_utf || max < 0xd800)
  return;

/* TMP2 is below 0x800 exactly when TMP1 is in the 0xd800-0xdfff range. */
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

if (backtracks != NULL)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
  }
else
  {
  /* No backtrack list: replace bad values by INVALID_UTF_CHAR instead. */
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000);
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
  SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
3949
3950
static void peek_char_back(compiler_common *common, sljit_u32 max, jump_list **backtracks)
{
/* Loads the character which ends right before STR_PTR into TMP1; STR_PTR
itself is not moved. On entry TMP2 must hold the start of the subject
buffer. TMP1, TMP2 and RETURN_ADDR are affected. */
DEFINE_COMPILER;

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *single_unit;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */

SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(backtracks);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (!common->utf || max < 128)
  return;

single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
if (!common->invalid_utf)
  add_jump(compiler, &common->utfpeakcharback, JUMP(SLJIT_FAST_CALL));
else
  {
  add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
  if (backtracks != NULL)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  }
JUMPHERE(single_unit);
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (!common->utf || max < 0xd800)
  return;

if (common->invalid_utf)
  {
  single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
  add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
  if (backtracks != NULL)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  }
else
  {
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
  single_unit = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xdc00);
  /* Here TMP2 is the low surrogate with 0xdc00 subtracted; the high
  surrogate is the code unit before it. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  }
JUMPHERE(single_unit);
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (!common->invalid_utf)
  return;

/* Backtrack for values above 0x10ffff and for the 0xd800-0xdfff range. */
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
4017
4018
/* Option bits for the options argument of read_char(). */
/* STR_PTR must be moved past the whole character, even in the cases
where read_char() does not decode its value. */
#define READ_CHAR_UPDATE_STR_PTR 0x1
4019
/* When invalid UTF checking is active, decode with the
utfreadnewline_invalid helper instead of utfreadchar_invalid. */
#define READ_CHAR_UTF8_NEWLINE 0x2
4020
/* Combination of the two flags above. */
#define READ_CHAR_NEWLINE (READ_CHAR_UPDATE_STR_PTR | READ_CHAR_UTF8_NEWLINE)
4021
/* Use the unchecked decoding code even if common->invalid_utf is set. */
#define READ_CHAR_VALID_UTF 0x4
4022
4023
static void read_char(compiler_common *common, sljit_u32 min, sljit_u32 max,
4024
  jump_list **backtracks, sljit_u32 options)
4025
{
4026
/* Reads the precise value of a character into TMP1, if the character is
4027
between min and max (c >= min && c <= max). Otherwise it returns with a value
4028
outside the range. Does not check STR_END. */
4029
DEFINE_COMPILER;
4030
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
4031
struct sljit_jump *jump;
4032
#endif
4033
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
4034
struct sljit_jump *jump2;
4035
#endif
4036
4037
SLJIT_UNUSED_ARG(min);
4038
SLJIT_UNUSED_ARG(max);
4039
SLJIT_UNUSED_ARG(backtracks);
4040
SLJIT_UNUSED_ARG(options);
4041
SLJIT_ASSERT(min <= max);
4042
4043
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4044
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4045
4046
#ifdef SUPPORT_UNICODE
4047
#if PCRE2_CODE_UNIT_WIDTH == 8
4048
if (common->utf)
4049
  {
4050
  if (max < 128 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;
4051
4052
  if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
4053
    {
4054
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
4055
4056
    if (options & READ_CHAR_UTF8_NEWLINE)
4057
      add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
4058
    else
4059
      add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
4060
4061
    if (backtracks != NULL)
4062
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
4063
    JUMPHERE(jump);
4064
    return;
4065
    }
4066
4067
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4068
  if (min >= 0x10000)
4069
    {
4070
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
4071
    if (options & READ_CHAR_UPDATE_STR_PTR)
4072
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4073
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4074
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
4075
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
4076
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
4077
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4078
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4079
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4080
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
4081
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4082
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
4083
    if (!(options & READ_CHAR_UPDATE_STR_PTR))
4084
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
4085
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4086
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
4087
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4088
    JUMPHERE(jump2);
4089
    if (options & READ_CHAR_UPDATE_STR_PTR)
4090
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
4091
    }
4092
  else if (min >= 0x800 && max <= 0xffff)
4093
    {
4094
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
4095
    if (options & READ_CHAR_UPDATE_STR_PTR)
4096
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4097
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4098
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
4099
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
4100
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
4101
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4102
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4103
    if (!(options & READ_CHAR_UPDATE_STR_PTR))
4104
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4105
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4106
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
4107
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4108
    JUMPHERE(jump2);
4109
    if (options & READ_CHAR_UPDATE_STR_PTR)
4110
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
4111
    }
4112
  else if (max >= 0x800)
4113
    {
4114
    add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
4115
    }
4116
  else if (max < 128)
4117
    {
4118
    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4119
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4120
    }
4121
  else
4122
    {
4123
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4124
    if (!(options & READ_CHAR_UPDATE_STR_PTR))
4125
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4126
    else
4127
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4128
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
4129
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4130
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
4131
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4132
    if (options & READ_CHAR_UPDATE_STR_PTR)
4133
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
4134
    }
4135
  JUMPHERE(jump);
4136
  }
4137
#elif PCRE2_CODE_UNIT_WIDTH == 16
4138
if (common->utf)
4139
  {
4140
  if (max < 0xd800 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;
4141
4142
  if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
4143
    {
4144
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4145
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
4146
4147
    if (options & READ_CHAR_UTF8_NEWLINE)
4148
      add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
4149
    else
4150
      add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
4151
4152
    if (backtracks != NULL)
4153
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
4154
    JUMPHERE(jump);
4155
    return;
4156
    }
4157
4158
  if (max >= 0x10000)
4159
    {
4160
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4161
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
4162
    /* TMP2 contains the high surrogate. */
4163
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4164
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
4165
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4166
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
4167
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
4168
    JUMPHERE(jump);
4169
    return;
4170
    }
4171
4172
  /* Skip low surrogate if necessary. */
4173
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4174
4175
  if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
4176
    {
4177
    if (options & READ_CHAR_UPDATE_STR_PTR)
4178
      OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4179
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
4180
    if (options & READ_CHAR_UPDATE_STR_PTR)
4181
      SELECT(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0, STR_PTR);
4182
    if (max >= 0xd800)
4183
      SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, 0x10000, TMP1);
4184
    }
4185
  else
4186
    {
4187
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
4188
    if (options & READ_CHAR_UPDATE_STR_PTR)
4189
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4190
    if (max >= 0xd800)
4191
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
4192
    JUMPHERE(jump);
4193
    }
4194
  }
4195
#elif PCRE2_CODE_UNIT_WIDTH == 32
4196
if (common->invalid_utf)
4197
  {
4198
  if (backtracks != NULL)
4199
    {
4200
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4201
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
4202
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
4203
    }
4204
  else
4205
    {
4206
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4207
    OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000);
4208
    SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
4209
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
4210
    SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
4211
    }
4212
  }
4213
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
4214
#endif /* SUPPORT_UNICODE */
4215
}
4216
4217
static void skip_valid_char(compiler_common *common)
{
/* Moves STR_PTR forward by one character. No validity or STR_END checks
are emitted. TMP1 is overwritten in UTF mode. */
DEFINE_COMPILER;
#if (defined SUPPORT_UNICODE) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
struct sljit_jump *single_unit;

if (common->utf)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if PCRE2_CODE_UNIT_WIDTH == 8
  /* The number of bytes still to skip is looked up in utf8_table4. */
  single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#else /* PCRE2_CODE_UNIT_WIDTH == 16 */
  /* One more code unit is skipped after a high surrogate. */
  single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xd800);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
  JUMPHERE(single_unit);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
4247
4248
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
4249
4250
static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
{
/* Returns TRUE when the character codes below 128 are enough to decide a
match: every byte in the upper half (bytes 16..31) of the 32 byte bitset
must be zero, or 0xff when nclass is set. */
const sljit_u8 expected = nclass ? 0xff : 0;
int i;

for (i = 16; i < 32; i++)
  {
  if (bitset[i] != expected)
    return FALSE;
  }

return TRUE;
}
4266
4267
static void read_char7_type(compiler_common *common, jump_list **backtracks, BOOL negated)
{
/* Loads the ctypes entry of the next character into TMP1 when the character
is less than 128, otherwise TMP1 is zero. Does not check STR_END. Only when
negated is set is code emitted for characters of 128 and above: STR_PTR is
moved past the rest of the character, or, in invalid UTF mode, the
character is decoded with backtracking on an invalid sequence. */
DEFINE_COMPILER;
struct sljit_jump *ascii;

SLJIT_ASSERT(common->utf);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* The ctypes entries of all values > 127 are zero. */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);

if (!negated)
  return;

ascii = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);

if (common->invalid_utf)
  {
  /* The decoding helper takes the first byte in TMP1. */
  OP1(SLJIT_MOV, TMP1, 0, TMP2, 0);
  add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
  }
else
  {
  /* The number of remaining bytes comes from utf8_table4. */
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  }

JUMPHERE(ascii);
}
4302
4303
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
4304
4305
static void read_char8_type(compiler_common *common, jump_list **backtracks, BOOL negated)
{
/* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END.
The type is the common->ctypes entry of the character when its value is
below 256, and zero otherwise. When negated is set, STR_PTR is also moved
past multi-unit characters; in invalid UTF mode malformed sequences are
added to backtracks. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
struct sljit_jump *jump;
#endif
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_jump *jump2;
#endif

SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(negated);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  /* The result of this read may be unused, but saves an "else" part. */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);

  if (!negated)
    {
    /* Only two byte sequences can encode a character below 256, so
    nothing longer is decoded here. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe0 - 0xc2));

    /* ((lead - 0xc2) << 6) + second byte is the character value when the
    second byte is in the 0x80-0xbf range. */
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
    /* The second byte (now reduced by 0x80 in TMP1) must be a continuation
    byte. The test has to use TMP1: TMP2 already includes the second
    byte, so comparing TMP2 would reject valid sequences. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40));

    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else if (common->invalid_utf)
    {
    /* The decoding helper takes the first byte in TMP1, but TMP1 holds a
    ctypes entry at this point, so the byte is copied from TMP2 first
    (read_char7_type() does the same). */
    OP1(SLJIT_MOV, TMP1, 0, TMP2, 0);
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
    OP1(SLJIT_MOV, TMP2, 0, TMP1, 0);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));

    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else
    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));

  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf && negated)
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x110000));
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32 */

#if PCRE2_CODE_UNIT_WIDTH != 8
/* The ctypes array contains only 256 values. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf && negated)
  {
  /* Skip low surrogate if necessary. */
  if (!common->invalid_utf)
    {
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);

    if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
      {
      OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
      SELECT(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0, STR_PTR);
      }
    else
      {
      jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      JUMPHERE(jump);
      }
    return;
    }

  /* Invalid UTF mode: a high surrogate must be followed by a low surrogate
  which is inside the subject; a lone low surrogate is rejected. */
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));

  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16 */
}
4423
4424
static void move_back(compiler_common *common, jump_list **backtracks, BOOL must_be_valid)
{
/* Moves STR_PTR back by one character. TMP1 is used as scratch. With
must_be_valid set, TMP2 is left alone; without it, TMP2 has to hold the
start of the subject buffer on entry and its value is lost. STR_PTR is not
changed for invalid character sequences. */
DEFINE_COMPILER;

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *single_unit;
#endif

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_label *again;

if (common->utf)
  {
  if (!must_be_valid && common->invalid_utf)
    {
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
    add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
    /* Backtrack when the helper leaves zero in TMP1. */
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
    JUMPHERE(single_unit);
    return;
    }

  /* Keep stepping back while the byte just passed is 10xxxxxx. */
  again = LABEL();
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, again);
  return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  if (!must_be_valid && common->invalid_utf)
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    single_unit = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0xd800);
    add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
    /* Backtrack when the helper leaves zero in TMP1. */
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
    JUMPHERE(single_unit);
    return;
    }

  /* One more code unit is stepped over when a low surrogate was passed. */
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xdc00);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (!must_be_valid && common->invalid_utf)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  if (backtracks != NULL)
    {
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }
  else
    {
    /* STR_PTR only moves when the value is below 0x110000. */
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x110000);
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
    }
  return;
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */

SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(must_be_valid);

/* Every character is a single code unit here. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
4510
4511
static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
{
/* Tests whether the character in TMP1 is a newline of the given nltype.
A jump is added to backtracks which is taken on a newline when jumpifmatch
is set, and on any other character when it is not. TMP2 may be destroyed. */
DEFINE_COMPILER;
struct sljit_jump *is_cr;

if (nltype == NLTYPE_ANY)
  {
  /* The shared anynewline routine is called and the zero flag it leaves
  behind is tested. */
  add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return;
  }

if (nltype != NLTYPE_ANYCRLF)
  {
  SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
  add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
  return;
  }

/* NLTYPE_ANYCRLF: CR and NL are both newlines. */
if (jumpifmatch)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
  return;
  }

is_cr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
JUMPHERE(is_cr);
}
4543
4544
#ifdef SUPPORT_UNICODE
4545
4546
#if PCRE2_CODE_UNIT_WIDTH == 8
4547
static void do_utfreadchar(compiler_common *common)
{
/* Fast (unchecked) UTF-8 decoder routine. On entry TMP1 holds the first
byte of the character (>= 0xc0) and STR_PTR points to the byte after it.
The character value is returned in TMP1 and STR_PTR is moved past the
character. TMP2 is used as scratch. */
DEFINE_COMPILER;
struct sljit_jump *not_two_bytes;
struct sljit_jump *four_bytes;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* Append the low six bits of the second byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Bit 0x20 of the first byte (now at 0x800) is clear for two byte
sequences. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
not_two_bytes = JUMP(SLJIT_NOT_ZERO);

/* Two byte sequence: drop the 110 marker bits and return. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(not_two_bytes);

/* Append the low six bits of the third byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Bit 0x10 of the first byte (now at 0x10000) is clear for three byte
sequences. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x10000);
four_bytes = JUMP(SLJIT_NOT_ZERO);

/* Three byte sequence: drop the 1110 marker bits and return. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Four byte sequence: drop the marker bits, then append the low six
bits of the fourth byte. */
JUMPHERE(four_bytes);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4591
4592
static void do_utfreadtype8(compiler_common *common)
{
/* Fast (unchecked) routine which reads the type of a multi-byte UTF-8
character. On entry TMP2 holds the first byte of the character (>= 0xc0)
and STR_PTR points to the byte after it. The ctypes entry is returned in
TMP1 (zero for characters above 255) and STR_PTR is moved past the
character. */
DEFINE_COMPILER;
struct sljit_jump *longer_sequence;
struct sljit_jump *above_255;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* Bit 0x20 of the first byte is set for sequences longer than two bytes. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0x20);
longer_sequence = JUMP(SLJIT_NOT_ZERO);

/* Two byte sequence. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
/* TMP2 now holds the upper five bits of the character; the value fits
into eight bits only when they are at most 0x3. */
above_255 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Two byte character above 255: no type information exists. */
JUMPHERE(above_255);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Longer sequences are always above 255: return zero and skip the
remaining bytes, whose count comes from utf8_table4. */
JUMPHERE(longer_sequence);
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4627
4628
static void do_utfreadchar_invalid(compiler_common *common)
4629
{
4630
/* Slow decoding a UTF-8 character with validation. TMP1 contains the first
byte of the character (>= 0xc0) and STR_PTR addresses the code unit that
follows it. Return char value in TMP1, or INVALID_UTF_CHAR when the
sequence is malformed or truncated by STR_END. For valid characters
STR_PTR is advanced over the whole sequence; STR_PTR is undefined for
invalid characters. TMP2 is overwritten.

When the CPU supports conditional moves, several checks do not branch:
a failing check replaces TMP1 by a constant which appears to be chosen
so that the arithmetic that follows ends in INVALID_UTF_CHAR (or trips a
later check). The matching exit_invalid[] entry is set to NULL then. */
4633
DEFINE_COMPILER;
4634
sljit_s32 i;
4635
sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
4636
struct sljit_jump *jump;
4637
struct sljit_jump *buffer_end_close;
4638
struct sljit_label *three_byte_entry;
4639
struct sljit_label *exit_invalid_label;
4640
struct sljit_jump *exit_invalid[11];
4641
4642
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
4643
4644
/* Bias the first byte so that the smallest accepted lead byte (0xc2)
becomes zero. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc2);
4645
4646
/* Usually more than 3 characters remained in the subject buffer. */
4647
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
4648
4649
/* Not a valid start of a multi-byte sequence, no more bytes read. */
4650
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xf5 - 0xc2);
4651
4652
/* Fewer than three code units are left after the first byte: continue on
the bounds-checked path at the end of this function. */
buffer_end_close = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
4653
4654
/* Second byte of the sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
4655
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4656
/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
4657
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
4658
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
4659
/* The second byte must be a continuation byte (0x80-0xbf). */
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4660
4661
/* TMP1 stays below 0x800 only for the two-byte lead bytes (0xc2-0xdf). */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
4662
jump = JUMP(SLJIT_NOT_ZERO);
4663
4664
/* Two-byte sequence: TMP1 is the character, only one more byte is used. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4665
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4666
4667
JUMPHERE(jump);
4668
4669
/* Three-byte sequence. */
4670
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4671
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4672
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
4673
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4674
/* The third byte must be a continuation byte. */
if (has_cmov)
4675
  {
4676
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4677
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0x20000, TMP1);
4678
  exit_invalid[2] = NULL;
4679
  }
4680
else
4681
  exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4682
4683
/* At this point TMP1 is the character plus 0x20000 for a three-byte lead
(0xe0-0xef); bit 0x10000 is set only for the four-byte leads. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x10000);
4684
jump = JUMP(SLJIT_NOT_ZERO);
4685
4686
three_byte_entry = LABEL();
4687
4688
/* Remove the 0x20000 bias and reject the surrogate range 0xd800-0xdfff. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2d800);
4689
if (has_cmov)
4690
  {
4691
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
4692
  SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0xd800, TMP1);
4693
  exit_invalid[3] = NULL;
4694
  }
4695
else
4696
  exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
4697
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4698
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4699
4700
/* Reject overlong encodings: a three-byte character must be >= 0x800. */
if (has_cmov)
4701
  {
4702
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
4703
  SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
4704
  exit_invalid[4] = NULL;
4705
  }
4706
else
4707
  exit_invalid[4] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
4708
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4709
4710
JUMPHERE(jump);
4711
4712
/* Four-byte sequence. */
4713
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4714
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4715
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
4716
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4717
/* The fourth byte must be a continuation byte. */
if (has_cmov)
4718
  {
4719
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4720
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0, TMP1);
4721
  exit_invalid[5] = NULL;
4722
  }
4723
else
4724
  exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4725
4726
/* Remove the accumulated bias plus 0x10000, so that the valid range
0x10000-0x10ffff maps to 0-0xfffff; everything else is rejected. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc10000);
4727
if (has_cmov)
4728
  {
4729
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
4730
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000, TMP1);
4731
  exit_invalid[6] = NULL;
4732
  }
4733
else
4734
  exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
4735
4736
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
4737
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4738
4739
/* Close to the end of the subject: every load below is preceded by its
own STR_END check. STR_PTR is moved back to one code unit after the
position it had on entry. */
JUMPHERE(buffer_end_close);
4740
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4741
exit_invalid[7] = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
4742
4743
/* Two-byte sequence. */
4744
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4745
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4746
/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
4747
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
4748
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
4749
exit_invalid[8] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4750
4751
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
4752
jump = JUMP(SLJIT_NOT_ZERO);
4753
4754
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4755
4756
/* Three-byte sequence. */
4757
JUMPHERE(jump);
4758
exit_invalid[9] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4759
4760
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4761
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4762
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
4763
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4764
if (has_cmov)
4765
  {
4766
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4767
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
4768
  exit_invalid[10] = NULL;
4769
  }
4770
else
4771
  exit_invalid[10] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4772
4773
/* One will be subtracted from STR_PTR later. */
4774
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4775
4776
/* Four byte sequences are not possible. */
4777
/* Values below 0x30000 come from a three-byte lead: share the range
checks of the main path. Anything else falls through to the invalid
exit. */
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x30000, three_byte_entry);
4778
4779
exit_invalid_label = LABEL();
4780
/* Entries replaced by a conditional select above are NULL.
NOTE(review): this relies on sljit_set_label() ignoring a NULL jump -
confirm against the bundled sljit. */
for (i = 0; i < 11; i++)
4781
  sljit_set_label(exit_invalid[i], exit_invalid_label);
4782
4783
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
4784
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4785
}
4786
4787
static void do_utfreadnewline_invalid(compiler_common *common)
{
/* Slow decoding a UTF-8 character, specialized for newlines.
TMP1 contains the first byte of the character (>= 0xc0) and STR_PTR
addresses the code unit that follows it. Return char value in TMP1.

Only the multi-byte newlines are decoded, and only when every Unicode
newline is recognized (NLTYPE_ANY): 0xc2 0x85 yields 0x85 and
0xe2 0x80 0x80-0xbf yields 0x2000-0x203f (this covers 0x2028 and 0x2029).
In every other case the continuation bytes (0x80-0xbf) that follow are
skipped and INVALID_UTF_CHAR is returned; the skipped sequence is not
validated any further. TMP2 is overwritten.

No code unit at or after STR_END is loaded: each load is preceded by a
STR_PTR >= STR_END check. */
DEFINE_COMPILER;
struct sljit_label *loop;
struct sljit_label *skip_start;
struct sljit_label *three_byte_exit;
struct sljit_jump *jump[5];

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

if (common->nltype != NLTYPE_ANY)
  {
  SLJIT_ASSERT(common->nltype != NLTYPE_FIXED || common->newline < 128);

  /* All newlines are ascii, just skip intermediate octets. The end of
  subject check is emitted after the loop label, so it is executed before
  every load. A single check in front of the loop would let a subject which
  ends with continuation bytes be read beyond STR_END. */
  loop = LABEL();
  jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  /* Use a post-increment load when the target supports it. */
  if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)) == SLJIT_SUCCESS)
    sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  else
    {
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }

  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);
  /* The last code unit read is not a continuation byte: step back to it. */
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  /* End of subject: nothing was read in this iteration, STR_PTR is kept. */
  JUMPHERE(jump[0]);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
  OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  return;
  }

/* Read the second byte, it is needed on every path below. */
jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Only these two first bytes can start a multi-byte newline. */
jump[1] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xc2);
jump[2] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xe2);

/* Entered with the most recently read code unit in TMP2. */
skip_start = LABEL();
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
jump[3] = CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80);

/* Skip intermediate octets. */
loop = LABEL();
jump[4] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);

/* The last code unit read is not a continuation byte: step back to it. */
JUMPHERE(jump[3]);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

three_byte_exit = LABEL();
JUMPHERE(jump[0]);
JUMPHERE(jump[4]);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Two byte long newline: 0x85. */
JUMPHERE(jump[1]);
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x85, skip_start);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x85);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Three byte long newlines: 0x2028 and 0x2029. */
JUMPHERE(jump[2]);
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, skip_start);
CMPTO(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0, three_byte_exit);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* The third byte must be a continuation byte, otherwise it is handed to
the skipping code above. */
OP2(SLJIT_SUB, TMP1, 0, TMP2, 0, SLJIT_IMM, 0x80);
CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40, skip_start);

/* 0xe2 0x80 0xXX decodes to 0x2000 | (0xXX - 0x80). */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0x2000);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4877
4878
static void do_utfmoveback_invalid(compiler_common *common)
4879
{
4880
/* Goes one character back in a possibly invalid UTF-8 subject.

On entry TMP1 is rejected when it is >= 0xc0 and TMP2 is the lowest
address that may be read. NOTE(review): presumably TMP1 is the last code
unit of the character and STR_PTR already addresses it - confirm against
the caller.

On success TMP1 is 1 and STR_PTR addresses the lead byte, which is 1, 2 or
3 code units before the entry value of STR_PTR. On failure TMP1 is 0 and
STR_PTR is its entry value plus one code unit. Only the type of each byte
(lead or continuation) is checked here, not the decoded value. */
4881
DEFINE_COMPILER;
4882
sljit_s32 i;
4883
struct sljit_jump *jump;
4884
struct sljit_jump *buffer_start_close;
4885
struct sljit_label *exit_ok_label;
4886
struct sljit_label *exit_invalid_label;
4887
struct sljit_jump *exit_invalid[7];
4888
4889
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
4890
4891
/* Step back by the largest possible distance; shorter sequences add the
difference back before they return. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
4892
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);
4893
4894
/* Two-byte sequence. */
4895
/* Fewer than three code units are readable: continue on the
bounds-checked path at the end of this function. */
buffer_start_close = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
4896
4897
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
4898
4899
/* 0xc0-0xdf is a two-byte lead byte. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4900
jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x20);
4901
4902
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
4903
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4904
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4905
4906
/* Three-byte sequence. */
4907
JUMPHERE(jump);
4908
/* TMP1 is (byte - 0xc0): with an unsigned comparison only the
continuation bytes 0x80-0xbf are not below -0x40. */
exit_invalid[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);
4909
4910
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4911
4912
/* 0xe0-0xef is a three-byte lead byte. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
4913
jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x10);
4914
4915
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
4916
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4917
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4918
4919
/* Four-byte sequence. */
4920
JUMPHERE(jump);
4921
/* Rebias to (byte - 0x80): the byte must be a continuation byte. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
4922
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40);
4923
4924
/* The lead byte must be in the 0xf0-0xf4 range. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4925
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0);
4926
exit_invalid[3] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x05);
4927
4928
exit_ok_label = LABEL();
4929
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
4930
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4931
4932
/* Two-byte sequence. */
4933
/* Close to the start of the subject: STR_PTR is now one code unit before
its entry value and is checked against TMP2 before each load. */
JUMPHERE(buffer_start_close);
4934
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4935
4936
exit_invalid[4] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
4937
4938
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4939
4940
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4941
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20, exit_ok_label);
4942
4943
/* Three-byte sequence. */
4944
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4945
exit_invalid[5] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);
4946
exit_invalid[6] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
4947
4948
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4949
4950
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
4951
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10, exit_ok_label);
4952
4953
/* Four-byte sequences are not possible. */
4954
4955
/* Invalid exit for STR_PTR two code units before its entry value
(-2 + 3 = 1). */
exit_invalid_label = LABEL();
4956
sljit_set_label(exit_invalid[5], exit_invalid_label);
4957
sljit_set_label(exit_invalid[6], exit_invalid_label);
4958
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
4959
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
4960
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4961
4962
JUMPHERE(exit_invalid[4]);
4963
/* -2 + 4 = 2 */
4964
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4965
4966
/* Invalid exit for STR_PTR three code units before its entry value
(-3 + 4 = 1). */
exit_invalid_label = LABEL();
4967
for (i = 0; i < 4; i++)
4968
  sljit_set_label(exit_invalid[i], exit_invalid_label);
4969
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
4970
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(4));
4971
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4972
}
4973
4974
static void do_utfpeakcharback(compiler_common *common)
4975
{
4976
/* Peek at the multi-byte UTF-8 character that ends right before STR_PTR.
Does not modify STR_PTR. The character value is returned in TMP1 and TMP2
is overwritten.

No validation and no start-of-subject check is emitted: the code unit at
STR_PTR[-1] is always treated as a continuation byte. NOTE(review):
presumably the caller handles single-byte characters itself and only
uses this helper for valid UTF-8 - confirm against the caller. */
4977
DEFINE_COMPILER;
4978
struct sljit_jump *jump[2];
4979
4980
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
4981
4982
/* A lead byte in 0xc0-0xdf at STR_PTR[-2] starts a two-byte character. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4983
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4984
jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20);
4985
4986
/* A lead byte in 0xe0-0xef at STR_PTR[-3] starts a three-byte character. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
4987
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
4988
jump[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10);
4989
4990
/* Otherwise it is a four-byte character: STR_PTR[-3] is a continuation
byte (rebias it to byte - 0x80) and the lead byte is at STR_PTR[-4]. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
4991
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
4992
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
4993
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
4994
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4995
4996
/* The longer sequences fall through the code of the shorter ones: each
step below appends the six payload bits of one continuation byte. */
JUMPHERE(jump[1]);
4997
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4998
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4999
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
5000
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
5001
5002
JUMPHERE(jump[0]);
5003
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5004
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
5005
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
5006
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
5007
5008
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5009
}
5010
5011
static void do_utfpeakcharback_invalid(compiler_common *common)
5012
{
5013
/* Peek at the character that ends right before STR_PTR in a possibly
invalid UTF-8 subject. Does not modify STR_PTR.

On entry TMP1 is rejected when it is >= 0xc0 and TMP2 is the lowest
address that may be read. NOTE(review): presumably TMP1 is the code unit
at STR_PTR[-1] and is >= 0x80 - confirm against the caller.

The character value is returned in TMP1, or INVALID_UTF_CHAR when the
sequence is malformed, overlong, a surrogate, above 0x10ffff, or would
start before TMP2. TMP2 is overwritten. */
5014
DEFINE_COMPILER;
5015
sljit_s32 i;
5016
sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
5017
struct sljit_jump *jump[2];
5018
struct sljit_label *two_byte_entry;
5019
struct sljit_label *three_byte_entry;
5020
struct sljit_label *exit_invalid_label;
5021
struct sljit_jump *exit_invalid[8];
5022
5023
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
5024
5025
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
5026
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);
5027
/* Fewer than four code units are readable before STR_PTR: continue on
the bounds-aware paths at the end of this function. */
jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);
5028
5029
/* Two-byte sequence. */
5030
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5031
/* 0xc2-0xdf is a two-byte lead byte (0xc0 and 0xc1 would be overlong). */
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
5032
jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x1e);
5033
5034
two_byte_entry = LABEL();
5035
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
5036
/* If TMP1 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
5037
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5038
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5039
5040
JUMPHERE(jump[1]);
5041
/* Not a lead byte: STR_PTR[-2] must be a continuation byte. Its payload
is merged with the payload of the last byte. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
5042
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
5043
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
5044
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
5045
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
5046
5047
/* Three-byte sequence. */
5048
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
5049
/* 0xe0-0xef is a three-byte lead byte. */
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
5050
jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x10);
5051
5052
three_byte_entry = LABEL();
5053
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
5054
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
5055
5056
/* Reject the surrogate range 0xd800-0xdfff. With conditional select the
check does not branch: TMP1 becomes -0xd800, which is zero after the
addition below and is caught by the overlong check. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5057
if (has_cmov)
5058
  {
5059
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
5060
  SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, -0xd800, TMP1);
5061
  exit_invalid[2] = NULL;
5062
  }
5063
else
5064
  exit_invalid[2] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
5065
5066
/* Reject overlong encodings: a three-byte character must be >= 0x800. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5067
if (has_cmov)
5068
  {
5069
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
5070
  SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
5071
  exit_invalid[3] = NULL;
5072
  }
5073
else
5074
  exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
5075
5076
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5077
5078
JUMPHERE(jump[1]);
5079
/* Not a lead byte: STR_PTR[-3] must be a continuation byte. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0 - 0x80);
5080
exit_invalid[4] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
5081
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
5082
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
5083
5084
/* Four-byte sequence. */
5085
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
5086
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
5087
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
5088
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 18);
5089
/* ADD is used instead of OR because of the SUB 0x10000 above. */
5090
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5091
5092
/* TMP1 is now (character - 0x10000): only 0-0xfffff is accepted. */
if (has_cmov)
5093
  {
5094
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
5095
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000, TMP1);
5096
  exit_invalid[5] = NULL;
5097
  }
5098
else
5099
  exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
5100
5101
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
5102
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5103
5104
/* At most three code units are readable. TMP2 is still the limit plus
three code units here; it is reduced to the limit plus two. */
JUMPHERE(jump[0]);
5105
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
5106
jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);
5107
5108
/* Two-byte sequence. */
5109
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5110
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
5111
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);
5112
5113
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
5114
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
5115
exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
5116
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
5117
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
5118
5119
/* Three-byte sequence. */
5120
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
5121
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
5122
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x10, three_byte_entry);
5123
5124
/* A four-byte sequence would start before the limit. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
5125
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5126
5127
/* At most two code units are readable; with fewer than two no
multi-byte character is possible. */
JUMPHERE(jump[0]);
5128
exit_invalid[7] = CMP(SLJIT_GREATER, TMP2, 0, STR_PTR, 0);
5129
5130
/* Two-byte sequence. */
5131
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5132
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
5133
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);
5134
5135
exit_invalid_label = LABEL();
5136
/* Entries replaced by a conditional select above are NULL.
NOTE(review): this relies on sljit_set_label() ignoring a NULL jump -
confirm against the bundled sljit. */
for (i = 0; i < 8; i++)
5137
  sljit_set_label(exit_invalid[i], exit_invalid_label);
5138
5139
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
5140
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5141
}
5142
5143
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
5144
5145
#if PCRE2_CODE_UNIT_WIDTH == 16
5146
5147
static void do_utfreadchar_invalid(compiler_common *common)
5148
{
5149
/* Slow decoding a UTF-16 character with validation. TMP1 contains the
first half of the character (>= 0xd800) and STR_PTR addresses the code
unit that follows it. Return char value in TMP1, or INVALID_UTF_CHAR when
the first half is not a high surrogate, the subject ends, or the second
half is not a low surrogate (0xdc00-0xdfff). STR_PTR is undefined for
invalid characters. TMP2 is overwritten.

NOTE(review): TMP2 is shifted and added without being loaded here, so the
caller presumably passes (first half - 0xd800) in it - confirm. */
5152
DEFINE_COMPILER;
5153
struct sljit_jump *exit_invalid[3];
5154
5155
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
5156
5157
/* TMP2 contains the high surrogate. */
5158
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);
5159
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5160
5161
/* Read the second half and consume it. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5162
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
5163
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5164
5165
/* The second half must be in the 0xdc00-0xdfff range. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
5166
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
5167
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);
5168
5169
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5170
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5171
5172
JUMPHERE(exit_invalid[0]);
5173
JUMPHERE(exit_invalid[1]);
5174
JUMPHERE(exit_invalid[2]);
5175
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
5176
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5177
}
5178
5179
static void do_utfreadnewline_invalid(compiler_common *common)
5180
{
5181
/* Slow decoding a UTF-16 character, specialized for newlines.
TMP1 contains the first half of the character (>= 0xd800) and STR_PTR
addresses the code unit that follows it. Return char value in TMP1.

The character is not decoded. INVALID_UTF_CHAR is returned when the
subject ends or the first half is >= 0xdc00. Otherwise 0x10000 is
returned, and STR_PTR is advanced by one code unit only when the next
code unit is a low surrogate (0xdc00-0xdfff). TMP2 is overwritten. */
5184
5185
DEFINE_COMPILER;
5186
struct sljit_jump *exit_invalid[2];
5187
5188
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
5189
5190
/* TMP2 contains the high surrogate. */
5191
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5192
5193
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5194
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);
5195
5196
/* TMP2 becomes 1 when the next code unit is a low surrogate and 0
otherwise; scaled to a byte offset it is the amount to skip. */
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
5197
OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
5198
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
5199
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
5200
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
5201
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5202
5203
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5204
5205
JUMPHERE(exit_invalid[0]);
5206
JUMPHERE(exit_invalid[1]);
5207
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
5208
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5209
}
5210
5211
static void do_utfmoveback_invalid(compiler_common *common)
5212
{
5213
/* Goes one character back in a possibly invalid UTF-16 subject.

On entry TMP1 is rejected when it is below 0x400 and TMP2 is the lowest
address that may be read. NOTE(review): presumably TMP1 is the last code
unit minus 0xd800 (so that values below 0x400 are high surrogates) and
STR_PTR already addresses that unit - confirm against the caller.

On success (the code unit before STR_PTR is a high surrogate) TMP1 is 1
and STR_PTR is moved back by one code unit. On failure TMP1 is 0 and
STR_PTR is moved forward by one code unit. */
5214
DEFINE_COMPILER;
5215
struct sljit_jump *exit_invalid[3];
5216
5217
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
5218
5219
exit_invalid[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400);
5220
/* There is no code unit before STR_PTR that may be read. */
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);
5221
5222
/* The preceding code unit must be in the 0xd800-0xdbff range. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5223
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5224
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);
5225
5226
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5227
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
5228
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5229
5230
JUMPHERE(exit_invalid[0]);
5231
JUMPHERE(exit_invalid[1]);
5232
JUMPHERE(exit_invalid[2]);
5233
5234
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5235
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
5236
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5237
}
5238
5239
static void do_utfpeakcharback_invalid(compiler_common *common)
5240
{
5241
/* Peek at the character that ends right before STR_PTR in a possibly
invalid UTF-16 subject. Does not modify STR_PTR.

On entry TMP1 is the last code unit of the character (the code reads the
unit before it from STR_PTR[-2]) and TMP2 is the lowest address that may
be read. NOTE(review): values below 0xd800 are not handled specially, so
the caller presumably filters them - confirm.

Returns in TMP1: the unchanged value when it is >= 0xe000, the combined
character when TMP1 is a low surrogate preceded by a high surrogate, and
INVALID_UTF_CHAR otherwise. TMP2 is overwritten unless TMP1 >= 0xe000. */
5242
DEFINE_COMPILER;
5243
struct sljit_jump *jump;
5244
struct sljit_jump *exit_invalid[3];
5245
5246
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
5247
5248
/* Not a surrogate: nothing to do. */
jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000);
5249
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
5250
/* A high surrogate cannot be the last half of a character. */
exit_invalid[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
5251
/* Fewer than two code units are readable before STR_PTR. */
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);
5252
5253
/* The preceding code unit must be in the 0xd800-0xdbff range. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5254
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
5255
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
5256
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
5257
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
5258
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5259
5260
JUMPHERE(jump);
5261
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5262
5263
JUMPHERE(exit_invalid[0]);
5264
JUMPHERE(exit_invalid[1]);
5265
JUMPHERE(exit_invalid[2]);
5266
5267
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
5268
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5269
}
5270
5271
#endif /* PCRE2_CODE_UNIT_WIDTH == 16 */
5272
5273
/* UCD_BLOCK_SIZE must be 128 (see the assert below). The generated code
splits a character with these constants instead of dividing by the block
size: the shift gives the block number (character >> 7) and the mask gives
the offset inside the block (character & 127). */
5274
#define UCD_BLOCK_MASK 127
5275
#define UCD_BLOCK_SHIFT 7
5276
5277
static void do_getucd(compiler_common *common)
5278
{
5279
/* Search the UCD record for the character comes in TMP1.
Returns the 16 bit entry loaded from the second stage table in TMP2
(presumably the index of the UCD record - see how do_getucdtype uses it).
TMP1 is overwritten by the second stage table index; unlike do_getucdtype
this helper does NOT return the chartype. */
5281
DEFINE_COMPILER;
5282
#if PCRE2_CODE_UNIT_WIDTH == 32
5283
struct sljit_jump *jump;
5284
#endif
5285
5286
#if defined SLJIT_DEBUG && SLJIT_DEBUG
5287
/* dummy_ucd_record */
5288
const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR);
5289
SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
5290
SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
5291
#endif
5292
5293
SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);
5294
5295
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
5296
5297
#if PCRE2_CODE_UNIT_WIDTH == 32
5298
/* Without UTF checking a 32 bit code unit can exceed the largest code
point: such values are looked up as UNASSIGNED_UTF_CHAR so the table
index stays in range. */
if (!common->utf)
5299
  {
5300
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
5301
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
5302
  JUMPHERE(jump);
5303
  }
5304
#endif
5305
5306
/* First stage: TMP2 = ucd_stage1[TMP1 >> UCD_BLOCK_SHIFT]. The block
number is doubled because the entries are loaded as 16 bit values. */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5307
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5308
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
5309
/* Second stage index: TMP1 = stage1 entry * block size + offset of the
character inside its block. */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
5310
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5311
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5312
/* TMP2 = ucd_stage2[TMP1]. NOTE(review): the last argument of a
SLJIT_MEM2 operand is taken to be the shift applied to the index
register (16 bit entries here) - confirm in sljitLir.h. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
5313
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
5314
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5315
}
5316
5317
static void do_getucdtype(compiler_common *common)
5318
{
5319
/* Search the UCD record for the character comes in TMP1.
Returns chartype in TMP1. TMP2 is overwritten: on return it holds the
second stage table entry multiplied by 4 (a by-product of the address
computation), not the unscaled value that do_getucd returns. */
5321
DEFINE_COMPILER;
5322
#if PCRE2_CODE_UNIT_WIDTH == 32
5323
struct sljit_jump *jump;
5324
#endif
5325
5326
#if defined SLJIT_DEBUG && SLJIT_DEBUG
5327
/* dummy_ucd_record */
5328
const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR);
5329
SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
5330
SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
5331
#endif
5332
5333
SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);
5334
5335
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
5336
5337
#if PCRE2_CODE_UNIT_WIDTH == 32
5338
/* Without UTF checking a 32 bit code unit can exceed the largest code
point: such values are looked up as UNASSIGNED_UTF_CHAR so the table
index stays in range. */
if (!common->utf)
5339
  {
5340
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
5341
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
5342
  JUMPHERE(jump);
5343
  }
5344
#endif
5345
5346
/* First stage: TMP2 = ucd_stage1[TMP1 >> UCD_BLOCK_SHIFT]. The block
number is doubled because the entries are loaded as 16 bit values. */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5347
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5348
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
5349
/* Second stage index: TMP1 = stage1 entry * block size + offset of the
character inside its block. */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
5350
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5351
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5352
/* TMP2 = ucd_stage2[TMP1], loaded as a 16 bit value. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
5353
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
5354
5355
/* TMP2 is multiplied by 12. Same as (TMP2 << 2) + ((TMP2 << 2) << 1). */
5356
/* 12 is sizeof(ucd_record), see the assert above. The first term is added
to the address of the chartype field of record 0, the second term is
produced by the shifted index of the final load. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
5357
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
5358
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5359
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 1);
5360
5361
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5362
}
5363
5364
#endif /* SUPPORT_UNICODE */
5365
5366
static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common)
5367
{
5368
DEFINE_COMPILER;
5369
struct sljit_label *mainloop;
5370
struct sljit_label *newlinelabel = NULL;
5371
struct sljit_jump *start;
5372
struct sljit_jump *end = NULL;
5373
struct sljit_jump *end2 = NULL;
5374
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5375
struct sljit_label *loop;
5376
struct sljit_jump *jump;
5377
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
5378
jump_list *newline = NULL;
5379
sljit_u32 overall_options = common->re->overall_options;
5380
BOOL hascrorlf = (common->re->flags & PCRE2_HASCRORLF) != 0;
5381
BOOL newlinecheck = FALSE;
5382
BOOL readuchar = FALSE;
5383
5384
if (!(hascrorlf || (overall_options & PCRE2_FIRSTLINE) != 0)
5385
    && (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
5386
  newlinecheck = TRUE;
5387
5388
SLJIT_ASSERT(common->abort_label == NULL);
5389
5390
if ((overall_options & PCRE2_FIRSTLINE) != 0)
5391
  {
5392
  /* Search for the end of the first line. */
5393
  SLJIT_ASSERT(common->match_end_ptr != 0);
5394
  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
5395
5396
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5397
    {
5398
    mainloop = LABEL();
5399
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5400
    end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5401
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5402
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5403
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
5404
    CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
5405
    JUMPHERE(end);
5406
    OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5407
    }
5408
  else
5409
    {
5410
    end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5411
    mainloop = LABEL();
5412
    /* Continual stores does not cause data dependency. */
5413
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
5414
    read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
5415
    check_newlinechar(common, common->nltype, &newline, TRUE);
5416
    CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
5417
    JUMPHERE(end);
5418
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
5419
    set_jumps(newline, LABEL());
5420
    }
5421
5422
  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
5423
  }
5424
else if ((overall_options & PCRE2_USE_OFFSET_LIMIT) != 0)
5425
  {
5426
  /* Check whether offset limit is set and valid. */
5427
  SLJIT_ASSERT(common->match_end_ptr != 0);
5428
5429
  if (HAS_VIRTUAL_REGISTERS)
5430
    {
5431
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5432
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, offset_limit));
5433
    }
5434
  else
5435
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, offset_limit));
5436
5437
  OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
5438
  end = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw) PCRE2_UNSET);
5439
  if (HAS_VIRTUAL_REGISTERS)
5440
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5441
  else
5442
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
5443
5444
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
5445
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5446
#endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
5447
  if (HAS_VIRTUAL_REGISTERS)
5448
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5449
5450
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
5451
  end2 = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
5452
  OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
5453
  JUMPHERE(end2);
5454
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
5455
  add_jump(compiler, &common->abort, CMP(SLJIT_LESS, TMP2, 0, STR_PTR, 0));
5456
  JUMPHERE(end);
5457
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, TMP2, 0);
5458
  }
5459
5460
start = JUMP(SLJIT_JUMP);
5461
5462
if (newlinecheck)
5463
  {
5464
  newlinelabel = LABEL();
5465
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5466
  end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5467
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5468
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
5469
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
5470
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
5471
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5472
#endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
5473
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5474
  end2 = JUMP(SLJIT_JUMP);
5475
  }
5476
5477
mainloop = LABEL();
5478
5479
/* Increasing the STR_PTR here requires one less jump in the most common case. */
5480
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5481
if (common->utf && !common->invalid_utf) readuchar = TRUE;
5482
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
5483
if (newlinecheck) readuchar = TRUE;
5484
5485
if (readuchar)
5486
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5487
5488
if (newlinecheck)
5489
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
5490
5491
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5492
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5493
#if PCRE2_CODE_UNIT_WIDTH == 8
5494
if (common->invalid_utf)
5495
  {
5496
  /* Skip continuation code units. */
5497
  loop = LABEL();
5498
  jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5499
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5500
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5501
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
5502
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x40, loop);
5503
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5504
  JUMPHERE(jump);
5505
  }
5506
else if (common->utf)
5507
  {
5508
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5509
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5510
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5511
  JUMPHERE(jump);
5512
  }
5513
#elif PCRE2_CODE_UNIT_WIDTH == 16
5514
if (common->invalid_utf)
5515
  {
5516
  /* Skip continuation code units. */
5517
  loop = LABEL();
5518
  jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5519
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5520
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5521
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
5522
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400, loop);
5523
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5524
  JUMPHERE(jump);
5525
  }
5526
else if (common->utf)
5527
  {
5528
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5529
5530
  if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
5531
    {
5532
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5533
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x400);
5534
    SELECT(SLJIT_LESS, STR_PTR, TMP2, 0, STR_PTR);
5535
    }
5536
  else
5537
    {
5538
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x400);
5539
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
5540
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5541
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5542
    }
5543
  }
5544
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
5545
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
5546
JUMPHERE(start);
5547
5548
if (newlinecheck)
5549
  {
5550
  JUMPHERE(end);
5551
  JUMPHERE(end2);
5552
  }
5553
5554
return mainloop;
5555
}
5556
5557
5558
/* Records chr as one of the code units that may occur at the prefix
position described by chars.

A count of 255 marks a position that is no longer tracked, and such a
position is left untouched. A code unit that is already stored is ignored.
When the set already holds MAX_DIFF_CHARS entries the position is switched
to the untracked state (count 255). When last is set, last_count is
updated together with the newly stored code unit. */
static SLJIT_INLINE void add_prefix_char(PCRE2_UCHAR chr, fast_forward_char_data *chars, BOOL last)
{
const sljit_u32 known = chars->count;
sljit_u32 idx;

switch (known)
  {
  case 255:
  /* Untracked position: nothing to record. */
  return;

  case 0:
  /* First code unit seen at this position. */
  chars->chars[0] = chr;
  chars->count = 1;
  if (last)
    chars->last_count = 1;
  return;

  default:
  break;
  }

/* Skip code units which are already in the set. */
idx = 0;
while (idx < known)
  {
  if (chars->chars[idx] == chr)
    return;
  idx++;
  }

if (known < MAX_DIFF_CHARS)
  {
  chars->chars[known] = chr;
  chars->count = known + 1;

  if (last)
    chars->last_count++;
  }
else
  {
  /* No room for another code unit: stop tracking this position. */
  chars->count = 255;
  }
}
5591
5592
/* Collects, for the leading code unit positions of the pattern starting at
cc, the set of code units which may appear at each position.

Arguments:
  common     compiler state (utf / ucp flags, character tables)
  cc         byte code position where the scan starts
  chars      per-position output records; the pointer is advanced as
             positions are consumed
  max_chars  maximum number of positions which may still be filled
  rec_count  shared work budget, decremented once per loop iteration

Returns the number of positions consumed by this invocation, or 0 when the
budget in rec_count is exhausted. Note that the value returned by a
recursive call is stored into max_chars by the caller, so it becomes the
new position limit. A position whose count is set to 255 accepts an
unrestricted code unit. */
static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, fast_forward_char_data *chars, int max_chars, sljit_u32 *rec_count)
5593
{
5594
/* Recursive function, which scans prefix literals. */
5595
BOOL last, any, class, caseless;
5596
int len, repeat, len_save, consumed = 0;
5597
sljit_u32 chr; /* Any unicode character. */
5598
sljit_u8 *bytes, *bytes_end, byte;
5599
PCRE2_SPTR alternative, cc_save, oc;
5600
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5601
PCRE2_UCHAR othercase[4];
5602
#elif defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
5603
PCRE2_UCHAR othercase[2];
5604
#else
5605
PCRE2_UCHAR othercase[1];
5606
#endif
5607
5608
repeat = 1;
5609
while (TRUE)
5610
  {
5611
  /* Every iteration spends one unit of the shared budget. */
  if (*rec_count == 0)
5612
    return 0;
5613
  (*rec_count)--;
5614
5615
  last = TRUE;
5616
  any = FALSE;
5617
  class = FALSE;
5618
  caseless = FALSE;
5619
5620
  /* Classify the opcode. A "continue" skips the item, "any" and "class"
  are handled right after the switch, and a plain "break" leaves cc
  pointing at a literal character which is recorded at the end of the
  loop body. */
  switch (*cc)
5621
    {
5622
    case OP_CHARI:
5623
    caseless = TRUE;
5624
    /* Fall through */
5625
    case OP_CHAR:
5626
    last = FALSE;
5627
    cc++;
5628
    break;
5629
5630
    case OP_SOD:
5631
    case OP_SOM:
5632
    case OP_SET_SOM:
5633
    case OP_NOT_WORD_BOUNDARY:
5634
    case OP_WORD_BOUNDARY:
5635
    case OP_EODN:
5636
    case OP_EOD:
5637
    case OP_CIRC:
5638
    case OP_CIRCM:
5639
    case OP_DOLL:
5640
    case OP_DOLLM:
5641
    case OP_NOT_UCP_WORD_BOUNDARY:
5642
    case OP_UCP_WORD_BOUNDARY:
5643
    /* Zero width assertions. */
5644
    cc++;
5645
    continue;
5646
5647
    /* Assertions are skipped as a whole. */
    case OP_ASSERT:
5648
    case OP_ASSERT_NOT:
5649
    case OP_ASSERTBACK:
5650
    case OP_ASSERTBACK_NOT:
5651
    case OP_ASSERT_NA:
5652
    case OP_ASSERTBACK_NA:
5653
    cc = bracketend(cc);
5654
    continue;
5655
5656
    case OP_PLUSI:
5657
    case OP_MINPLUSI:
5658
    case OP_POSPLUSI:
5659
    caseless = TRUE;
5660
    /* Fall through */
5661
    case OP_PLUS:
5662
    case OP_MINPLUS:
5663
    case OP_POSPLUS:
5664
    /* last stays TRUE: the scan stops after recording this character. */
    cc++;
5665
    break;
5666
5667
    case OP_EXACTI:
5668
    caseless = TRUE;
5669
    /* Fall through */
5670
    case OP_EXACT:
5671
    repeat = GET2(cc, 1);
5672
    last = FALSE;
5673
    cc += 1 + IMM2_SIZE;
5674
    break;
5675
5676
    case OP_QUERYI:
5677
    case OP_MINQUERYI:
5678
    case OP_POSQUERYI:
5679
    caseless = TRUE;
5680
    /* Fall through */
5681
    case OP_QUERY:
5682
    case OP_MINQUERY:
5683
    case OP_POSQUERY:
5684
    /* Optional character: the code after the character (cc + len) is
    scanned first into the same positions, then the character itself is
    recorded by the literal handling below. */
    len = 1;
5685
    cc++;
5686
#ifdef SUPPORT_UNICODE
5687
    if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
5688
#endif
5689
    max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count);
5690
    if (max_chars == 0)
5691
      return consumed;
5692
    last = FALSE;
5693
    break;
5694
5695
    case OP_KET:
5696
    cc += 1 + LINK_SIZE;
5697
    continue;
5698
5699
    case OP_ALT:
5700
    cc += GET(cc, 1);
5701
    continue;
5702
5703
    case OP_ONCE:
5704
    case OP_BRA:
5705
    case OP_BRAPOS:
5706
    case OP_CBRA:
5707
    case OP_CBRAPOS:
5708
    /* Each alternative introduced by OP_ALT is scanned recursively into
    the same positions; the loop then continues with the first
    alternative, which starts right after the bracket header. */
    alternative = cc + GET(cc, 1);
5709
    while (*alternative == OP_ALT)
5710
      {
5711
      max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count);
5712
      if (max_chars == 0)
5713
        return consumed;
5714
      alternative += GET(alternative, 1);
5715
      }
5716
5717
    if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
5718
      cc += IMM2_SIZE;
5719
    cc += 1 + LINK_SIZE;
5720
    continue;
5721
5722
    case OP_CLASS:
5723
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5724
    if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
5725
      return consumed;
5726
#endif
5727
    class = TRUE;
5728
    break;
5729
5730
    case OP_NCLASS:
5731
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5732
    if (common->utf) return consumed;
5733
#endif
5734
    class = TRUE;
5735
    break;
5736
5737
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
5738
    case OP_XCLASS:
5739
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5740
    if (common->utf) return consumed;
5741
#endif
5742
    any = TRUE;
5743
    cc += GET(cc, 1);
5744
    break;
5745
#endif
5746
5747
    case OP_DIGIT:
5748
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5749
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
5750
      return consumed;
5751
#endif
5752
    any = TRUE;
5753
    cc++;
5754
    break;
5755
5756
    case OP_WHITESPACE:
5757
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5758
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
5759
      return consumed;
5760
#endif
5761
    any = TRUE;
5762
    cc++;
5763
    break;
5764
5765
    case OP_WORDCHAR:
5766
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5767
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
5768
      return consumed;
5769
#endif
5770
    any = TRUE;
5771
    cc++;
5772
    break;
5773
5774
    case OP_NOT:
5775
    case OP_NOTI:
5776
    /* Extra increment: these opcodes are advanced by two in total. */
    cc++;
5777
    /* Fall through. */
5778
    case OP_NOT_DIGIT:
5779
    case OP_NOT_WHITESPACE:
5780
    case OP_NOT_WORDCHAR:
5781
    case OP_ANY:
5782
    case OP_ALLANY:
5783
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5784
    if (common->utf) return consumed;
5785
#endif
5786
    any = TRUE;
5787
    cc++;
5788
    break;
5789
5790
#ifdef SUPPORT_UNICODE
5791
    case OP_NOTPROP:
5792
    case OP_PROP:
5793
#if PCRE2_CODE_UNIT_WIDTH != 32
5794
    if (common->utf) return consumed;
5795
#endif
5796
    any = TRUE;
5797
    cc += 1 + 2;
5798
    break;
5799
#endif
5800
5801
    case OP_TYPEEXACT:
5802
    /* Only the repeat count is stored; the next iteration processes the
    opcode which follows. */
    repeat = GET2(cc, 1);
5803
    cc += 1 + IMM2_SIZE;
5804
    continue;
5805
5806
    case OP_NOTEXACT:
5807
    case OP_NOTEXACTI:
5808
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5809
    if (common->utf) return consumed;
5810
#endif
5811
    any = TRUE;
5812
    repeat = GET2(cc, 1);
5813
    cc += 1 + IMM2_SIZE + 1;
5814
    break;
5815
5816
    default:
5817
    return consumed;
5818
    }
5819
5820
  /* Unrestricted item: mark "repeat" positions with count 255. */
  if (any)
5821
    {
5822
    do
5823
      {
5824
      chars->count = 255;
5825
5826
      consumed++;
5827
      if (--max_chars == 0)
5828
        return consumed;
5829
      chars++;
5830
      }
5831
    while (--repeat > 0);
5832
5833
    repeat = 1;
5834
    continue;
5835
    }
5836
5837
  /* Bitmap class: the 32 byte bitmap starts at cc + 1, and cc is moved
  past it so that *cc is the opcode following the bitmap. */
  if (class)
5838
    {
5839
    bytes = (sljit_u8*) (cc + 1);
5840
    cc += 1 + 32 / sizeof(PCRE2_UCHAR);
5841
5842
    switch (*cc)
5843
      {
5844
      /* The class may match nothing, so the code after the quantifier is
      scanned into the same positions as well. */
      case OP_CRSTAR:
5845
      case OP_CRMINSTAR:
5846
      case OP_CRPOSSTAR:
5847
      case OP_CRQUERY:
5848
      case OP_CRMINQUERY:
5849
      case OP_CRPOSQUERY:
5850
      max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
5851
      if (max_chars == 0)
5852
        return consumed;
5853
      break;
5854
5855
      default:
5856
      case OP_CRPLUS:
5857
      case OP_CRMINPLUS:
5858
      case OP_CRPOSPLUS:
5859
      break;
5860
5861
      case OP_CRRANGE:
5862
      case OP_CRMINRANGE:
5863
      case OP_CRPOSRANGE:
5864
      /* The first value of the range is used as the repeat count. */
      repeat = GET2(cc, 1);
5865
      if (repeat <= 0)
5866
        return consumed;
5867
      break;
5868
      }
5869
5870
    do
5871
      {
5872
      /* When the bit of character 255 is set, the position is marked as
      unrestricted instead of enumerating the bitmap. */
      if (bytes[31] & 0x80)
5873
        chars->count = 255;
5874
      else if (chars->count != 255)
5875
        {
5876
        bytes_end = bytes + 32;
5877
        chr = 0;
5878
        do
5879
          {
5880
          byte = *bytes++;
5881
          SLJIT_ASSERT((chr & 0x7) == 0);
5882
          if (byte == 0)
5883
            chr += 8;
5884
          else
5885
            {
5886
            do
5887
              {
5888
              if ((byte & 0x1) != 0)
5889
                add_prefix_char(chr, chars, TRUE);
5890
              byte >>= 1;
5891
              chr++;
5892
              }
5893
            while (byte != 0);
5894
            /* Round up to the first character of the next bitmap byte. */
            chr = (chr + 7) & (sljit_u32)(~7);
5895
            }
5896
          }
5897
        while (chars->count != 255 && bytes < bytes_end);
5898
        /* Rewind to the start of the bitmap for the next repetition. */
        bytes = bytes_end - 32;
5899
        }
5900
5901
      consumed++;
5902
      if (--max_chars == 0)
5903
        return consumed;
5904
      chars++;
5905
      }
5906
    while (--repeat > 0);
5907
5908
    /* Decide whether scanning can go on after the quantifier. */
    switch (*cc)
5909
      {
5910
      case OP_CRSTAR:
5911
      case OP_CRMINSTAR:
5912
      case OP_CRPOSSTAR:
5913
      return consumed;
5914
5915
      case OP_CRQUERY:
5916
      case OP_CRMINQUERY:
5917
      case OP_CRPOSQUERY:
5918
      cc++;
5919
      break;
5920
5921
      case OP_CRRANGE:
5922
      case OP_CRMINRANGE:
5923
      case OP_CRPOSRANGE:
5924
      /* Continue only when both values of the range are equal. */
      if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
5925
        return consumed;
5926
      cc += 1 + 2 * IMM2_SIZE;
5927
      break;
5928
      }
5929
5930
    repeat = 1;
5931
    continue;
5932
    }
5933
5934
  /* Literal character: cc points to its first code unit. */
  len = 1;
5935
#ifdef SUPPORT_UNICODE
5936
  if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
5937
#endif
5938
5939
  if (caseless && char_has_othercase(common, cc))
5940
    {
5941
#ifdef SUPPORT_UNICODE
5942
    if (common->utf)
5943
      {
5944
      GETCHAR(chr, cc);
5945
      /* Stop when the other case is encoded with a different number of
      code units. */
      if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
5946
        return consumed;
5947
      }
5948
    else
5949
#endif
5950
      {
5951
      chr = *cc;
5952
#ifdef SUPPORT_UNICODE
5953
      if (common->ucp && chr > 127)
5954
        {
5955
        chr = UCD_OTHERCASE(chr);
5956
        /* Use the other case only if it fits into a single code unit. */
        othercase[0] = (chr == (PCRE2_UCHAR)chr) ? chr : *cc;
5957
        }
5958
      else
5959
#endif
5960
        othercase[0] = TABLE_GET(chr, common->fcc, chr);
5961
      }
5962
    }
5963
  else
5964
    {
5965
    caseless = FALSE;
5966
    othercase[0] = 0; /* Stops compiler warning - PH */
5967
    }
5968
5969
  /* Record the code units of the character (and of its other case) at
  consecutive positions, "repeat" times. */
  len_save = len;
5970
  cc_save = cc;
5971
  while (TRUE)
5972
    {
5973
    oc = othercase;
5974
    do
5975
      {
5976
      len--;
5977
      consumed++;
5978
5979
      chr = *cc;
5980
      add_prefix_char(*cc, chars, len == 0);
5981
5982
      if (caseless)
5983
        add_prefix_char(*oc, chars, len == 0);
5984
5985
      if (--max_chars == 0)
5986
        return consumed;
5987
      chars++;
5988
      cc++;
5989
      oc++;
5990
      }
5991
    while (len > 0);
5992
5993
    if (--repeat == 0)
5994
      break;
5995
5996
    len = len_save;
5997
    cc = cc_save;
5998
    }
5999
6000
  repeat = 1;
6001
  if (last)
6002
    return consumed;
6003
  }
6004
}
6005
6006
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
/* Emits a test of the code unit held in reg and a jump to label which is
taken when the masked value identifies a code unit that cannot start a
character (0x80 under mask 0xc0 in 8 bit mode, 0xdc00 under mask 0xfc00
in 16 bit mode). The content of reg is overwritten by the masked value. */
static void jumpto_if_not_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg, struct sljit_label *label)
{
#if PCRE2_CODE_UNIT_WIDTH == 8
const sljit_sw unit_type_mask = 0xc0;
const sljit_sw non_start_unit = 0x80;
#elif PCRE2_CODE_UNIT_WIDTH == 16
const sljit_sw unit_type_mask = 0xfc00;
const sljit_sw non_start_unit = 0xdc00;
#else
#error "Unknown code width"
#endif

OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, unit_type_mask);
CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, non_start_unit, label);
}
#endif
6020
6021
#include "pcre2_jit_simd_inc.h"
6022
6023
#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
6024
6025
/* Searches the first max prefix positions for the best pair of positions
which can be checked together by the SIMD character pair search.

Both positions must have a priority (last_count) above 2, the lower
position must be at most max_fast_forward_char_pair_offset() before the
higher one, and the two stored code units of one position must all differ
from those of the other. The pair with the highest priority sum is used;
among equal sums the pair visited last wins. Position 0 is never used as
the higher position.

Returns TRUE after emitting the search code, FALSE if no pair qualifies. */
static BOOL check_fast_forward_char_pair_simd(compiler_common *common, fast_forward_char_data *chars, int max)
{
sljit_s32 hi, lo;
sljit_s32 best_hi = 0, best_lo = 0;
sljit_u32 best_pri = 0;
sljit_s32 window = max_fast_forward_char_pair_offset();
PCRE2_UCHAR hi_first, hi_second, hi_pri;
PCRE2_UCHAR lo_first, lo_second, lo_pri;

for (hi = max - 1; hi >= 1; hi--)
  {
  if (chars[hi].last_count <= 2)
    continue;

  hi_first = chars[hi].chars[0];
  hi_second = chars[hi].chars[1];
  hi_pri = chars[hi].last_count;

  /* Lowest candidate position which is still inside the window. */
  lo = hi - window;
  if (lo < 0)
    lo = 0;

  for (; lo < hi; lo++)
    {
    lo_pri = chars[lo].last_count;
    if (lo_pri <= 2 || (sljit_u32)hi_pri + (sljit_u32)lo_pri < best_pri)
      continue;

    lo_first = chars[lo].chars[0];
    lo_second = chars[lo].chars[1];

    /* The two positions must not share any code unit. */
    if (hi_first == lo_first || hi_first == lo_second
        || hi_second == lo_first || hi_second == lo_second)
      continue;

    best_pri = hi_pri + lo_pri;
    best_hi = hi;
    best_lo = lo;
    }
  }

if (best_pri == 0)
  return FALSE;

fast_forward_char_pair_simd(common, best_hi, chars[best_hi].chars[0], chars[best_hi].chars[1], best_lo, chars[best_lo].chars[0], chars[best_lo].chars[1]);
return TRUE;
}
6070
6071
#endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
6072
6073
/* Emits code which moves STR_PTR forward until the code unit found offset
code units after STR_PTR equals char1 or char2.

Arguments:
  common  compiler state
  char1   first accepted code unit
  char2   second accepted code unit (may be equal to char1)
  offset  distance of the checked code unit from the match start; it must
          be 0 unless the mode is PCRE2_JIT_COMPLETE (see the assertion)

When common->match_end_ptr is non-zero, STR_END is replaced during the
search by a limit computed from the value stored at match_end_ptr, and
the original STR_END is kept in TMP3 and restored before returning. */
static void fast_forward_first_char2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
6074
{
6075
DEFINE_COMPILER;
6076
struct sljit_label *start;
6077
struct sljit_jump *match;
6078
struct sljit_jump *partial_quit;
6079
PCRE2_UCHAR mask;
6080
BOOL has_match_end = (common->match_end_ptr != 0);
6081
6082
SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE || offset == 0);
6083
6084
if (has_match_end)
6085
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6086
6087
/* From here on STR_PTR addresses the checked code unit. */
if (offset > 0)
6088
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
6089
6090
if (has_match_end)
6091
  {
6092
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
6093
6094
  /* Presumably selects the smaller of STR_END and the stored match end
  plus (offset + 1) code units as the temporary STR_END - confirm against
  the SELECT macro. */
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offset + 1));
6095
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
6096
  SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END);
6097
  }
6098
6099
#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD
6100
6101
if (JIT_HAS_FAST_FORWARD_CHAR_SIMD)
6102
  {
6103
  fast_forward_char_simd(common, char1, char2, offset);
6104
6105
  /* Undo the initial adjustment of STR_PTR. */
  if (offset > 0)
6106
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
6107
6108
  if (has_match_end)
6109
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6110
  return;
6111
  }
6112
6113
#endif
6114
6115
/* Scalar search loop: read one code unit, step over it, and jump back to
start while it is neither char1 nor char2. */
start = LABEL();
6116
6117
partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6118
if (common->mode == PCRE2_JIT_COMPLETE)
6119
  add_jump(compiler, &common->failed_match, partial_quit);
6120
6121
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6122
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6123
6124
if (char1 == char2)
6125
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1, start);
6126
else
6127
  {
6128
  mask = char1 ^ char2;
6129
  /* When the two code units differ in a single bit, setting that bit
  allows checking both with one comparison. */
  if (is_powerof2(mask))
6130
    {
6131
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
6132
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | mask, start);
6133
    }
6134
  else
6135
    {
6136
    match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
6137
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char2, start);
6138
    JUMPHERE(match);
6139
    }
6140
  }
6141
6142
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
6143
if (common->utf && offset > 0)
6144
  {
6145
  /* Load the code unit at the would-be match start and continue the
  search when it cannot start a character. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-(offset + 1)));
6146
  jumpto_if_not_utf_char_start(compiler, TMP1, start);
6147
  }
6148
#endif
6149
6150
/* Move STR_PTR back by the initial offset and the one unit stepped over. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset + 1));
6151
6152
/* Outside complete mode the end-of-subject jump lands here, after the
pointer adjustment above. */
if (common->mode != PCRE2_JIT_COMPLETE)
6153
  JUMPHERE(partial_quit);
6154
6155
if (has_match_end)
6156
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6157
}
6158
6159
/* Emits a fast forward search based on the code units which may appear at
the first (at most MAX_N_CHARS) positions of a match.

The positions are collected by scan_prefix(). The function then:
  1. converts last_count of every position into a priority (0..7);
  2. tries the SIMD character pair search, if available;
  3. otherwise looks for the longest run (more than three positions) of
     positions with a known code unit set, and builds a 256 entry skip
     table (update_table) for it, indexed by one byte of the code unit;
  4. selects one more position (offset) with the highest priority, which
     is compared directly against one or two code units.

Returns FALSE when scan_prefix() found nothing or no usable position
exists, TRUE otherwise.
NOTE(review): TRUE is also returned, without emitting code, when the skip
table cannot be allocated - presumably the allocator records the error;
confirm. */
static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
6160
{
6161
DEFINE_COMPILER;
6162
struct sljit_label *start;
6163
struct sljit_jump *match;
6164
fast_forward_char_data chars[MAX_N_CHARS];
6165
sljit_s32 offset;
6166
PCRE2_UCHAR mask;
6167
PCRE2_UCHAR *char_set, *char_set_end;
6168
int i, max, from;
6169
int range_right = -1, range_len;
6170
sljit_u8 *update_table = NULL;
6171
BOOL in_range;
6172
sljit_u32 rec_count;
6173
6174
for (i = 0; i < MAX_N_CHARS; i++)
6175
  {
6176
  chars[i].count = 0;
6177
  chars[i].last_count = 0;
6178
  }
6179
6180
/* Work budget shared by all recursive scan_prefix() invocations. */
rec_count = 10000;
6181
max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);
6182
6183
if (max < 1)
6184
  return FALSE;
6185
6186
/* Convert last_count to priority. */
6187
for (i = 0; i < max; i++)
6188
  {
6189
  SLJIT_ASSERT(chars[i].last_count <= chars[i].count);
6190
6191
  switch (chars[i].count)
6192
    {
6193
    /* Nothing was recorded: treat the position as unrestricted. */
    case 0:
6194
    chars[i].count = 255;
6195
    chars[i].last_count = 0;
6196
    break;
6197
6198
    case 1:
6199
    chars[i].last_count = (chars[i].last_count == 1) ? 7 : 5;
6200
    /* Simplifies algorithms later. */
6201
    chars[i].chars[1] = chars[i].chars[0];
6202
    break;
6203
6204
    case 2:
6205
    SLJIT_ASSERT(chars[i].chars[0] != chars[i].chars[1]);
6206
6207
    /* Two code units which differ in a single bit get a higher priority
    than two arbitrary code units. */
    if (is_powerof2(chars[i].chars[0] ^ chars[i].chars[1]))
6208
      chars[i].last_count = (chars[i].last_count == 2) ? 6 : 4;
6209
    else
6210
      chars[i].last_count = (chars[i].last_count == 2) ? 3 : 2;
6211
    break;
6212
6213
    default:
6214
    chars[i].last_count = (chars[i].count == 255) ? 0 : 1;
6215
    break;
6216
    }
6217
  }
6218
6219
#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
6220
if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && check_fast_forward_char_pair_simd(common, chars, max))
6221
  return TRUE;
6222
#endif
6223
6224
/* Find the longest run of consecutive positions whose count is below
255. range_right is the last position of the run and range_len is its
length; a run is accepted only if it is longer than the initial value of
range_len. */
in_range = FALSE;
6225
/* Prevent compiler "uninitialized" warning */
6226
from = 0;
6227
range_len = 4 /* minimum length */ - 1;
6228
for (i = 0; i <= max; i++)
6229
  {
6230
  if (in_range && (i - from) > range_len && (chars[i - 1].count < 255))
6231
    {
6232
    range_len = i - from;
6233
    range_right = i - 1;
6234
    }
6235
6236
  if (i < max && chars[i].count < 255)
6237
    {
6238
    SLJIT_ASSERT(chars[i].count > 0);
6239
    if (!in_range)
6240
      {
6241
      in_range = TRUE;
6242
      from = i;
6243
      }
6244
    }
6245
  else
6246
    in_range = FALSE;
6247
  }
6248
6249
if (range_right >= 0)
6250
  {
6251
  update_table = (sljit_u8 *)allocate_read_only_data(common, 256);
6252
  if (update_table == NULL)
6253
    return TRUE;
6254
  /* Default step: the whole length of the run. */
  memset(update_table, IN_UCHARS(range_len), 256);
6255
6256
  /* For every code unit of the run, store the smallest distance (in
  bytes) from the right end of the run at which it may occur. The table
  is indexed by the low eight bits of the code unit. */
  for (i = 0; i < range_len; i++)
6257
    {
6258
    SLJIT_ASSERT(chars[range_right - i].count > 0 && chars[range_right - i].count < 255);
6259
6260
    char_set = chars[range_right - i].chars;
6261
    char_set_end = char_set + chars[range_right - i].count;
6262
    do
6263
      {
6264
      if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))
6265
        update_table[(*char_set) & 0xff] = IN_UCHARS(i);
6266
      char_set++;
6267
      }
6268
    while (char_set < char_set_end);
6269
    }
6270
  }
6271
6272
/* Choose the position (other than range_right) with the highest priority;
the first candidate needs a priority of at least 2. */
offset = -1;
6273
/* Scan forward. */
6274
for (i = 0; i < max; i++)
6275
  {
6276
  if (range_right == i)
6277
    continue;
6278
6279
  if (offset == -1)
6280
    {
6281
    if (chars[i].last_count >= 2)
6282
      offset = i;
6283
    }
6284
  else if (chars[offset].last_count < chars[i].last_count)
6285
    offset = i;
6286
  }
6287
6288
SLJIT_ASSERT(offset == -1 || (chars[offset].count >= 1 && chars[offset].count <= 2));
6289
6290
if (range_right < 0)
6291
  {
6292
  if (offset < 0)
6293
    return FALSE;
6294
  /* Works regardless of whether the count is 1 or 2, because chars[1]
  was set to chars[0] for single code unit positions above. */
6295
  fast_forward_first_char2(common, chars[offset].chars[0], chars[offset].chars[1], offset);
6296
  return TRUE;
6297
  }
6298
6299
SLJIT_ASSERT(range_right != offset);
6300
6301
/* Reduce STR_END by max code units for the duration of the search, and
fail the match when the result wraps below zero. When match_end_ptr is
set, the original STR_END is saved in TMP3 and the limit stored at
match_end_ptr is also taken into account. */
if (common->match_end_ptr != 0)
6302
  {
6303
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6304
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
6305
  OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6306
  add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
6307
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
6308
  SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END);
6309
  }
6310
else
6311
  {
6312
  OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6313
  add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
6314
  }
6315
6316
SLJIT_ASSERT(range_right >= 0);
6317
6318
/* Keep the table address in a register when real registers are used. */
if (!HAS_VIRTUAL_REGISTERS)
6319
  OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
6320
6321
start = LABEL();
6322
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
6323
6324
/* Load one byte of the code unit at position range_right; the second
form addresses the last byte of that code unit instead of the first. */
#if PCRE2_CODE_UNIT_WIDTH == 8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
6325
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
6326
#else
6327
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
6328
#endif
6329
6330
if (!HAS_VIRTUAL_REGISTERS)
6331
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
6332
else
6333
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
6334
6335
/* Advance by the table value and retry until the step is zero. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6336
CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
6337
6338
/* Check the selected extra position. STR_PTR is advanced by one code
unit first, so a failed comparison restarts the search one unit later. */
if (offset >= 0)
6339
  {
6340
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
6341
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6342
6343
  if (chars[offset].count == 1)
6344
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0], start);
6345
  else
6346
    {
6347
    mask = chars[offset].chars[0] ^ chars[offset].chars[1];
6348
    if (is_powerof2(mask))
6349
      {
6350
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
6351
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0] | mask, start);
6352
      }
6353
    else
6354
      {
6355
      match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0]);
6356
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[1], start);
6357
      JUMPHERE(match);
6358
      }
6359
    }
6360
  }
6361
6362
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
6363
/* In UTF mode, continue the search when the code unit at the candidate
match start cannot start a character. With offset == 0 this check is
omitted. */
if (common->utf && offset != 0)
6364
  {
6365
  if (offset < 0)
6366
    {
6367
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6368
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6369
    }
6370
  else
6371
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
6372
6373
  jumpto_if_not_utf_char_start(compiler, TMP1, start);
6374
6375
  if (offset < 0)
6376
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6377
  }
6378
#endif
6379
6380
/* Undo the one code unit step made before the extra comparison. */
if (offset >= 0)
6381
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6382
6383
/* Restore the original STR_END. */
if (common->match_end_ptr != 0)
6384
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6385
else
6386
  OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6387
return TRUE;
6388
}
6389
6390
/* Emits a fast forward search for the first code unit of the compiled
pattern (common->re->first_codeunit). When the PCRE2_FIRSTCASELESS flag is
set, the other case of the code unit is accepted as well: it is taken from
the fcc table, or from UCD_OTHERCASE for code units above 127 when utf or
ucp is enabled. */
static SLJIT_INLINE void fast_forward_first_char(compiler_common *common)
{
PCRE2_UCHAR first_char = (PCRE2_UCHAR)(common->re->first_codeunit);
PCRE2_UCHAR oc;

/* Case sensitive search: both accepted code units are the same. */
if ((common->re->flags & PCRE2_FIRSTCASELESS) == 0)
  {
  fast_forward_first_char2(common, first_char, first_char, 0);
  return;
  }

oc = TABLE_GET(first_char, common->fcc, first_char);
#if defined SUPPORT_UNICODE
if (first_char > 127 && (common->utf || common->ucp))
  oc = UCD_OTHERCASE(first_char);
#endif

fast_forward_first_char2(common, first_char, oc, 0);
}
6407
6408
static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
6409
{
6410
DEFINE_COMPILER;
6411
struct sljit_label *loop;
6412
struct sljit_jump *lastchar = NULL;
6413
struct sljit_jump *firstchar;
6414
struct sljit_jump *quit = NULL;
6415
struct sljit_jump *foundcr = NULL;
6416
struct sljit_jump *notfoundnl;
6417
jump_list *newline = NULL;
6418
6419
if (common->match_end_ptr != 0)
6420
  {
6421
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
6422
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6423
  }
6424
6425
if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6426
  {
6427
#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
6428
  if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && common->mode == PCRE2_JIT_COMPLETE)
6429
    {
6430
    if (HAS_VIRTUAL_REGISTERS)
6431
      {
6432
      OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6433
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6434
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6435
      }
6436
    else
6437
      {
6438
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
6439
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
6440
      }
6441
    firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6442
6443
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6444
    OP2U(SLJIT_SUB | SLJIT_SET_Z, STR_PTR, 0, TMP1, 0);
6445
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_NOT_EQUAL);
6446
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6447
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
6448
#endif
6449
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6450
6451
    fast_forward_char_pair_simd(common, 1, common->newline & 0xff, common->newline & 0xff, 0, (common->newline >> 8) & 0xff, (common->newline >> 8) & 0xff);
6452
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6453
    }
6454
  else
6455
#endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
6456
    {
6457
    lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6458
    if (HAS_VIRTUAL_REGISTERS)
6459
      {
6460
      OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6461
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6462
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6463
      }
6464
    else
6465
      {
6466
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
6467
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
6468
      }
6469
    firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6470
6471
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
6472
    OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, STR_PTR, 0, TMP1, 0);
6473
    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);
6474
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6475
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
6476
#endif
6477
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
6478
6479
    loop = LABEL();
6480
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6481
    quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6482
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
6483
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
6484
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
6485
    CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
6486
6487
    JUMPHERE(quit);
6488
    JUMPHERE(lastchar);
6489
    }
6490
6491
  JUMPHERE(firstchar);
6492
6493
  if (common->match_end_ptr != 0)
6494
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6495
  return;
6496
  }
6497
6498
if (HAS_VIRTUAL_REGISTERS)
6499
  {
6500
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6501
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6502
  }
6503
else
6504
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
6505
6506
/* Example: match /^/ to \r\n from offset 1. */
6507
firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6508
6509
if (common->nltype == NLTYPE_ANY)
6510
  move_back(common, NULL, FALSE);
6511
else
6512
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6513
6514
loop = LABEL();
6515
common->ff_newline_shortcut = loop;
6516
6517
#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD
6518
if (JIT_HAS_FAST_FORWARD_CHAR_SIMD && (common->nltype == NLTYPE_FIXED || common->nltype == NLTYPE_ANYCRLF))
6519
  {
6520
  if (common->nltype == NLTYPE_ANYCRLF)
6521
    {
6522
    fast_forward_char_simd(common, CHAR_CR, CHAR_LF, 0);
6523
    if (common->mode != PCRE2_JIT_COMPLETE)
6524
      lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6525
6526
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6527
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6528
    quit = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6529
    }
6530
   else
6531
    {
6532
    fast_forward_char_simd(common, common->newline, common->newline, 0);
6533
6534
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6535
    if (common->mode != PCRE2_JIT_COMPLETE)
6536
      {
6537
      OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
6538
      SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
6539
      }
6540
    }
6541
  }
6542
else
6543
#endif /* JIT_HAS_FAST_FORWARD_CHAR_SIMD */
6544
  {
6545
  read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
6546
  lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6547
  if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
6548
    foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6549
  check_newlinechar(common, common->nltype, &newline, FALSE);
6550
  set_jumps(newline, loop);
6551
  }
6552
6553
if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
6554
  {
6555
  if (quit == NULL)
6556
    {
6557
    quit = JUMP(SLJIT_JUMP);
6558
    JUMPHERE(foundcr);
6559
    }
6560
6561
  notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6562
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6563
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_NL);
6564
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
6565
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6566
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
6567
#endif
6568
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6569
  JUMPHERE(notfoundnl);
6570
  JUMPHERE(quit);
6571
  }
6572
6573
if (lastchar)
6574
  JUMPHERE(lastchar);
6575
JUMPHERE(firstchar);
6576
6577
if (common->match_end_ptr != 0)
6578
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6579
}
6580
6581
/* Forward declaration: optimize_class() is defined further down, after the
two strategies it dispatches to, but fast_forward_start_bits() below already
calls it. */
static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
6582
6583
static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common)
{
/* Emits a scan loop driven by the pattern's start bitmap
(common->re->start_bitmap): a code unit is loaded from STR_PTR, tested
against the bitmap, and the loop repeats while the test fails. On exit
STR_PTR has been stepped back by one code unit, so it addresses the code
unit that ended the scan. May destroy TMP1, TMP2, and (without virtual
registers) TMP3; RETURN_ADDR is used as scratch when match_end_ptr is set. */
DEFINE_COMPILER;
const sljit_u8 *bitmap = common->re->start_bitmap;
/* Bit 255 of the bitmap; also passed to optimize_class() as its nclass flag. */
const BOOL top_bit_set = (bitmap[31] & 0x80) != 0;
/* Register that receives the (1 << bit index) mask in the table lookup. */
const int mask_reg = HAS_VIRTUAL_REGISTERS ? TMP2 : TMP3;
struct sljit_label *scan;
struct sljit_jump *at_end;
#if PCRE2_CODE_UNIT_WIDTH != 8
struct sljit_jump *wide_hit = NULL;
#endif
jump_list *rejected = NULL;

if (common->match_end_ptr != 0)
  {
  /* Park STR_END in RETURN_ADDR, then SELECT between STR_END and the stored
  match end plus one code unit (the latter is chosen on GREATER). */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
  SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END);
  }

scan = LABEL();

/* In complete mode running out of input is a failed match; otherwise the
jump is resolved at the end of this function. */
at_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, at_end);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (optimize_class(common, bitmap, top_bit_set, FALSE, &rejected))
  {
  /* The class test was emitted inline; its reject jumps restart the scan. */
  set_jumps(rejected, scan);
  }
else
  {
  /* Generic path: index the bitmap byte with (ch >> 3) and test bit (ch & 7). */
#if PCRE2_CODE_UNIT_WIDTH != 8
  if (top_bit_set)
    wide_hit = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255);
  else
    CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255, scan);
#elif defined SUPPORT_UNICODE
  if (common->utf && is_char7_bitset(bitmap, FALSE))
    CMPTO(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 127, scan);
#endif
  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)bitmap);
  OP2(SLJIT_SHL, mask_reg, 0, SLJIT_IMM, 1, TMP2, 0);
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, mask_reg, 0);
  JUMPTO(SLJIT_ZERO, scan);
  }

#if PCRE2_CODE_UNIT_WIDTH != 8
if (wide_hit != NULL)
  JUMPHERE(wide_hit);
#endif

/* Undo the post-load increment. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (common->mode != PCRE2_JIT_COMPLETE)
  JUMPHERE(at_end);

/* Restore the STR_END value parked at the top. */
if (common->match_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
}
6654
6655
static SLJIT_INLINE jump_list *search_requested_char(compiler_common *common, PCRE2_UCHAR req_char, BOOL caseless, BOOL has_firstchar)
{
/* Emits a forward search for req_char (or its other case when caseless is
set), starting at STR_PTR, or one code unit later when has_firstchar is set.
The position found is written to the stack slot common->req_char_ptr.
Returns the list of jumps taken when the search runs into STR_END (in the
SIMD variant, whatever fast_requested_char_simd() returns). Uses TMP1 as the
search cursor and TMP2 as scratch. */
DEFINE_COMPILER;
struct sljit_label *scan;
struct sljit_jump *skip_long;
struct sljit_jump *cached;
struct sljit_jump *hit;
struct sljit_jump *hit_other = NULL;
jump_list *misses = NULL;
sljit_u32 other_case = req_char;
sljit_u32 diff;

SLJIT_ASSERT(common->req_char_ptr != 0);

/* Two early exits: STR_PTR + REQ_CU_MAX * 100 code units is still below
STR_END, or STR_PTR is below the position stored by a previous search. */
OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(REQ_CU_MAX) * 100);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
skip_long = CMP(SLJIT_LESS, TMP2, 0, STR_END, 0);
cached = CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0);

/* Initial cursor position. */
if (!has_firstchar)
  OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
else
  OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (caseless)
  {
  other_case = TABLE_GET(req_char, common->fcc, req_char);
#if defined SUPPORT_UNICODE
  if (req_char > 127 && (common->utf || common->ucp))
    other_case = UCD_OTHERCASE(req_char);
#endif
  }

#ifdef JIT_HAS_FAST_REQUESTED_CHAR_SIMD
if (JIT_HAS_FAST_REQUESTED_CHAR_SIMD)
  {
  misses = fast_requested_char_simd(common, req_char, other_case);
  }
else
#endif
  {
  scan = LABEL();
  add_jump(compiler, &misses, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));

  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);

  diff = req_char ^ other_case;
  if (diff == 0)
    {
    /* Both cases are the same code unit: one compare. */
    hit = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
    }
  else if (is_powerof2(diff))
    {
    /* The cases differ in one bit: force that bit on, then one compare. */
    OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, diff);
    hit = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | diff);
    }
  else
    {
    /* Otherwise compare against each case separately. */
    hit = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
    hit_other = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, other_case);
    }

  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPTO(SLJIT_JUMP, scan);

  JUMPHERE(hit);
  if (hit_other)
    JUMPHERE(hit_other);
  }

/* Remember where the search stopped. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);

JUMPHERE(cached);
JUMPHERE(skip_long);
return misses;
}
6730
6731
static void do_revertframes(compiler_common *common)
{
/* Emits a fast-call helper that walks the entries below STACK_TOP. Each
iteration reads the word at STACK_TOP - 1 word into TMP2 and acts on its
sign:
  > 0  the value is an offset from the local base; the next two stack words
       are stored at that location and three words are popped;
  < 0  the negated value is an offset from the local base; the next stack
       word is stored there and two words are popped;
  == 0 the helper returns through RETURN_ADDR.
TMP1 holds the local base; TMP3 is used as scratch without virtual
registers. */
DEFINE_COMPILER;
struct sljit_jump *non_positive;
struct sljit_jump *negative;
struct sljit_label *next_entry;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
GET_LOCAL_BASE(TMP1, 0, 0);

/* Drop frames until we reach STACK_TOP. */
next_entry = LABEL();
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -SSIZE_OF(sw));
OP2U(SLJIT_SUB | SLJIT_SET_SIG_LESS_EQUAL | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0);
non_positive = JUMP(SLJIT_SIG_LESS_EQUAL);

/* Positive entry: two words to write back. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -(3 * SSIZE_OF(sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * SSIZE_OF(sw));
  }
else
  {
  /* TMP1 is borrowed for the first value, so the local base is reloaded
  before the second store. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(3 * SSIZE_OF(sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * SSIZE_OF(sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
  GET_LOCAL_BASE(TMP1, 0, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP3, 0);
  }
JUMPTO(SLJIT_JUMP, next_entry);

JUMPHERE(non_positive);
/* Re-declare the flags produced by the compare above so that the zero flag
can be tested on this path. */
sljit_set_current_flags(compiler, SLJIT_CURRENT_FLAGS_SUB | SLJIT_CURRENT_FLAGS_COMPARE | SLJIT_SET_SIG_LESS_EQUAL | SLJIT_SET_Z);
negative = JUMP(SLJIT_NOT_ZERO /* SIG_LESS */);
/* End of reverting values. */
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Negative entry: one word to write back. */
JUMPHERE(negative);
OP2(SLJIT_SUB, TMP2, 0, SLJIT_IMM, 0, TMP2, 0);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
  }
else
  {
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP3, 0);
  }
JUMPTO(SLJIT_JUMP, next_entry);
}
6786
6787
#ifdef SUPPORT_UNICODE
6788
/* Bit-mask helpers over ucp_* category indices. The masks are signed int
expressions built from (1 << index), so every index used must stay below the
width of int minus one. */
/* Mask with only bit `bit` set. */
#define UCPCAT(bit) (1 << (bit))
6789
/* Mask with two category bits set. */
#define UCPCAT2(bit1, bit2) (UCPCAT(bit1) | UCPCAT(bit2))
6790
/* Mask with three category bits set. */
#define UCPCAT3(bit1, bit2, bit3) (UCPCAT(bit1) | UCPCAT(bit2) | UCPCAT(bit3))
6791
/* Mask with every bit from `start` to `end` (both inclusive) set:
(bits 0..end) minus (bits 0..start-1). */
#define UCPCAT_RANGE(start, end) (((1 << ((end) + 1)) - 1) - ((1 << (start)) - 1))
6792
/* All categories from ucp_Ll through ucp_Lu. */
#define UCPCAT_L UCPCAT_RANGE(ucp_Ll, ucp_Lu)
6793
/* All categories from ucp_Nd through ucp_No. */
#define UCPCAT_N UCPCAT_RANGE(ucp_Nd, ucp_No)
6794
/* Every category bit from 0 through ucp_Zs. */
#define UCPCAT_ALL ((1 << (ucp_Zs + 1)) - 1)
6795
#endif
6796
6797
static void check_wordboundary(compiler_common *common, BOOL ucp)
{
/* Emits the fast-call helper used for word boundary tests. The return
address is kept in LOCALS0. The helper classifies the character before
STR_PTR into TMP3 (0 when STR_PTR is at or before the subject start) and
the character at STR_PTR into TMP2 (0 at the end of the subject), each as
a 0/1 "word character" flag, then returns TMP2 ^ TMP3 in TMP2 with the
zero flag set from that XOR. With invalid UTF handling there are two extra
exits: TMP2 = -1, and TMP2 = TMP3. */
DEFINE_COMPILER;
struct sljit_jump *at_subject_start;
jump_list *at_subject_end = NULL;
#ifdef SUPPORT_UNICODE
struct sljit_label *classify_next;
jump_list *bad_prev = NULL;
#endif /* SUPPORT_UNICODE */
jump_list *bad_next = NULL;
#if PCRE2_CODE_UNIT_WIDTH != 8 || defined SUPPORT_UNICODE
struct sljit_jump *above_table;
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 || SUPPORT_UNICODE */

SLJIT_UNUSED_ARG(ucp);
SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0);

/* Previous character: its flag goes to TMP3, which starts out as 0 and
stays 0 when there is no previous character. */
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
at_subject_start = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

#ifdef SUPPORT_UNICODE
if (common->invalid_utf)
  {
  peek_char_back(common, READ_CHAR_MAX, &bad_prev);

  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    /* Keep the character and STR_PTR safe while the start-used pointer is
    checked at the stepped-back position. */
    OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
    OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
    move_back(common, NULL, TRUE);
    check_start_used_ptr(common);
    OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
    }
  }
else
#endif /* SUPPORT_UNICODE */
  {
  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    move_back(common, NULL, TRUE);
    check_start_used_ptr(common);
    read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR);
    }
  else
    peek_char_back(common, READ_CHAR_MAX, NULL);
  }

/* Classify the previous character. */
#ifdef SUPPORT_UNICODE
if (ucp)
  {
  /* Category lookup, then test (1 << TMP1) against the Mn, Pc, L* and N*
  category mask. */
  add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP1, 0);
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N);
  OP_FLAGS(SLJIT_MOV, TMP3, 0, SLJIT_NOT_ZERO);
  }
else
#endif /* SUPPORT_UNICODE */
  {
  /* Table lookup in common->ctypes, skipped for values above 255. */
#if PCRE2_CODE_UNIT_WIDTH != 8
  above_table = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UNICODE
  /* TMP3 is still 0 here, which is the result for skipped values. */
  above_table = common->utf ? CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255) : NULL;
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP3, 0, TMP1, 0, SLJIT_IMM, 1);
#if PCRE2_CODE_UNIT_WIDTH != 8
  JUMPHERE(above_table);
#elif defined SUPPORT_UNICODE
  if (above_table != NULL)
    JUMPHERE(above_table);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  }
JUMPHERE(at_subject_start);

/* Next character: its flag goes to TMP2, preset to 0 for the end-of-subject
case. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
check_str_end(common, &at_subject_end);
peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, &bad_next);

/* Classify the next character; same test as above, result in TMP2. */
#ifdef SUPPORT_UNICODE

classify_next = LABEL();

if (ucp)
  {
  add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP1, 0);
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
  }
else
#endif /* SUPPORT_UNICODE */
  {
#if PCRE2_CODE_UNIT_WIDTH != 8
  /* TMP2 may be destroyed by peek_char. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  above_table = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UNICODE
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  above_table = common->utf ? CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255) : NULL;
#endif
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
#if PCRE2_CODE_UNIT_WIDTH != 8
  JUMPHERE(above_table);
#elif defined SUPPORT_UNICODE
  if (above_table != NULL)
    JUMPHERE(above_table);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  }
set_jumps(at_subject_end, LABEL());

/* Regular exit: TMP2 = next ^ previous, zero flag set from the XOR. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_XOR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, TMP3, 0);
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);

#ifdef SUPPORT_UNICODE
if (common->invalid_utf)
  {
  /* The previous character could not be read: peek at the next one and
  resume classification unless it is INVALID_UTF_CHAR, in which case the
  helper returns with TMP2 = -1. */
  set_jumps(bad_prev, LABEL());

  peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, NULL);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR, classify_next);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, -1);
  OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);

  /* The next character could not be read: return the previous flag. */
  set_jumps(bad_next, LABEL());
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  OP1(SLJIT_MOV, TMP2, 0, TMP3, 0);
  OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
  }
#endif /* SUPPORT_UNICODE */
}
6945
6946
static BOOL optimize_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
6947
{
6948
/* May destroy TMP1. */
6949
DEFINE_COMPILER;
6950
int ranges[MAX_CLASS_RANGE_SIZE];
6951
sljit_u8 bit, cbit, all;
6952
int i, byte, length = 0;
6953
6954
bit = bits[0] & 0x1;
6955
/* All bits will be zero or one (since bit is zero or one). */
6956
all = (sljit_u8)-bit;
6957
6958
for (i = 0; i < 256; )
6959
  {
6960
  byte = i >> 3;
6961
  if ((i & 0x7) == 0 && bits[byte] == all)
6962
    i += 8;
6963
  else
6964
    {
6965
    cbit = (bits[byte] >> (i & 0x7)) & 0x1;
6966
    if (cbit != bit)
6967
      {
6968
      if (length >= MAX_CLASS_RANGE_SIZE)
6969
        return FALSE;
6970
      ranges[length] = i;
6971
      length++;
6972
      bit = cbit;
6973
      all = (sljit_u8)-cbit; /* sign extend bit into byte */
6974
      }
6975
    i++;
6976
    }
6977
  }
6978
6979
if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
6980
  {
6981
  if (length >= MAX_CLASS_RANGE_SIZE)
6982
    return FALSE;
6983
  ranges[length] = 256;
6984
  length++;
6985
  }
6986
6987
if (length < 0 || length > 4)
6988
  return FALSE;
6989
6990
bit = bits[0] & 0x1;
6991
if (invert) bit ^= 0x1;
6992
6993
/* No character is accepted. */
6994
if (length == 0 && bit == 0)
6995
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6996
6997
switch(length)
6998
  {
6999
  case 0:
7000
  /* When bit != 0, all characters are accepted. */
7001
  return TRUE;
7002
7003
  case 1:
7004
  add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
7005
  return TRUE;
7006
7007
  case 2:
7008
  if (ranges[0] + 1 != ranges[1])
7009
    {
7010
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
7011
    add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
7012
    }
7013
  else
7014
    add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
7015
  return TRUE;
7016
7017
  case 3:
7018
  if (bit != 0)
7019
    {
7020
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
7021
    if (ranges[0] + 1 != ranges[1])
7022
      {
7023
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
7024
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
7025
      }
7026
    else
7027
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
7028
    return TRUE;
7029
    }
7030
7031
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
7032
  if (ranges[1] + 1 != ranges[2])
7033
    {
7034
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
7035
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
7036
    }
7037
  else
7038
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
7039
  return TRUE;
7040
7041
  case 4:
7042
  if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
7043
      && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
7044
      && (ranges[1] & (ranges[2] - ranges[0])) == 0
7045
      && is_powerof2(ranges[2] - ranges[0]))
7046
    {
7047
    SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);
7048
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
7049
    if (ranges[2] + 1 != ranges[3])
7050
      {
7051
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
7052
      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
7053
      }
7054
    else
7055
      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
7056
    return TRUE;
7057
    }
7058
7059
  if (bit != 0)
7060
    {
7061
    i = 0;
7062
    if (ranges[0] + 1 != ranges[1])
7063
      {
7064
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
7065
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
7066
      i = ranges[0];
7067
      }
7068
    else
7069
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
7070
7071
    if (ranges[2] + 1 != ranges[3])
7072
      {
7073
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
7074
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
7075
      }
7076
    else
7077
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
7078
    return TRUE;
7079
    }
7080
7081
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
7082
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
7083
  if (ranges[1] + 1 != ranges[2])
7084
    {
7085
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
7086
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
7087
    }
7088
  else
7089
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
7090
  return TRUE;
7091
7092
  default:
7093
  SLJIT_UNREACHABLE();
7094
  return FALSE;
7095
  }
7096
}
7097
7098
static BOOL optimize_class_chars(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* Handles classes that consist of a handful of individual characters. The
set bits of the bitmap (the clear bits when nclass is set) are collected
into a short list; two characters that differ only in bit 0x20 share one
list entry. The emitted code compares TMP1 with each entry and accumulates
the outcome in TMP2 through conditional selects, followed by one compare of
TMP2 against zero. Returns FALSE, without emitting anything, when
conditional moves are unavailable, when the list would overflow, or when it
is empty. May destroy TMP1. */
DEFINE_COMPILER;
uint16_t char_list[MAX_CLASS_CHARS_SIZE];
uint8_t byte;
sljit_s32 type;
int index, shift, slot, len = 0, paired = 0, c;

if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
  return FALSE;

for (index = 0; index < 32; index++)
  {
  byte = bits[index];
  if (nclass)
    byte = (sljit_u8)~byte;

  for (shift = 0; byte != 0; byte >>= 1, shift++)
    {
    if ((byte & 0x1) == 0)
      continue;

    c = index * 8 + shift;

    /* Look for an entry holding the same character with bit 0x20 clear. */
    slot = len;
    if ((c & 0x20) != 0)
      {
      for (slot = 0; slot < len; slot++)
        if (char_list[slot] == c - 0x20)
          break;
      }

    if (slot < len)
      {
      /* Mark the entry as a pair (0x100) and store its 0x20 form. */
      char_list[slot] |= 0x120;
      continue;
      }

    if (len >= MAX_CLASS_CHARS_SIZE)
      return FALSE;

    char_list[len++] = (uint16_t) c;
    }
  }

if (len == 0) return FALSE;  /* Should never occur, but stops analyzers complaining. */

/* A plain zero entry can only be first in the list; it seeds TMP2 from the
zero flag because the selects below store TMP1 itself, which is 0 then. */
index = 0;
if (char_list[0] != 0)
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
else
  {
  index = 1;
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_ZERO);
  }

/* Unpaired entries first; paired ones are only counted here. */
for (; index < len; index++)
  {
  if ((char_list[index] & 0x100) != 0)
    {
    paired++;
    continue;
    }

  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, char_list[index]);
  SELECT(SLJIT_ZERO, TMP2, TMP1, 0, TMP2);
  }

/* Paired entries: set bit 0x20 in TMP1 once, then compare with the stored
low byte of each pair. */
if (paired != 0)
  {
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x20);

  for (index = 0; index < len; index++)
    {
    if ((char_list[index] & 0x100) == 0)
      continue;

    OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, char_list[index] & 0xff);
    SELECT(SLJIT_ZERO, TMP2, TMP1, 0, TMP2);
    }
  }

/* The compare type depends on nclass, flipped by invert. */
if ((nclass != FALSE) != (invert != FALSE))
  type = SLJIT_NOT_EQUAL;
else
  type = SLJIT_EQUAL;

add_jump(compiler, backtracks, CMP(type, TMP2, 0, SLJIT_IMM, 0));
return TRUE;
}
7198
7199
static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* Tries the range-based strategy first and falls back to the character-list
strategy only if the first one declines. Returns TRUE when one of them has
emitted the class check. May destroy TMP1. */
return optimize_class_ranges(common, bits, nclass, invert, backtracks)
  ? TRUE
  : optimize_class_chars(common, bits, nclass, invert, backtracks);
}
7206
7207
static void check_anynewline(compiler_common *common)
7208
{
7209
/* Emits a fast-call helper (return address in RETURN_ADDR) that checks
whether TMP1 contains a newline character. The code points tested are
0x0a-0x0d and 0x85, plus 0x2028/0x2029 in the wide/UTF configurations. The
result is accumulated in TMP2 and the last OP_FLAGS also sets the zero flag
from it. TMP2 destroyed; TMP1 is modified as well (rebased by 0x0a, and bit 0
may be set). */
7210
DEFINE_COMPILER;
7211
7212
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
7213
7214
/* Rebase so that 0x0a..0x0d becomes 0..3 and is covered by one compare. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
7215
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
7216
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
7217
/* The outcome of this 0x85 compare is folded into TMP2 by the next
OP_FLAGS, whichever configuration is compiled. */
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
7218
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
7219
#if PCRE2_CODE_UNIT_WIDTH == 8
7220
/* In the 8-bit library the wide code points are only tested in UTF mode. */
if (common->utf)
7221
  {
7222
#endif
7223
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7224
  /* Setting bit 0 maps 0x2028 and 0x2029 onto the same rebased value. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
7225
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
7226
#if PCRE2_CODE_UNIT_WIDTH == 8
7227
  }
7228
#endif
7229
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
7230
/* Fold in the last compare and set the zero flag from the final TMP2. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
7231
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
7232
}
7233
7234
static void check_hspace(compiler_common *common)
7235
{
7236
/* Emits a fast-call helper (return address in RETURN_ADDR) that checks
whether TMP1 contains a horizontal space character. The code points tested
are 0x09, 0x20 and 0xa0, plus 0x1680, 0x180e, 0x2000-0x200A, 0x202f, 0x205f
and 0x3000 in the wide/UTF configurations. The result is accumulated in TMP2
and the last OP_FLAGS also sets the zero flag from it. TMP2 destroyed; in the
wide/UTF configurations TMP1 is rebased by 0x2000 as well. */
7237
DEFINE_COMPILER;
7238
7239
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
7240
7241
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x09);
7242
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
7243
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x20);
7244
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7245
/* The outcome of this 0xa0 compare is folded into TMP2 by the next
OP_FLAGS, whichever configuration is compiled. */
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xa0);
7246
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
7247
#if PCRE2_CODE_UNIT_WIDTH == 8
7248
/* In the 8-bit library the wide code points are only tested in UTF mode. */
if (common->utf)
7249
  {
7250
#endif
7251
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7252
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x1680);
7253
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7254
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e);
7255
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7256
  /* Rebase so that 0x2000..0x200A is covered by one compare; the remaining
  constants are written relative to 0x2000 for the same reason. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
7257
  OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
7258
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
7259
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
7260
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7261
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
7262
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7263
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
7264
#if PCRE2_CODE_UNIT_WIDTH == 8
7265
  }
7266
#endif
7267
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
7268
/* Fold in the last compare and set the zero flag from the final TMP2. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
7269
7270
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
7271
}
7272
7273
static void check_vspace(compiler_common *common)
7274
{
7275
/* Emits a fast-call helper (return address in RETURN_ADDR) that checks
whether TMP1 contains a vertical space character. The code points tested are
0x0a-0x0d and 0x85, plus 0x2028/0x2029 in the wide/UTF configurations; the
emitted sequence is the same as in check_anynewline(). The result is
accumulated in TMP2 and the last OP_FLAGS also sets the zero flag from it.
TMP2 destroyed; TMP1 is modified as well (rebased by 0x0a, and bit 0 may be
set). */
7276
DEFINE_COMPILER;
7277
7278
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
7279
7280
/* Rebase so that 0x0a..0x0d becomes 0..3 and is covered by one compare. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
7281
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
7282
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
7283
/* The outcome of this 0x85 compare is folded into TMP2 by the next
OP_FLAGS, whichever configuration is compiled. */
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
7284
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
7285
#if PCRE2_CODE_UNIT_WIDTH == 8
7286
/* In the 8-bit library the wide code points are only tested in UTF mode. */
if (common->utf)
7287
  {
7288
#endif
7289
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7290
  /* Setting bit 0 maps 0x2028 and 0x2029 onto the same rebased value. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
7291
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
7292
#if PCRE2_CODE_UNIT_WIDTH == 8
7293
  }
7294
#endif
7295
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
7296
/* Fold in the last compare and set the zero flag from the final TMP2. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
7297
7298
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
7299
}
7300
7301
static void do_casefulcmp(compiler_common *common)
{
/* Emits the fast-call helper for a case-sensitive comparison of two code
unit sequences. The return address is kept in LOCALS0. On entry TMP1 points
at the first sequence and TMP2 holds the length in bytes; STR_PTR is first
moved back by TMP2 and then read as the second sequence. The loop ends on
the first mismatch or when TMP2 reaches zero. One of three addressing
variants is emitted, depending on what sljit_emit_mem_update() reports as
supported: post-update, pre-update, or plain loads with explicit pointer
increments. */
DEFINE_COMPILER;
struct sljit_jump *mismatch;
struct sljit_label *loop;
int lhs_reg;
int rhs_reg;
int addressing = 0; /* 0: plain loads, 1: post-update, 2: pre-update. */

if (HAS_VIRTUAL_REGISTERS)
  {
  lhs_reg = STR_END;
  rhs_reg = STACK_TOP;
  }
else
  {
  lhs_reg = TMP3;
  rhs_reg = RETURN_ADDR;
  }

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

/* When STR_END / STACK_TOP are borrowed as character registers, their
values are kept in TMP3 / RETURN_ADDR for the duration of the loop. */
if (lhs_reg == STR_END)
  {
  OP1(SLJIT_MOV, TMP3, 0, lhs_reg, 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, rhs_reg, 0);
  }

/* SLJIT_MEM_SUPP only queries whether the form is supported. */
if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, lhs_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  addressing = 1;
else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, lhs_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  addressing = 2;

/* Pre-update loads need both pointers one code unit behind. */
if (addressing == 2)
  {
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  }

loop = LABEL();

switch (addressing)
  {
  case 1:
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, lhs_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, rhs_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  case 2:
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, lhs_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, rhs_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  default:
  OP1(MOV_UCHAR, lhs_reg, 0, SLJIT_MEM1(TMP1), 0);
  OP1(MOV_UCHAR, rhs_reg, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  break;
  }

/* Leave on a mismatch, otherwise count TMP2 down and repeat. */
mismatch = CMP(SLJIT_NOT_EQUAL, lhs_reg, 0, rhs_reg, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, loop);

JUMPHERE(mismatch);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

/* Undo the initial pre-update adjustment of STR_PTR. */
if (addressing == 2)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (lhs_reg == STR_END)
  {
  OP1(SLJIT_MOV, lhs_reg, 0, TMP3, 0);
  OP1(SLJIT_MOV, rhs_reg, 0, RETURN_ADDR, 0);
  }

OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
}
7380
7381
static void do_caselesscmp(compiler_common *common)
{
/* Emits the fast-call helper for a caseless comparison of two code unit
sequences. The return address is kept in LOCALS0 and the original STR_END
in LOCALS1. On entry TMP1 points at the first sequence and TMP2 holds the
length in bytes; STR_PTR is first moved back by TMP2 and then read as the
second sequence. Each loaded code unit is mapped through the common->lcc
table before the comparison (in the 16/32-bit libraries values above 255
are compared unmapped). The loop ends on the first mismatch or when TMP2
reaches zero. */
DEFINE_COMPILER;
struct sljit_jump *skip;
struct sljit_label *loop;
int lhs_reg = STR_END;
int rhs_reg;
int table_reg;
int addressing = 0; /* 0: plain loads, 1: post-update, 2: pre-update. */

if (HAS_VIRTUAL_REGISTERS)
  {
  rhs_reg = STACK_TOP;
  table_reg = STACK_LIMIT;
  }
else
  {
  rhs_reg = RETURN_ADDR;
  table_reg = TMP3;
  }

/* SLJIT_MEM_SUPP only queries whether the form is supported. */
if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, lhs_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  addressing = 1;
else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, lhs_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
  addressing = 2;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

/* STR_END doubles as the first character register. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, lhs_reg, 0);

/* When STACK_TOP / STACK_LIMIT are borrowed, their values are kept in
TMP3 / RETURN_ADDR for the duration of the loop. */
if (rhs_reg == STACK_TOP)
  {
  OP1(SLJIT_MOV, TMP3, 0, rhs_reg, 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, table_reg, 0);
  }

OP1(SLJIT_MOV, table_reg, 0, SLJIT_IMM, common->lcc);

switch (addressing)
  {
  case 1:
  loop = LABEL();
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, lhs_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, rhs_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  case 2:
  /* Pre-update loads need both pointers one code unit behind. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  loop = LABEL();
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, lhs_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, rhs_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  break;

  default:
  /* STR_PTR is advanced after the table lookups below. */
  loop = LABEL();
  OP1(MOV_UCHAR, lhs_reg, 0, SLJIT_MEM1(TMP1), 0);
  OP1(MOV_UCHAR, rhs_reg, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  break;
  }

/* Map both code units through the lcc table. */
#if PCRE2_CODE_UNIT_WIDTH != 8
skip = CMP(SLJIT_GREATER, lhs_reg, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, lhs_reg, 0, SLJIT_MEM2(table_reg, lhs_reg), 0);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(skip);
skip = CMP(SLJIT_GREATER, rhs_reg, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, rhs_reg, 0, SLJIT_MEM2(table_reg, rhs_reg), 0);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(skip);
#endif

if (addressing == 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Leave on a mismatch, otherwise count TMP2 down and repeat. */
skip = CMP(SLJIT_NOT_EQUAL, lhs_reg, 0, rhs_reg, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, loop);

JUMPHERE(skip);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

/* Undo the initial pre-update adjustment of STR_PTR. */
if (addressing == 2)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (rhs_reg == STACK_TOP)
  {
  OP1(SLJIT_MOV, rhs_reg, 0, TMP3, 0);
  OP1(SLJIT_MOV, table_reg, 0, RETURN_ADDR, 0);
  }

OP1(SLJIT_MOV, lhs_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
}
7478
7479
static PCRE2_SPTR byte_sequence_compare(compiler_common *common, BOOL caseless, PCRE2_SPTR cc,
7480
    compare_context *context, jump_list **backtracks)
7481
{
7482
DEFINE_COMPILER;
7483
unsigned int othercasebit = 0;
7484
PCRE2_SPTR othercasechar = NULL;
7485
#ifdef SUPPORT_UNICODE
7486
int utflength;
7487
#endif
7488
7489
if (caseless && char_has_othercase(common, cc))
7490
  {
7491
  othercasebit = char_get_othercase_bit(common, cc);
7492
  SLJIT_ASSERT(othercasebit);
7493
  /* Extracting bit difference info. */
7494
#if PCRE2_CODE_UNIT_WIDTH == 8
7495
  othercasechar = cc + (othercasebit >> 8);
7496
  othercasebit &= 0xff;
7497
#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
7498
  /* Note that this code only handles characters in the BMP. If there
7499
  ever are characters outside the BMP whose othercase differs in only one
7500
  bit from itself (there currently are none), this code will need to be
7501
  revised for PCRE2_CODE_UNIT_WIDTH == 32. */
7502
  othercasechar = cc + (othercasebit >> 9);
7503
  if ((othercasebit & 0x100) != 0)
7504
    othercasebit = (othercasebit & 0xff) << 8;
7505
  else
7506
    othercasebit &= 0xff;
7507
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
7508
  }
7509
7510
if (context->sourcereg == -1)
7511
  {
7512
#if PCRE2_CODE_UNIT_WIDTH == 8
7513
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
7514
  if (context->length >= 4)
7515
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
7516
  else if (context->length >= 2)
7517
    OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
7518
  else
7519
#endif
7520
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
7521
#elif PCRE2_CODE_UNIT_WIDTH == 16
7522
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
7523
  if (context->length >= 4)
7524
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
7525
  else
7526
#endif
7527
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
7528
#elif PCRE2_CODE_UNIT_WIDTH == 32
7529
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
7530
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
7531
  context->sourcereg = TMP2;
7532
  }
7533
7534
#ifdef SUPPORT_UNICODE
7535
utflength = 1;
7536
if (common->utf && HAS_EXTRALEN(*cc))
7537
  utflength += GET_EXTRALEN(*cc);
7538
7539
do
7540
  {
7541
#endif
7542
7543
  context->length -= IN_UCHARS(1);
7544
#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
7545
7546
  /* Unaligned read is supported. */
7547
  if (othercasebit != 0 && othercasechar == cc)
7548
    {
7549
    context->c.asuchars[context->ucharptr] = *cc | othercasebit;
7550
    context->oc.asuchars[context->ucharptr] = othercasebit;
7551
    }
7552
  else
7553
    {
7554
    context->c.asuchars[context->ucharptr] = *cc;
7555
    context->oc.asuchars[context->ucharptr] = 0;
7556
    }
7557
  context->ucharptr++;
7558
7559
#if PCRE2_CODE_UNIT_WIDTH == 8
7560
  if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
7561
#else
7562
  if (context->ucharptr >= 2 || context->length == 0)
7563
#endif
7564
    {
7565
    if (context->length >= 4)
7566
      OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
7567
    else if (context->length >= 2)
7568
      OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
7569
#if PCRE2_CODE_UNIT_WIDTH == 8
7570
    else if (context->length >= 1)
7571
      OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
7572
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
7573
    context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
7574
7575
    switch(context->ucharptr)
7576
      {
7577
      case 4 / sizeof(PCRE2_UCHAR):
7578
      if (context->oc.asint != 0)
7579
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
7580
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
7581
      break;
7582
7583
      case 2 / sizeof(PCRE2_UCHAR):
7584
      if (context->oc.asushort != 0)
7585
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
7586
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
7587
      break;
7588
7589
#if PCRE2_CODE_UNIT_WIDTH == 8
7590
      case 1:
7591
      if (context->oc.asbyte != 0)
7592
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
7593
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
7594
      break;
7595
#endif
7596
7597
      default:
7598
      SLJIT_UNREACHABLE();
7599
      break;
7600
      }
7601
    context->ucharptr = 0;
7602
    }
7603
7604
#else
7605
7606
  /* Unaligned read is unsupported or in 32 bit mode. */
7607
  if (context->length >= 1)
7608
    OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
7609
7610
  context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
7611
7612
  if (othercasebit != 0 && othercasechar == cc)
7613
    {
7614
    OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
7615
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
7616
    }
7617
  else
7618
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
7619
7620
#endif
7621
7622
  cc++;
7623
#ifdef SUPPORT_UNICODE
7624
  utflength--;
7625
  }
7626
while (utflength > 0);
7627
#endif
7628
7629
return cc;
7630
}
7631
7632
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
7633
7634
/* Changes the bias of the character held in TMP1 to 'value'.

The local variable 'charoffset' records how much has already been subtracted
from TMP1. When 'value' differs from it, one ADD or SUB with the difference
is emitted, and 'charoffset' is then set to 'value'.

The expansion is wrapped in do { } while (0) so that the macro behaves as a
single statement (e.g. inside an unbraced if / else). 'value' is evaluated
more than once, so it must not have side effects. */
#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) < charoffset) \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
      else \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
      } \
    charoffset = (value); \
    } \
  while (0)
7643
7644
/* Forward declaration. compile_xclass_matchingpath() below calls this
function with OP_ALLANY when a class turns out to accept every character
or no character at all. */
static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr);
7645
7646
#ifdef SUPPORT_UNICODE
7647
/* Bits of the 'unicode_status' word used by compile_xclass_matchingpath(). */

/* Some item still needs the character value after the UCD lookup, so the
character is kept in RETURN_ADDR. */
#define XCLASS_SAVE_CHAR                              0x001
/* The character has already been copied to RETURN_ADDR (bitmap path). */
#define XCLASS_CHAR_SAVED                             0x002
/* Which fields of the UCD record the class needs. */
#define XCLASS_HAS_TYPE                               0x004
#define XCLASS_HAS_SCRIPT                             0x008
#define XCLASS_HAS_SCRIPT_EXTENSION                   0x010
#define XCLASS_HAS_BOOL                               0x020
#define XCLASS_HAS_BIDICL                             0x040
/* Any of the above: a UCD record lookup has to be generated. */
#define XCLASS_NEEDS_UCD \
  (XCLASS_HAS_TYPE | XCLASS_HAS_SCRIPT | XCLASS_HAS_SCRIPT_EXTENSION | XCLASS_HAS_BOOL | XCLASS_HAS_BIDICL)
/* The class contains a negated script extension (PT_SCX) item. */
#define XCLASS_SCRIPT_EXTENSION_NOTPROP               0x080
/* Where TMP2 was parked while the negated script extension items were
processed, i.e. where it must be reloaded from afterwards. */
#define XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR   0x100
#define XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0       0x200
7658
#endif /* SUPPORT_UNICODE */
7659
7660
/* Generates the matching path of an extended character class.

  common      compiler state
  cc          points to the flag unit of the class (XCL_NOT, XCL_MAP and
              XCL_HASPROP are read from it); an optional 32 byte bitmap and
              the item list terminated by XCL_END follow
  backtracks  list that receives the jumps taken when the character is
              rejected by the class

The function works in three steps:
  1. The item list is scanned to compute the range of characters which must
     be read, the number of comparisons ('compares') and, with Unicode
     support, which UCD fields are needed.
  2. The character is read, the bitmap (if any) is tested and the UCD based
     items are generated.
  3. The remaining items (single characters, ranges and the properties
     which need the character value) are generated.

'list' is the jump list used when an item matches the character: 'found' for
a normal class, 'backtracks' for a negated one. The last comparison is
inverted for a normal class so that falling through means success. */
static void compile_xclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
jump_list *found = NULL;
jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
struct sljit_jump *jump = NULL;
PCRE2_SPTR ccbegin;
int compares, invertcmp, numberofcmps;
#if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
BOOL utf = common->utf;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */

#ifdef SUPPORT_UNICODE
sljit_u32 unicode_status = 0;
sljit_u32 category_list = 0;
sljit_u32 items;
int typereg = TMP1;
const sljit_u32 *other_cases;
#endif /* SUPPORT_UNICODE */

/* Scanning the necessary info. */
cc++;
ccbegin = cc;
compares = 0;

if (cc[-1] & XCL_MAP)
  {
  min = 0;
  cc += 32 / sizeof(PCRE2_UCHAR);
  }

while (*cc != XCL_END)
  {
  compares++;

  if (*cc == XCL_SINGLE)
    {
    cc++;
    GETCHARINCTEST(c, cc);
    if (c > max) max = c;
    if (c < min) min = c;
#ifdef SUPPORT_UNICODE
    unicode_status |= XCLASS_SAVE_CHAR;
#endif /* SUPPORT_UNICODE */
    }
  else if (*cc == XCL_RANGE)
    {
    cc++;
    GETCHARINCTEST(c, cc);
    if (c < min) min = c;
    GETCHARINCTEST(c, cc);
    if (c > max) max = c;
#ifdef SUPPORT_UNICODE
    unicode_status |= XCLASS_SAVE_CHAR;
#endif /* SUPPORT_UNICODE */
    }
#ifdef SUPPORT_UNICODE
  else
    {
    SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
    cc++;

    if (*cc == PT_CLIST && cc[-1] == XCL_PROP)
      {
      /* A positive caseless set only needs the characters it lists. */
      other_cases = PRIV(ucd_caseless_sets) + cc[1];
      while (*other_cases != NOTACHAR)
        {
        if (*other_cases > max) max = *other_cases;
        if (*other_cases < min) min = *other_cases;
        other_cases++;
        }
      }
    else
      {
      max = READ_CHAR_MAX;
      min = 0;
      }

    items = 0;

    switch(*cc)
      {
      case PT_ANY:
      /* PT_ANY either accepts everything or is ignored. */
      if (cc[-1] == XCL_PROP)
        items = UCPCAT_ALL;
      else
        compares--;
      break;

      case PT_LAMP:
      items = UCPCAT3(ucp_Lu, ucp_Ll, ucp_Lt);
      break;

      case PT_GC:
      items = UCPCAT_RANGE(PRIV(ucp_typerange)[(int)cc[1] * 2], PRIV(ucp_typerange)[(int)cc[1] * 2 + 1]);
      break;

      case PT_PC:
      items = UCPCAT(cc[1]);
      break;

      case PT_WORD:
      items = UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N;
      break;

      case PT_ALNUM:
      items = UCPCAT_L | UCPCAT_N;
      break;

      case PT_SCX:
      unicode_status |= XCLASS_HAS_SCRIPT_EXTENSION;
      if (cc[-1] == XCL_NOTPROP)
        {
        unicode_status |= XCLASS_SCRIPT_EXTENSION_NOTPROP;
        break;
        }
      /* A positive PT_SCX is checked twice: by script and by extension. */
      compares++;
      /* Fall through */

      case PT_SC:
      unicode_status |= XCLASS_HAS_SCRIPT;
      break;

      case PT_SPACE:
      case PT_PXSPACE:
      case PT_PXGRAPH:
      case PT_PXPRINT:
      case PT_PXPUNCT:
      unicode_status |= XCLASS_SAVE_CHAR | XCLASS_HAS_TYPE;
      break;

      case PT_CLIST:
      case PT_UCNC:
      case PT_PXXDIGIT:
      unicode_status |= XCLASS_SAVE_CHAR;
      break;

      case PT_BOOL:
      unicode_status |= XCLASS_HAS_BOOL;
      break;

      case PT_BIDICL:
      unicode_status |= XCLASS_HAS_BIDICL;
      break;

      default:
      SLJIT_UNREACHABLE();
      break;
      }

    if (items > 0)
      {
      /* Category based items are merged into a single bit set, which is
      tested with one comparison later. */
      if (cc[-1] == XCL_NOTPROP)
        items ^= UCPCAT_ALL;
      category_list |= items;
      unicode_status |= XCLASS_HAS_TYPE;
      compares--;
      }

    cc += 2;
    }
#endif /* SUPPORT_UNICODE */
  }

#ifdef SUPPORT_UNICODE
if (category_list == UCPCAT_ALL)
  {
  /* All characters are accepted, same as dotall. */
  compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
  if (list == backtracks)
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  return;
  }

if (compares == 0 && category_list == 0)
  {
  /* No characters are accepted, same as (*F) or dotall. */
  compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
  if (list != backtracks)
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  return;
  }
#else /* !SUPPORT_UNICODE */
SLJIT_ASSERT(compares > 0);
#endif /* SUPPORT_UNICODE */

/* We are not necessarily in utf mode even in 8 bit mode. */
cc = ccbegin;
if ((cc[-1] & XCL_NOT) != 0)
  read_char(common, min, max, backtracks, READ_CHAR_UPDATE_STR_PTR);
else
  {
#ifdef SUPPORT_UNICODE
  read_char(common, min, max, (unicode_status & XCLASS_NEEDS_UCD) ? backtracks : NULL, 0);
#else /* !SUPPORT_UNICODE */
  read_char(common, min, max, NULL, 0);
#endif /* SUPPORT_UNICODE */
  }

if ((cc[-1] & XCL_HASPROP) == 0)
  {
  if ((cc[-1] & XCL_MAP) != 0)
    {
    /* Characters up to 255 are fully decided by the bitmap. */
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
    if (!optimize_class(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found))
      {
      /* Test bit (ch & 7) of byte (ch >> 3) of the bitmap. */
      OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
      OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
      OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
      OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
      OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
      add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
      }

    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(jump);

    cc += 32 / sizeof(PCRE2_UCHAR);
    }
  else
    {
    /* Quick rejection of characters outside of [min, max]. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
    add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
    }
  }
else if ((cc[-1] & XCL_MAP) != 0)
  {
  /* The bitmap test destroys TMP1, so the character is kept in
  RETURN_ADDR for the items processed later. */
  OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
#ifdef SUPPORT_UNICODE
  unicode_status |= XCLASS_CHAR_SAVED;
#endif /* SUPPORT_UNICODE */
  if (!optimize_class(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
    {
#if PCRE2_CODE_UNIT_WIDTH == 8
    jump = NULL;
    if (common->utf)
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
      jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);

    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
    OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
    add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));

#if PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf)
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
      JUMPHERE(jump);
    }

  OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
  cc += 32 / sizeof(PCRE2_UCHAR);
  }

#ifdef SUPPORT_UNICODE
if (unicode_status & XCLASS_NEEDS_UCD)
  {
  if ((unicode_status & (XCLASS_SAVE_CHAR | XCLASS_CHAR_SAVED)) == XCLASS_SAVE_CHAR)
    OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
  if (!common->utf)
    {
    /* Values above MAX_UTF_CODE_POINT are looked up as UNASSIGNED_UTF_CHAR. */
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
    JUMPHERE(jump);
    }
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */

  /* Two stage table lookup: ucd_stage1 is indexed by the block number of
  the character, ucd_stage2 by the resulting block base plus the offset
  inside the block. */
  OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
  OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
  /* TMP2 = record index * 12 (8 + 4), used below as a byte offset into
  ucd_records. */
  OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);

  ccbegin = cc;

  /* The merged category test counts as one comparison. */
  if (category_list != 0)
    compares++;

  if (unicode_status & XCLASS_HAS_BIDICL)
    {
    OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass));
    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BIDICLASS_SHIFT);

    while (*cc != XCL_END)
      {
      if (*cc == XCL_SINGLE)
        {
        cc++;
        GETCHARINCTEST(c, cc);
        }
      else if (*cc == XCL_RANGE)
        {
        cc++;
        GETCHARINCTEST(c, cc);
        GETCHARINCTEST(c, cc);
        }
      else
        {
        SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
        cc++;
        if (*cc == PT_BIDICL)
          {
          compares--;
          invertcmp = (compares == 0 && list != backtracks);
          if (cc[-1] == XCL_NOTPROP)
            invertcmp ^= 0x1;
          jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
          add_jump(compiler, compares > 0 ? list : backtracks, jump);
          }
        cc += 2;
        }
      }

    cc = ccbegin;
    }

  if (unicode_status & XCLASS_HAS_BOOL)
    {
    OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, bprops));
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BPROPS_MASK);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);

    while (*cc != XCL_END)
      {
      if (*cc == XCL_SINGLE)
        {
        cc++;
        GETCHARINCTEST(c, cc);
        }
      else if (*cc == XCL_RANGE)
        {
        cc++;
        GETCHARINCTEST(c, cc);
        GETCHARINCTEST(c, cc);
        }
      else
        {
        SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
        cc++;
        if (*cc == PT_BOOL)
          {
          compares--;
          invertcmp = (compares == 0 && list != backtracks);
          if (cc[-1] == XCL_NOTPROP)
            invertcmp ^= 0x1;

          OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_boolprop_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)(1u << (cc[1] & 0x1f)));
          add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp));
          }
        cc += 2;
        }
      }

    cc = ccbegin;
    }

  if (unicode_status & XCLASS_HAS_SCRIPT)
    {
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));

    while (*cc != XCL_END)
      {
      if (*cc == XCL_SINGLE)
        {
        cc++;
        GETCHARINCTEST(c, cc);
        }
      else if (*cc == XCL_RANGE)
        {
        cc++;
        GETCHARINCTEST(c, cc);
        GETCHARINCTEST(c, cc);
        }
      else
        {
        SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
        cc++;
        switch (*cc)
          {
          case PT_SCX:
          /* Negated script extensions are handled entirely below. */
          if (cc[-1] == XCL_NOTPROP)
            break;
          /* Fall through */

          case PT_SC:
          compares--;
          invertcmp = (compares == 0 && list != backtracks);
          if (cc[-1] == XCL_NOTPROP)
            invertcmp ^= 0x1;

          add_jump(compiler, compares > 0 ? list : backtracks, CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]));
          }
        cc += 2;
        }
      }

    cc = ccbegin;
    }

  if (unicode_status & XCLASS_HAS_SCRIPT_EXTENSION)
    {
    OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass));
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_SCRIPTX_MASK);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);

    if (unicode_status & XCLASS_SCRIPT_EXTENSION_NOTPROP)
      {
      /* TMP2 is overwritten by the script value below. When the record
      offset is still needed for the character type, it is saved first:
      in LOCALS0 if RETURN_ADDR holds the character, otherwise in
      RETURN_ADDR. */
      if (unicode_status & XCLASS_HAS_TYPE)
        {
        if (unicode_status & XCLASS_SAVE_CHAR)
          {
          OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP2, 0);
          unicode_status |= XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0;
          }
        else
          {
          OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP2, 0);
          unicode_status |= XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR;
          }
        }
      OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
      }

    while (*cc != XCL_END)
      {
      if (*cc == XCL_SINGLE)
        {
        cc++;
        GETCHARINCTEST(c, cc);
        }
      else if (*cc == XCL_RANGE)
        {
        cc++;
        GETCHARINCTEST(c, cc);
        GETCHARINCTEST(c, cc);
        }
      else
        {
        SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
        cc++;
        if (*cc == PT_SCX)
          {
          compares--;
          invertcmp = (compares == 0 && list != backtracks);

          jump = NULL;
          if (cc[-1] == XCL_NOTPROP)
            {
            /* The script itself is compared first (TMP2 holds it). */
            jump = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, (int)cc[1]);
            if (invertcmp)
              {
              add_jump(compiler, backtracks, jump);
              jump = NULL;
              }
            invertcmp ^= 0x1;
            }

          OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_script_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)(1u << (cc[1] & 0x1f)));
          add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp));

          if (jump != NULL)
            JUMPHERE(jump);
          }
        cc += 2;
        }
      }

    if (unicode_status & XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
    else if (unicode_status & XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR)
      OP1(SLJIT_MOV, TMP2, 0, RETURN_ADDR, 0);
    cc = ccbegin;
    }

  /* Restore the character for the value based items. */
  if (unicode_status & XCLASS_SAVE_CHAR)
    OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);

  if (unicode_status & XCLASS_HAS_TYPE)
    {
    /* When TMP1 holds the character, the type bit goes to RETURN_ADDR. */
    if (unicode_status & XCLASS_SAVE_CHAR)
      typereg = RETURN_ADDR;

    /* typereg = 1 << chartype, so category sets can be tested with AND. */
    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
    OP2(SLJIT_SHL, typereg, 0, SLJIT_IMM, 1, TMP2, 0);

    if (category_list > 0)
      {
      compares--;
      invertcmp = (compares == 0 && list != backtracks);
      OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, category_list);
      add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp));
      }
    }
  }
#endif /* SUPPORT_UNICODE */

/* Generating code. */
charoffset = 0;
numberofcmps = 0;

while (*cc != XCL_END)
  {
  compares--;
  invertcmp = (compares == 0 && list != backtracks);
  jump = NULL;

  if (*cc == XCL_SINGLE)
    {
    cc++;
    GETCHARINCTEST(c, cc);

    /* Up to four consecutive single / range tests are accumulated in TMP2
    and decided by a single conditional jump. */
    if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
      {
      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
      numberofcmps++;
      }
    else if (numberofcmps > 0)
      {
      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      numberofcmps = 0;
      }
    else
      {
      jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      numberofcmps = 0;
      }
    }
  else if (*cc == XCL_RANGE)
    {
    cc++;
    GETCHARINCTEST(c, cc);
    /* Bias TMP1 by the start of the range, so that one unsigned
    comparison against (end - start) tests both bounds. */
    SET_CHAR_OFFSET(c);
    GETCHARINCTEST(c, cc);

    if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
      {
      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
      numberofcmps++;
      }
    else if (numberofcmps > 0)
      {
      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      numberofcmps = 0;
      }
    else
      {
      jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      numberofcmps = 0;
      }
    }
#ifdef SUPPORT_UNICODE
  else
    {
    SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
    if (*cc == XCL_NOTPROP)
      invertcmp ^= 0x1;
    cc++;
    switch(*cc)
      {
      case PT_ANY:
      case PT_LAMP:
      case PT_GC:
      case PT_PC:
      case PT_SC:
      case PT_SCX:
      case PT_BOOL:
      case PT_BIDICL:
      case PT_WORD:
      case PT_ALNUM:
      /* Already handled: undo the decrement at the top of the loop. */
      compares++;
      break;

      case PT_SPACE:
      case PT_PXSPACE:
      SET_CHAR_OFFSET(9);
      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Zl, ucp_Zs));
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_ZERO);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_CLIST:
      other_cases = PRIV(ucd_caseless_sets) + cc[1];

      /* At least three characters are required.
         Otherwise this case would be handled by the normal code path. */
      SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
      SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);

      /* Optimizing character pairs, if their difference is power of 2. */
      if (is_powerof2(other_cases[1] ^ other_cases[0]))
        {
        if (charoffset == 0)
          OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
        else
          {
          OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
          OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
          }
        OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, other_cases[1]);
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
        other_cases += 2;
        }
      else if (is_powerof2(other_cases[2] ^ other_cases[1]))
        {
        if (charoffset == 0)
          OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
        else
          {
          /* The pair tested here is other_cases[1] / other_cases[2], so the
          mask must be their difference, exactly as in the charoffset == 0
          path above. (other_cases[1] ^ other_cases[0] is known not to be a
          single bit in this branch.) */
          OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
          OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
          }
        OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, other_cases[2]);
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);

        OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
        OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);

        other_cases += 3;
        }
      else
        {
        OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
        }

      /* The zero flag is only requested for the last character. */
      while (*other_cases != NOTACHAR)
        {
        OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
        OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
        }
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_UCNC:
      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      SET_CHAR_OFFSET(0xa0);
      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
      SET_CHAR_OFFSET(0);
      OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_PXGRAPH:
      OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Cc, ucp_Cs) | UCPCAT_RANGE(ucp_Zl, ucp_Zs));
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);

      OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT(ucp_Cf));
      jump = JUMP(SLJIT_ZERO);

      c = charoffset;
      /* In case of ucp_Cf, we overwrite the result. */
      SET_CHAR_OFFSET(0x2066);
      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      /* Restore charoffset. */
      SET_CHAR_OFFSET(c);

      JUMPHERE(jump);
      jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
      break;

      case PT_PXPRINT:
      OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Cc, ucp_Cs) | UCPCAT2(ucp_Zl, ucp_Zp));
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);

      OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT(ucp_Cf));
      jump = JUMP(SLJIT_ZERO);

      c = charoffset;
      /* In case of ucp_Cf, we overwrite the result. */
      SET_CHAR_OFFSET(0x2066);
      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      /* Restore charoffset. */
      SET_CHAR_OFFSET(c);

      JUMPHERE(jump);
      jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
      break;

      case PT_PXPUNCT:
      OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Sc, ucp_So));
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);

      SET_CHAR_OFFSET(0);
      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x7f);
      OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL);

      OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Pc, ucp_Ps));
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_ZERO);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_PXXDIGIT:
      SET_CHAR_OFFSET(CHAR_A);
      OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, ~0x20);
      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP2, 0, SLJIT_IMM, CHAR_F - CHAR_A);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

      SET_CHAR_OFFSET(CHAR_0);
      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_9 - CHAR_0);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);

      /* The 0xff10 - 0xff46 tests are skipped at runtime for characters
      outside of that range. */
      SET_CHAR_OFFSET(0xff10);
      jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 0xff46 - 0xff10);

      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff19 - 0xff10);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);

      SET_CHAR_OFFSET(0xff21);
      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff26 - 0xff21);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);

      SET_CHAR_OFFSET(0xff41);
      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff46 - 0xff41);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);

      /* Both paths must reach the join point with the same bias. */
      SET_CHAR_OFFSET(0xff10);

      JUMPHERE(jump);
      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      default:
      SLJIT_UNREACHABLE();
      break;
      }
    cc += 2;
    }
#endif /* SUPPORT_UNICODE */

  if (jump != NULL)
    add_jump(compiler, compares > 0 ? list : backtracks, jump);
  }

SLJIT_ASSERT(compares == 0);
if (found != NULL)
  set_jumps(found, LABEL());
}
8447
8448
#undef SET_TYPE_OFFSET
8449
#undef SET_CHAR_OFFSET
8450
8451
#endif
8452
8453
/* Emits the matching-path code for the simple assertion opcode `type`
(start/end of subject, start of match, word boundaries, ^ and $ variants).
Jumps that must be taken when the assertion does not hold are appended to
the `backtracks` list. Every case handled here returns `cc` unchanged; a
`type` that is not listed ends up in SLJIT_UNREACHABLE(). */
static PCRE2_SPTR compile_simple_assertion_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks)
8454
{
8455
DEFINE_COMPILER;
8456
/* Scratch slots for forward jumps that are resolved with JUMPHERE() inside
the same case. */
struct sljit_jump *jump[4];
8457
8458
switch(type)
8459
  {
8460
  /* Load jit_arguments.begin into TMP1 (through an extra move of ARGUMENTS
  when HAS_VIRTUAL_REGISTERS) and backtrack unless STR_PTR equals it. */
  case OP_SOD:
8461
  if (HAS_VIRTUAL_REGISTERS)
8462
    {
8463
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8464
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
8465
    }
8466
  else
8467
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
8468
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
8469
  return cc;
8470
8471
  /* Same shape as OP_SOD, but the comparison is against jit_arguments.str. */
  case OP_SOM:
8472
  if (HAS_VIRTUAL_REGISTERS)
8473
    {
8474
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8475
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
8476
    }
8477
  else
8478
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
8479
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
8480
  return cc;
8481
8482
  /* Fast-call the shared word boundary routine: the call site is queued on
  common->wordboundary for the plain opcodes and on common->ucp_wordboundary
  for the UCP ones. With invalid_utf the outcome is tested through TMP2
  (NOT_* opcodes backtrack when TMP2 != 0, the others when TMP2 <= 0,
  signed); otherwise it is tested through the zero flag. */
  case OP_NOT_WORD_BOUNDARY:
8483
  case OP_WORD_BOUNDARY:
8484
  case OP_NOT_UCP_WORD_BOUNDARY:
8485
  case OP_UCP_WORD_BOUNDARY:
8486
  add_jump(compiler, (type == OP_NOT_WORD_BOUNDARY || type == OP_WORD_BOUNDARY) ? &common->wordboundary : &common->ucp_wordboundary, JUMP(SLJIT_FAST_CALL));
8487
#ifdef SUPPORT_UNICODE
8488
  if (common->invalid_utf)
8489
    {
8490
    add_jump(compiler, backtracks, CMP((type == OP_NOT_WORD_BOUNDARY || type == OP_NOT_UCP_WORD_BOUNDARY) ? SLJIT_NOT_EQUAL : SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0));
8491
    return cc;
8492
    }
8493
#endif /* SUPPORT_UNICODE */
8494
  /* NOTE(review): presumably the fast-called routine leaves its result in
  the zero flag and this call only tells sljit that the flag is valid -
  confirm against the routine's epilogue. */
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
8495
  add_jump(compiler, backtracks, JUMP((type == OP_NOT_WORD_BOUNDARY || type == OP_NOT_UCP_WORD_BOUNDARY) ? SLJIT_NOT_ZERO : SLJIT_ZERO));
8496
  return cc;
8497
8498
  /* jump[0] is taken when STR_PTR is already at or past STR_END and lands on
  the JUMPHERE(jump[0]) at the bottom of the case. Otherwise one of three
  code shapes is emitted: a fixed newline made of two code units
  (newline > 255: high byte first, low byte second), a fixed newline of one
  code unit, or the remaining newline types. */
  case OP_EODN:
8499
  /* Requires rather complex checks. */
8500
  jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
8501
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
8502
    {
8503
    /* TMP2 = STR_PTR + 2 code units, TMP1 = first code unit at STR_PTR. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
8504
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
8505
    if (common->mode == PCRE2_JIT_COMPLETE)
8506
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
8507
    else
8508
      {
8509
      /* jump[1] skips the partial-match handling when exactly two code units
      remain. Otherwise backtrack if TMP2 < STR_END or the first unit is not
      the newline's high byte; what is left goes through check_partial() and
      then backtracks unconditionally. */
      jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
8510
      OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, STR_END, 0);
8511
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
8512
      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
8513
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL);
8514
      add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
8515
      check_partial(common, TRUE);
8516
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
8517
      JUMPHERE(jump[1]);
8518
      }
8519
    /* Both code units must equal the two bytes of the newline. */
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
8520
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
8521
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
8522
    }
8523
  else if (common->nltype == NLTYPE_FIXED)
8524
    {
8525
    /* Exactly one code unit must remain and it must be the newline. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8526
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
8527
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
8528
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
8529
    }
8530
  else
8531
    {
8532
    /* First unit is CR: jump[2] is taken when fewer than two units remain,
    backtrack when more than two remain, and with exactly two the second
    must be NL (jump[3]) or the assertion backtracks. A first unit other
    than CR continues at jump[1]. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
8533
    jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
8534
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
8535
    OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, TMP2, 0, STR_END, 0);
8536
    jump[2] = JUMP(SLJIT_GREATER);
8537
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */);
8538
    /* Equal. */
8539
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
8540
    jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
8541
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
8542
8543
    JUMPHERE(jump[1]);
8544
    if (common->nltype == NLTYPE_ANYCRLF)
8545
      {
8546
      /* The single remaining code unit must be NL. */
      OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8547
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
8548
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
8549
      }
8550
    else
8551
      {
8552
      /* STR_PTR is saved in TMP3, one character is read, the read must end
      exactly at STR_END, and the shared anynewline routine decides whether
      it was a newline. STR_PTR is restored from TMP3 afterwards. */
      OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
8553
      read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
8554
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
8555
      add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
8556
      sljit_set_current_flags(compiler, SLJIT_SET_Z);
8557
      add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
8558
      OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
8559
      }
8560
    JUMPHERE(jump[2]);
8561
    JUMPHERE(jump[3]);
8562
    }
8563
  JUMPHERE(jump[0]);
8564
  if (common->mode != PCRE2_JIT_COMPLETE)
8565
    check_partial(common, TRUE);
8566
  return cc;
8567
8568
  /* Backtrack while STR_PTR is below STR_END; outside PCRE2_JIT_COMPLETE
  mode check_partial() is emitted as well. */
  case OP_EOD:
8569
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
8570
  if (common->mode != PCRE2_JIT_COMPLETE)
8571
    check_partial(common, TRUE);
8572
  return cc;
8573
8574
  /* Backtrack when the PCRE2_NOTEOL bit is set in jit_arguments.options.
  After that, without common->endonly the OP_EODN code is emitted through a
  recursive call; with endonly only the STR_PTR < STR_END test and
  check_partial() are emitted. */
  case OP_DOLL:
8575
  if (HAS_VIRTUAL_REGISTERS)
8576
    {
8577
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
8578
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
8579
    }
8580
  else
8581
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
8582
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
8583
8584
  if (!common->endonly)
8585
    compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
8586
  else
8587
    {
8588
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
8589
    check_partial(common, FALSE);
8590
    }
8591
  return cc;
8592
8593
  /* jump[1] is taken when STR_PTR < STR_END and leads to the newline test
  below. The fall-through path (STR_PTR not below STR_END) backtracks on
  PCRE2_NOTEOL, emits check_partial() and leaves through jump[0]. */
  case OP_DOLLM:
8594
  jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
8595
  if (HAS_VIRTUAL_REGISTERS)
8596
    {
8597
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
8598
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
8599
    }
8600
  else
8601
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
8602
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
8603
  check_partial(common, FALSE);
8604
  jump[0] = JUMP(SLJIT_JUMP);
8605
  JUMPHERE(jump[1]);
8606
8607
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
8608
    {
8609
    /* Two-unit fixed newline: both code units at STR_PTR must match. In
    complete mode the assertion backtracks when fewer than two units are
    left; in the partial modes that situation is handled separately. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
8610
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
8611
    if (common->mode == PCRE2_JIT_COMPLETE)
8612
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
8613
    else
8614
      {
8615
      jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
8616
      /* STR_PTR = STR_END - IN_UCHARS(1) */
8617
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
8618
      check_partial(common, TRUE);
8619
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
8620
      JUMPHERE(jump[1]);
8621
      }
8622
8623
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
8624
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
8625
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
8626
    }
8627
  else
8628
    {
8629
    /* Any other newline type: peek at the next character and let
    check_newlinechar() emit the test. */
    peek_char(common, common->nlmax, TMP3, 0, NULL);
8630
    check_newlinechar(common, common->nltype, backtracks, FALSE);
8631
    }
8632
  JUMPHERE(jump[0]);
8633
  return cc;
8634
8635
  /* Backtrack when STR_PTR is above jit_arguments.begin or when the
  PCRE2_NOTBOL bit is set in jit_arguments.options. */
  case OP_CIRC:
8636
  if (HAS_VIRTUAL_REGISTERS)
8637
    {
8638
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
8639
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
8640
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
8641
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
8642
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
8643
    }
8644
  else
8645
    {
8646
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
8647
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
8648
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
8649
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
8650
    }
8651
  return cc;
8652
8653
  /* jump[1] is taken when STR_PTR is above jit_arguments.begin (kept in
  TMP2) and leads to the "preceded by a newline" test. The fall-through
  path backtracks on PCRE2_NOTBOL and otherwise leaves through jump[0]. */
  case OP_CIRCM:
8654
  /* TMP2 might be used by peek_char_back. */
8655
  if (HAS_VIRTUAL_REGISTERS)
8656
    {
8657
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8658
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
8659
    jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);
8660
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
8661
    }
8662
  else
8663
    {
8664
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
8665
    jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);
8666
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
8667
    }
8668
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
8669
  jump[0] = JUMP(SLJIT_JUMP);
8670
  JUMPHERE(jump[1]);
8671
8672
  /* Unless alt_circumflex is set, the assertion backtracks once STR_PTR has
  reached STR_END. */
  if (!common->alt_circumflex)
8673
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
8674
8675
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
8676
    {
8677
    /* Two-unit fixed newline: STR_PTR - 2 units must not fall below the
    subject start held in TMP2, and the two preceding code units must match
    the newline bytes. */
    OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
8678
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, TMP2, 0));
8679
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
8680
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
8681
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
8682
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
8683
    }
8684
  else
8685
    {
8686
    peek_char_back(common, common->nlmax, backtracks);
8687
    check_newlinechar(common, common->nltype, backtracks, FALSE);
8688
    }
8689
  JUMPHERE(jump[0]);
8690
  return cc;
8691
  }
8692
/* Only reached for a type that is not handled by the switch above. */
SLJIT_UNREACHABLE();
8693
return cc;
8694
}
8695
8696
#ifdef SUPPORT_UNICODE
8697
8698
#if PCRE2_CODE_UNIT_WIDTH != 32
8699
8700
/* The code in this function copies the logic of the interpreter function that
8701
is defined in the pcre2_extuni.c source. If that code is updated, this
8702
function, and those below it, must be kept in step (note by PH, June 2024). */
8703
8704
static PCRE2_SPTR SLJIT_FUNC do_extuni_utf(jit_arguments *args, PCRE2_SPTR cc)
{
/* Walks over one extended grapheme cluster of a UTF subject, starting at cc,
and returns the pointer just past the last character that belongs to it.
args->begin and args->end delimit the subject. The character at cc is always
consumed; every following character is appended only while the grapheme break
table and the two special rules below allow it. */

PCRE2_SPTR subject_start = args->begin;
PCRE2_SPTR subject_end = args->end;
PCRE2_SPTR left_start, cluster_end, scan;
int left_gb, right_gb, ri_run;
BOOL zwj_after_ep = FALSE;
uint32_t ch;

/* The leading character is part of the cluster unconditionally. */

left_start = cc;
GETCHARINC(ch, cc);
left_gb = UCD_GRAPHBREAK(ch);
cluster_end = cc;

while (cc < subject_end)
  {
  GETCHARINC(ch, cc);
  right_gb = UCD_GRAPHBREAK(ch);

  /* The table demands a break between these two properties. */

  if ((PRIV(ucp_gbtable)[left_gb] & (1 << right_gb)) == 0)
    break;

  /* A ZWJ may only be joined to a following Extended Pictographic when the
  ZWJ itself came after an Extended Pictographic. */

  if (!zwj_after_ep && left_gb == ucp_gbZWJ &&
      right_gb == ucp_gbExtended_Pictographic)
    break;

  /* Two Regional Indicators stay together only when an even number of
  Regional Indicators precede the left-hand one. */

  if (left_gb == ucp_gbRegional_Indicator &&
      right_gb == ucp_gbRegional_Indicator)
    {
    ri_run = 0;

    /* scan starts on the left-hand character and moves backwards one
    character per round; a round that finds a non-RI leaves before the
    counter is incremented. */

    for (scan = left_start; scan > subject_start; ri_run++)
      {
      scan--;
      BACKCHAR(scan);
      GETCHAR(ch, scan);

      if (UCD_GRAPHBREAK(ch) != ucp_gbRegional_Indicator)
        break;
      }

    if ((ri_run & 1) != 0)
      break;  /* Grapheme break required */
    }

  /* Remember whether a ZWJ has just followed an Extended Pictographic
  (Extend characters in between are covered by the next statement). */

  zwj_after_ep = (left_gb == ucp_gbExtended_Pictographic &&
    right_gb == ucp_gbZWJ);

  /* An Extend after an Extended Pictographic leaves the left-hand property
  untouched, so any number of them may precede a ZWJ. */

  if (!(right_gb == ucp_gbExtend && left_gb == ucp_gbExtended_Pictographic))
    left_gb = right_gb;

  left_start = cluster_end;
  cluster_end = cc;
  }

return cluster_end;
}
8780
8781
#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
8782
8783
/* The code in this function copies the logic of the interpreter function that
8784
is defined in the pcre2_extuni.c source. If that code is updated, this
8785
function, and the one below it, must be kept in step (note by PH, June 2024). */
8786
8787
static PCRE2_SPTR SLJIT_FUNC do_extuni_utf_invalid(jit_arguments *args, PCRE2_SPTR cc)
{
/* Variant of the extended grapheme cluster walk for subjects that may hold
invalid UTF sequences. Returns NULL when the very first character cannot be
decoded; otherwise returns the pointer just past the last character accepted
into the cluster. An undecodable character further on simply ends the
cluster. */

PCRE2_SPTR subject_start = args->begin;
PCRE2_SPTR subject_end = args->end;
PCRE2_SPTR left_start, cluster_end, scan;
int left_gb, right_gb, ri_run;
BOOL zwj_after_ep = FALSE;
uint32_t ch;

/* The leading character must decode; it then belongs to the cluster
unconditionally. */

left_start = cc;
GETCHARINC_INVALID(ch, cc, subject_end, return NULL);
left_gb = UCD_GRAPHBREAK(ch);
cluster_end = cc;

while (cc < subject_end)
  {
  GETCHARINC_INVALID(ch, cc, subject_end, break);
  right_gb = UCD_GRAPHBREAK(ch);

  /* The table demands a break between these two properties. */

  if ((PRIV(ucp_gbtable)[left_gb] & (1 << right_gb)) == 0)
    break;

  /* A ZWJ may only be joined to a following Extended Pictographic when the
  ZWJ itself came after an Extended Pictographic. */

  if (!zwj_after_ep && left_gb == ucp_gbZWJ &&
      right_gb == ucp_gbExtended_Pictographic)
    break;

  /* Two Regional Indicators stay together only when an even number of
  Regional Indicators precede the left-hand one. */

  if (left_gb == ucp_gbRegional_Indicator &&
      right_gb == ucp_gbRegional_Indicator)
    {
    ri_run = 0;

    /* scan starts on the left-hand character and moves backwards; both an
    undecodable character and a non-RI leave the loop before the counter is
    incremented. */

    for (scan = left_start; scan > subject_start; ri_run++)
      {
      GETCHARBACK_INVALID(ch, scan, subject_start, break);

      if (UCD_GRAPHBREAK(ch) != ucp_gbRegional_Indicator)
        break;
      }

    if ((ri_run & 1) != 0)
      break;  /* Grapheme break required */
    }

  /* Remember whether a ZWJ has just followed an Extended Pictographic
  (Extend characters in between are covered by the next statement). */

  zwj_after_ep = (left_gb == ucp_gbExtended_Pictographic &&
    right_gb == ucp_gbZWJ);

  /* An Extend after an Extended Pictographic leaves the left-hand property
  untouched, so any number of them may precede a ZWJ. */

  if (!(right_gb == ucp_gbExtend && left_gb == ucp_gbExtended_Pictographic))
    left_gb = right_gb;

  left_start = cluster_end;
  cluster_end = cc;
  }

return cluster_end;
}
8862
8863
/* The code in this function copies the logic of the interpreter function that
8864
is defined in the pcre2_extuni.c source. If that code is updated, this
8865
function must be kept in step (note by PH, June 2024). */
8866
8867
static PCRE2_SPTR SLJIT_FUNC do_extuni_no_utf(jit_arguments *args, PCRE2_SPTR cc)
{
/* Extended grapheme cluster walk for non-UTF subjects, where every code unit
is one character. Returns the pointer just past the last code unit accepted
into the cluster. In the 32-bit library a value of 0x110000 or above is never
looked up: as the first code unit it forms the whole result, later on it ends
the cluster. */

PCRE2_SPTR subject_start = args->begin;
PCRE2_SPTR subject_end = args->end;
PCRE2_SPTR scan;
int left_gb, right_gb, ri_run;
BOOL zwj_after_ep = FALSE;
uint32_t ch;

/* The leading code unit is read directly rather than through GETCHARINC. */

ch = *cc++;

#if PCRE2_CODE_UNIT_WIDTH == 32
if (ch >= 0x110000)
  return cc;
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
left_gb = UCD_GRAPHBREAK(ch);

/* cc points at the candidate right-hand character during each round and is
advanced only after that character has been accepted. */

for (; cc < subject_end; cc++)
  {
  ch = *cc;
#if PCRE2_CODE_UNIT_WIDTH == 32
  if (ch >= 0x110000)
    break;
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
  right_gb = UCD_GRAPHBREAK(ch);

  /* The table demands a break between these two properties. */

  if ((PRIV(ucp_gbtable)[left_gb] & (1 << right_gb)) == 0)
    break;

  /* A ZWJ may only be joined to a following Extended Pictographic when the
  ZWJ itself came after an Extended Pictographic. */

  if (!zwj_after_ep && left_gb == ucp_gbZWJ &&
      right_gb == ucp_gbExtended_Pictographic)
    break;

  /* Two Regional Indicators stay together only when an even number of
  Regional Indicators precede the left-hand one. */

  if (left_gb == ucp_gbRegional_Indicator &&
      right_gb == ucp_gbRegional_Indicator)
    {
    ri_run = 0;

    /* scan starts on the left-hand character (cc - 1) and moves backwards;
    leaving the loop early skips the counter increment. */

    for (scan = cc - 1; scan > subject_start; ri_run++)
      {
      scan--;
      ch = *scan;
#if PCRE2_CODE_UNIT_WIDTH == 32
      if (ch >= 0x110000)
        break;
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */

      if (UCD_GRAPHBREAK(ch) != ucp_gbRegional_Indicator)
        break;
      }

    if ((ri_run & 1) != 0)
      break;  /* Grapheme break required */
    }

  /* Remember whether a ZWJ has just followed an Extended Pictographic
  (Extend characters in between are covered by the next statement). */

  zwj_after_ep = (left_gb == ucp_gbExtended_Pictographic &&
    right_gb == ucp_gbZWJ);

  /* An Extend after an Extended Pictographic leaves the left-hand property
  untouched, so any number of them may precede a ZWJ. */

  if (!(right_gb == ucp_gbExtend && left_gb == ucp_gbExtended_Pictographic))
    left_gb = right_gb;
  }

return cc;
}
8947
8948
#endif /* SUPPORT_UNICODE */
8949
8950
/* Emits the matching-path code for one single-character item of kind `type`
whose inline data (if any) starts at `cc`. When `check_str_ptr` is set,
detect_partial_match() is emitted first; OP_CHAR/OP_CHARI do that only
outside PCRE2_JIT_COMPLETE mode and emit an explicit STR_END comparison in
complete mode instead. Mismatch jumps are appended to `backtracks`.
Returns the updated pattern pointer: `cc` itself for items without inline
data, otherwise `cc` advanced past the data handled here (the literal path
returns whatever byte_sequence_compare() returns). A `type` that is not
listed ends up in SLJIT_UNREACHABLE(). */
static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr)
8951
{
8952
DEFINE_COMPILER;
8953
int length;
8954
unsigned int c, oc, bit;
8955
compare_context context;
8956
/* Scratch slots for forward jumps resolved with JUMPHERE() in the same
case; end_list collects "subject exhausted" jumps in OP_ANY and OP_ANYNL. */
struct sljit_jump *jump[3];
8957
jump_list *end_list;
8958
#ifdef SUPPORT_UNICODE
8959
PCRE2_UCHAR propdata[5];
8960
#endif /* SUPPORT_UNICODE */
8961
8962
switch(type)
8963
  {
8964
  /* The three ctype-based pairs below share one shape: read the character's
  type bits into TMP1, AND them with the relevant ctype_* mask and backtrack
  on a zero result for the positive opcode or a non-zero result for the
  negated one. */
  case OP_NOT_DIGIT:
8965
  case OP_DIGIT:
8966
  /* Digits are usually 0-9, so it is worth to optimize them. */
8967
  if (check_str_ptr)
8968
    detect_partial_match(common, backtracks);
8969
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8970
  if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_digit, FALSE))
8971
    read_char7_type(common, backtracks, type == OP_NOT_DIGIT);
8972
  else
8973
#endif
8974
    read_char8_type(common, backtracks, type == OP_NOT_DIGIT);
8975
    /* Flip the starting bit in the negative case. */
8976
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_digit);
8977
  add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
8978
  return cc;
8979
8980
  case OP_NOT_WHITESPACE:
8981
  case OP_WHITESPACE:
8982
  if (check_str_ptr)
8983
    detect_partial_match(common, backtracks);
8984
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8985
  if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_space, FALSE))
8986
    read_char7_type(common, backtracks, type == OP_NOT_WHITESPACE);
8987
  else
8988
#endif
8989
    read_char8_type(common, backtracks, type == OP_NOT_WHITESPACE);
8990
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_space);
8991
  add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
8992
  return cc;
8993
8994
  case OP_NOT_WORDCHAR:
8995
  case OP_WORDCHAR:
8996
  if (check_str_ptr)
8997
    detect_partial_match(common, backtracks);
8998
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8999
  if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_word, FALSE))
9000
    read_char7_type(common, backtracks, type == OP_NOT_WORDCHAR);
9001
  else
9002
#endif
9003
    read_char8_type(common, backtracks, type == OP_NOT_WORDCHAR);
9004
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_word);
9005
  add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
9006
  return cc;
9007
9008
  /* Read one character and backtrack if it is a newline. For a fixed
  newline made of two code units the character just read is compared with
  the high byte and, unless the subject ends there, the following code unit
  with the low byte. */
  case OP_ANY:
9009
  if (check_str_ptr)
9010
    detect_partial_match(common, backtracks);
9011
  read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
9012
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
9013
    {
9014
    jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
9015
    end_list = NULL;
9016
    if (common->mode != PCRE2_JIT_PARTIAL_HARD)
9017
      add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
9018
    else
9019
      check_str_end(common, &end_list);
9020
9021
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
9022
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
9023
    set_jumps(end_list, LABEL());
9024
    JUMPHERE(jump[0]);
9025
    }
9026
  else
9027
    check_newlinechar(common, common->nltype, backtracks, TRUE);
9028
  return cc;
9029
9030
  /* With utf and invalid_utf both set the character is decoded through
  read_char() over the full range; otherwise skip_valid_char() is emitted. */
  case OP_ALLANY:
9031
  if (check_str_ptr)
9032
    detect_partial_match(common, backtracks);
9033
#ifdef SUPPORT_UNICODE
9034
  if (common->utf && common->invalid_utf)
9035
    {
9036
    read_char(common, 0, READ_CHAR_MAX, backtracks, READ_CHAR_UPDATE_STR_PTR);
9037
    return cc;
9038
    }
9039
#endif /* SUPPORT_UNICODE */
9040
9041
  skip_valid_char(common);
9042
  return cc;
9043
9044
  /* Advance STR_PTR by exactly one code unit. */
  case OP_ANYBYTE:
9045
  if (check_str_ptr)
9046
    detect_partial_match(common, backtracks);
9047
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9048
  return cc;
9049
9050
#ifdef SUPPORT_UNICODE
9051
  /* Wrap the two property code units at cc into a five-element XCL_*
  sequence and let compile_xclass_matchingpath() emit the test. Two code
  units of pattern data are consumed. */
  case OP_NOTPROP:
9052
  case OP_PROP:
9053
  propdata[0] = XCL_HASPROP;
9054
  propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
9055
  propdata[2] = cc[0];
9056
  propdata[3] = cc[1];
9057
  propdata[4] = XCL_END;
9058
  if (check_str_ptr)
9059
    detect_partial_match(common, backtracks);
9060
  compile_xclass_matchingpath(common, propdata, backtracks);
9061
  return cc + 2;
9062
#endif
9063
9064
  /* Read one character. If it is CR and the subject continues with NL,
  STR_PTR is advanced over the NL as well (jump[2]); a CR without a following
  NL or at the end of the subject is accepted as it is. Any character other
  than CR (jump[0]) is handed to check_newlinechar() with bsr_nltype. */
  case OP_ANYNL:
9065
  if (check_str_ptr)
9066
    detect_partial_match(common, backtracks);
9067
  read_char(common, common->bsr_nlmin, common->bsr_nlmax, NULL, 0);
9068
  jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
9069
  /* We don't need to handle soft partial matching case. */
9070
  end_list = NULL;
9071
  if (common->mode != PCRE2_JIT_PARTIAL_HARD)
9072
    add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
9073
  else
9074
    check_str_end(common, &end_list);
9075
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
9076
  jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
9077
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9078
  jump[2] = JUMP(SLJIT_JUMP);
9079
  JUMPHERE(jump[0]);
9080
  check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
9081
  set_jumps(end_list, LABEL());
9082
  JUMPHERE(jump[1]);
9083
  JUMPHERE(jump[2]);
9084
  return cc;
9085
9086
  /* Read one character (range hint 0x9..0x3000), fast-call the shared
  hspace routine and branch on the zero flag: the negated opcode backtracks
  on non-zero, the positive one on zero. Only the negated form passes
  backtracks and READ_CHAR_UPDATE_STR_PTR to read_char(). */
  case OP_NOT_HSPACE:
9087
  case OP_HSPACE:
9088
  if (check_str_ptr)
9089
    detect_partial_match(common, backtracks);
9090
9091
  if (type == OP_NOT_HSPACE)
9092
    read_char(common, 0x9, 0x3000, backtracks, READ_CHAR_UPDATE_STR_PTR);
9093
  else
9094
    read_char(common, 0x9, 0x3000, NULL, 0);
9095
9096
  add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
9097
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
9098
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
9099
  return cc;
9100
9101
  /* Same shape as the hspace pair, with range hint 0xa..0x2029 and the
  shared vspace routine. */
  case OP_NOT_VSPACE:
9102
  case OP_VSPACE:
9103
  if (check_str_ptr)
9104
    detect_partial_match(common, backtracks);
9105
9106
  if (type == OP_NOT_VSPACE)
9107
    read_char(common, 0xa, 0x2029, backtracks, READ_CHAR_UPDATE_STR_PTR);
9108
  else
9109
    read_char(common, 0xa, 0x2029, NULL, 0);
9110
9111
  add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
9112
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
9113
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
9114
  return cc;
9115
9116
#ifdef SUPPORT_UNICODE
9117
  /* Calls one of the do_extuni_* C helpers. ARGUMENTS is moved into R0 and,
  as the assertion states, STR_PTR already lives in R1, so the two call
  arguments are (jit_arguments, current position). The returned pointer
  becomes the new STR_PTR; with invalid_utf a zero return backtracks. */
  case OP_EXTUNI:
9118
  if (check_str_ptr)
9119
    detect_partial_match(common, backtracks);
9120
9121
  SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
9122
  OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
9123
9124
#if PCRE2_CODE_UNIT_WIDTH != 32
9125
  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
9126
    common->utf ? (common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_utf)) : SLJIT_FUNC_ADDR(do_extuni_no_utf));
9127
  if (common->invalid_utf)
9128
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
9129
#else
9130
  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
9131
    common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_no_utf));
9132
  if (common->invalid_utf)
9133
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
9134
#endif
9135
9136
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
9137
9138
  if (common->mode == PCRE2_JIT_PARTIAL_HARD)
9139
    {
9140
    /* check_partial() is skipped while the returned pointer is still below
    STR_END. */
    jump[0] = CMP(SLJIT_LESS, SLJIT_RETURN_REG, 0, STR_END, 0);
9141
    /* Since we successfully read a char above, partial matching must occur. */
9142
    check_partial(common, TRUE);
9143
    JUMPHERE(jump[0]);
9144
    }
9145
  return cc;
9146
#endif
9147
9148
  /* Literal character. `length` is the number of code units of the literal
  at cc (more than one only for UTF characters with extra units). */
  case OP_CHAR:
9149
  case OP_CHARI:
9150
  length = 1;
9151
#ifdef SUPPORT_UNICODE
9152
  if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
9153
#endif
9154
9155
  if (check_str_ptr && common->mode != PCRE2_JIT_COMPLETE)
9156
    detect_partial_match(common, backtracks);
9157
9158
  /* Code-unit comparison path: taken for caseful literals, for characters
  without another case, and whenever char_get_othercase_bit() returns
  non-zero. STR_PTR is advanced first and byte_sequence_compare() emits the
  comparison. */
  if (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0)
9159
    {
9160
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
9161
    if (length > 1 || (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE))
9162
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
9163
9164
    context.length = IN_UCHARS(length);
9165
    context.sourcereg = -1;
9166
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
9167
    context.ucharptr = 0;
9168
#endif
9169
    return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
9170
    }
9171
9172
  /* Remaining case: a caseless literal whose two cases must be compared as
  whole characters. */
#ifdef SUPPORT_UNICODE
9173
  if (common->utf)
9174
    {
9175
    GETCHAR(c, cc);
9176
    }
9177
  else
9178
#endif
9179
    c = *cc;
9180
9181
  SLJIT_ASSERT(type == OP_CHARI && char_has_othercase(common, cc));
9182
9183
  if (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE)
9184
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
9185
9186
  oc = char_othercase(common, c);
9187
  read_char(common, c < oc ? c : oc, c > oc ? c : oc, NULL, 0);
9188
9189
  SLJIT_ASSERT(!is_powerof2(c ^ oc));
9190
9191
  /* With conditional-move support a character equal to oc is replaced by c,
  so a single comparison against c is enough; otherwise c and oc are
  compared one after the other. */
  if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
9192
    {
9193
    OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, oc);
9194
    SELECT(SLJIT_EQUAL, TMP1, SLJIT_IMM, c, TMP1);
9195
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
9196
    }
9197
  else
9198
    {
9199
    jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
9200
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
9201
    JUMPHERE(jump[0]);
9202
    }
9203
  return cc + length;
9204
9205
  /* Negated literal: backtrack when the subject character equals the
  literal (or, for OP_NOTI, either of its two cases). */
  case OP_NOT:
9206
  case OP_NOTI:
9207
  if (check_str_ptr)
9208
    detect_partial_match(common, backtracks);
9209
9210
  length = 1;
9211
#ifdef SUPPORT_UNICODE
9212
  if (common->utf)
9213
    {
9214
#if PCRE2_CODE_UNIT_WIDTH == 8
9215
    c = *cc;
9216
    /* Shortcut for an ASCII literal in valid UTF-8: compare the first byte
    only, then step over the whole subject character using utf8_table4 when
    that byte is 0xc0 or above. */
    if (c < 128 && !common->invalid_utf)
9217
      {
9218
      OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
9219
      if (type == OP_NOT || !char_has_othercase(common, cc))
9220
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
9221
      else
9222
        {
9223
        /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
9224
        OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
9225
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
9226
        }
9227
      /* Skip the variable-length character. */
9228
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9229
      jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
9230
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
9231
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
9232
      JUMPHERE(jump[0]);
9233
      return cc + 1;
9234
      }
9235
    else
9236
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
9237
      {
9238
      GETCHARLEN(c, cc, length);
9239
      }
9240
    }
9241
  else
9242
#endif /* SUPPORT_UNICODE */
9243
    c = *cc;
9244
9245
  if (type == OP_NOT || !char_has_othercase(common, cc))
9246
    {
9247
    read_char(common, c, c, backtracks, READ_CHAR_UPDATE_STR_PTR);
9248
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
9249
    }
9250
  else
9251
    {
9252
    oc = char_othercase(common, c);
9253
    read_char(common, c < oc ? c : oc, c > oc ? c : oc, backtracks, READ_CHAR_UPDATE_STR_PTR);
9254
    bit = c ^ oc;
9255
    /* When the two cases differ in a single bit, that bit is forced on and
    one comparison covers both; otherwise each case is compared
    separately. */
    if (is_powerof2(bit))
9256
      {
9257
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
9258
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
9259
      }
9260
    else
9261
      {
9262
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
9263
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
9264
      }
9265
    }
9266
  return cc + length;
9267
9268
  /* Bitmap character class: the 32 bytes at cc hold one bit per character
  value 0..255. The pattern pointer returned is cc advanced by
  32 / sizeof(PCRE2_UCHAR) code units. */
  case OP_CLASS:
9269
  case OP_NCLASS:
9270
  if (check_str_ptr)
9271
    detect_partial_match(common, backtracks);
9272
9273
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
9274
  /* `bit` is reused here as the upper bound handed to read_char(): 127 when
  utf is set and is_char7_bitset() accepts the bitmap, 255 otherwise. */
  bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255;
9275
  if (type == OP_NCLASS)
9276
    read_char(common, 0, bit, backtracks, READ_CHAR_UPDATE_STR_PTR);
9277
  else
9278
    read_char(common, 0, bit, NULL, 0);
9279
#else
9280
  if (type == OP_NCLASS)
9281
    read_char(common, 0, 255, backtracks, READ_CHAR_UPDATE_STR_PTR);
9282
  else
9283
    read_char(common, 0, 255, NULL, 0);
9284
#endif
9285
9286
  /* optimize_class() returning non-zero means nothing more is emitted
  here. */
  if (optimize_class(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
9287
    return cc + 32 / sizeof(PCRE2_UCHAR);
9288
9289
  /* Characters above the bitmap range: OP_CLASS backtracks on them, while
  OP_NCLASS keeps the jump in jump[0] so that they skip the bitmap test. */
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
9290
  jump[0] = NULL;
9291
  if (common->utf)
9292
    {
9293
    jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
9294
    if (type == OP_CLASS)
9295
      {
9296
      add_jump(compiler, backtracks, jump[0]);
9297
      jump[0] = NULL;
9298
      }
9299
    }
9300
#elif PCRE2_CODE_UNIT_WIDTH != 8
9301
  jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
9302
  if (type == OP_CLASS)
9303
    {
9304
    add_jump(compiler, backtracks, jump[0]);
9305
    jump[0] = NULL;
9306
    }
9307
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
9308
9309
  /* Bitmap lookup: TMP2 = 1 << (c & 7), TMP1 = byte at cc + (c >> 3);
  backtrack when the selected bit is clear. */
  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
9310
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
9311
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
9312
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
9313
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
9314
  add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
9315
9316
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
9317
  if (jump[0] != NULL)
9318
    JUMPHERE(jump[0]);
9319
#endif
9320
  return cc + 32 / sizeof(PCRE2_UCHAR);
9321
9322
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
9323
  /* Extended class: the data after the LINK_SIZE prefix is handed to
  compile_xclass_matchingpath(); the pattern pointer advances by the value
  read with GET(cc, 0), minus one. */
  case OP_XCLASS:
9324
  if (check_str_ptr)
9325
    detect_partial_match(common, backtracks);
9326
  compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
9327
  return cc + GET(cc, 0) - 1;
9328
#endif
9329
  }
9330
/* Only reached for a type that is not handled by the switch above. */
SLJIT_UNREACHABLE();
9331
return cc;
9332
}
9333
9334
/* Emits the matching code for a run of literal characters starting at cc.

Consecutive OP_CHAR / OP_CHARI items whose encoded length is fixed are
collected into one sequence so that a single end-of-subject check covers
all of them; scanning stops at ccend, at the first item that is not
fixed-length, or once more than 128 bytes have been collected. When nothing
could be collected the single item at cc is handed to
compile_char1_matchingpath instead.

This function consumes at least one input character. Returns the pattern
pointer following the compiled items. */
static SLJIT_INLINE PCRE2_SPTR compile_charn_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, jump_list **backtracks)
{
DEFINE_COMPILER;
PCRE2_SPTR scan = cc;
compare_context context;
int units;

context.length = 0;
while (scan < ccend)
  {
  units = 0;

  if (*scan == OP_CHAR || *scan == OP_CHARI)
    {
    units = 1;
    /* A caseless character whose two cases do not differ in a single bit
    cannot be part of a fixed sequence compare. */
    if (*scan == OP_CHARI && char_has_othercase(common, scan + 1) && char_get_othercase_bit(common, scan + 1) == 0)
      units = 0;
#ifdef SUPPORT_UNICODE
    else if (common->utf && HAS_EXTRALEN(scan[1]))
      units += GET_EXTRALEN(scan[1]);
#endif
    }

  scan += 1 + units;
  context.length += IN_UCHARS(units);

  if (units <= 0 || context.length > 128)
    break;
  }

/* Nothing was collected: the item at cc is checked on its own. */
if (context.length <= 0)
  return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);

/* A fixed-length byte sequence: advance first, then verify that the whole
sequence fits before the end of the subject. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));

context.sourcereg = -1;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
context.ucharptr = 0;
#endif

do
  cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks);
while (context.length > 0);

return cc;
}
9399
9400
/* Forward declarations. */
9401
static void compile_matchingpath(compiler_common *, PCRE2_SPTR, PCRE2_SPTR, backtrack_common *);
9402
static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
9403
9404
/* Allocates a backtrack record of (size) bytes with sljit_alloc_memory,
zero-fills it, records (ccstart) as its pattern position and pushes it on
top of parent's backtrack list. On a compiler error the enclosing function
returns (error). Expects `compiler`, `backtrack` and `parent` to be in
scope at the point of use. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->cc = (ccstart); \
    backtrack->prev = parent->top; \
    parent->top = backtrack; \
    } \
  while (0)
9416
9417
/* Same as PUSH_BACKTRACK, for functions returning void: allocates a
zero-filled backtrack record of (size) bytes, stores (ccstart) in it and
pushes it on top of parent's backtrack list. On a compiler error the
enclosing function simply returns. Expects `compiler`, `backtrack` and
`parent` to be in scope at the point of use. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->cc = (ccstart); \
    backtrack->prev = parent->top; \
    parent->top = backtrack; \
    } \
  while (0)
9429
9430
/* Views the in-scope `backtrack` pointer as a pointer to the given
backtrack structure type. */
#define BACKTRACK_AS(type) ((type *)backtrack)
9431
9432
/* Emits the search for the first set group among the name table entries of
an OP_DNREF / OP_DNREFI item.

The address of the selected ovector pair is left in TMP2. Every entry but
the last is tested against OVECTOR(1) and the search ends as soon as a
differing (set) one is found; the last entry is selected unconditionally.
When backtracks is not NULL and common->unset_backref is not set, a last
entry equal to OVECTOR(1) jumps to the backtrack list. */
static void compile_dnref_search(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
PCRE2_SPTR entry = common->name_table + GET2(cc, 1) * common->name_entry_size;
int remaining = GET2(cc, 1 + IMM2_SIZE);
unsigned int ovec_index;
jump_list *matched = NULL;

SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));

for (;;)
  {
  ovec_index = GET2(entry, 0) << 1;
  GET_LOCAL_BASE(TMP2, 0, OVECTOR(ovec_index));

  /* The final entry gets the (optional) unset check below instead. */
  if (--remaining <= 0)
    break;

  add_jump(compiler, &matched, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovec_index), TMP1, 0));
  entry += common->name_entry_size;
  }

if (backtracks != NULL && !common->unset_backref)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovec_index), TMP1, 0));

set_jumps(matched, LABEL());
}
9461
9462
/* Emits the code that compares the subject at STR_PTR with the text of a
captured group (a back reference).

For OP_REF / OP_REFI the ovector offset is taken from the opcode argument;
for any other opcode the address of the ovector pair is read through TMP2,
so it has to be loaded there before this code runs.

withchecks: also emit the unset-group test (numbered references only, and
  only when common->unset_backref is not set) and the empty-capture test.
emptyfail:  with withchecks, an empty capture jumps to backtracks instead
  of continuing on the matching path.

Mismatches are added to the backtracks list. */
static void compile_ref_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
9463
{
9464
DEFINE_COMPILER;
9465
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
9466
int offset = 0;
9467
struct sljit_jump *jump = NULL;
9468
struct sljit_jump *partial;
9469
struct sljit_jump *nopartial;
9470
#if defined SUPPORT_UNICODE
9471
struct sljit_label *loop;
9472
struct sljit_label *caseless_loop;
9473
jump_list *no_match = NULL;
9474
/* Registers borrowed by the UTF caseless loop; their values are spilled to
the iref_ptr slots and reloaded on every exit path. */
int source_reg = COUNT_MATCH;
9475
int source_end_reg = ARGUMENTS;
9476
int char1_reg = STACK_LIMIT;
9477
#endif /* SUPPORT_UNICODE */
9478
9479
/* TMP1 = start pointer of the captured text. */
if (ref)
9480
  {
9481
  offset = GET2(cc, 1) << 1;
9482
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9483
  /* OVECTOR(1) contains the "string begin - 1" constant. */
9484
  if (withchecks && !common->unset_backref)
9485
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9486
  }
9487
else
9488
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9489
9490
#if defined SUPPORT_UNICODE
9491
if (common->utf && *cc == OP_REFI)
9492
  {
9493
  SLJIT_ASSERT(common->iref_ptr != 0);
9494
9495
  /* TMP2 = end pointer of the captured text. */
  if (ref)
9496
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9497
  else
9498
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9499
9500
  /* Empty capture (start == end). */
  if (withchecks && emptyfail)
9501
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0));
9502
9503
  /* Spill the three borrowed registers. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr, source_reg, 0);
9504
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw), source_end_reg, 0);
9505
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2, char1_reg, 0);
9506
9507
  OP1(SLJIT_MOV, source_reg, 0, TMP1, 0);
9508
  OP1(SLJIT_MOV, source_end_reg, 0, TMP2, 0);
9509
9510
  /* One iteration per character: `jump` leaves the loop when the captured
  text is exhausted, `partial` when the subject is exhausted first. */
  loop = LABEL();
9511
  jump = CMP(SLJIT_GREATER_EQUAL, source_reg, 0, source_end_reg, 0);
9512
  partial = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
9513
9514
  /* Read original character. It must be a valid UTF character. */
  /* STR_PTR is pointed at the captured text for the read; the subject
  position is parked in TMP3 meanwhile. */
9515
  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
9516
  OP1(SLJIT_MOV, STR_PTR, 0, source_reg, 0);
9517
9518
  read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR | READ_CHAR_VALID_UTF);
9519
9520
  OP1(SLJIT_MOV, source_reg, 0, STR_PTR, 0);
9521
  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
9522
  OP1(SLJIT_MOV, char1_reg, 0, TMP1, 0);
9523
9524
  /* Read second character. */
9525
  read_char(common, 0, READ_CHAR_MAX, &no_match, READ_CHAR_UPDATE_STR_PTR);
9526
9527
  /* Identical characters: continue with the next pair. */
  CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);
9528
9529
  OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
9530
9531
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
9532
9533
  /* TMP2 = TMP2 * 12 (4x + 8x), then rebased onto ucd_records.
  NOTE(review): presumably getucd leaves the record index in TMP2 and 12 is
  sizeof(ucd_record) - confirm against the getucd helper. */
  OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
9534
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
9535
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
9536
9537
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records));
9538
9539
  /* TMP1 = other_case field + subject character (saved in TMP3);
  TMP2 = caseset field. */
  OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, other_case));
9540
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, caseset));
9541
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);
9542
  CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);
9543
9544
  /* A zero caseset is a mismatch; otherwise TMP2 becomes the address of
  the 32-bit entry ucd_caseless_sets[caseset]. */
  add_jump(compiler, &no_match, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
9545
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
9546
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_caseless_sets));
9547
9548
  /* Scan the set: an entry equal to the captured character continues the
  outer loop, a smaller entry continues the scan, a larger one falls
  through to no_match. */
  caseless_loop = LABEL();
9549
  OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9550
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(uint32_t));
9551
  OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, TMP1, 0, char1_reg, 0);
9552
  JUMPTO(SLJIT_EQUAL, loop);
9553
  JUMPTO(SLJIT_LESS, caseless_loop);
9554
9555
  /* Mismatch exit: restore the borrowed registers and backtrack. In
  complete (non-partial) mode running out of subject ends up here too. */
  set_jumps(no_match, LABEL());
9556
  if (common->mode == PCRE2_JIT_COMPLETE)
9557
    JUMPHERE(partial);
9558
9559
  OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
9560
  OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
9561
  OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
9562
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
9563
9564
  /* Partial matching modes: subject exhausted; restore the registers, run
  check_partial, then backtrack. */
  if (common->mode != PCRE2_JIT_COMPLETE)
9565
    {
9566
    JUMPHERE(partial);
9567
    OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
9568
    OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
9569
    OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
9570
9571
    check_partial(common, FALSE);
9572
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
9573
    }
9574
9575
  /* Success exit: the whole captured text was matched. */
  JUMPHERE(jump);
9576
  OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
9577
  OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
9578
  OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
9579
  return;
9580
  }
9581
else
9582
#endif /* SUPPORT_UNICODE */
9583
  {
9584
  /* TMP2 = end - start of the captured text; the zero flag is set for an
  empty capture. */
  if (ref)
9585
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
9586
  else
9587
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
9588
9589
  if (withchecks)
9590
    jump = JUMP(SLJIT_ZERO);
9591
9592
  /* Advance STR_PTR by the capture length; `partial` is taken when that
  runs past the end of the subject. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
9593
  partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
9594
  if (common->mode == PCRE2_JIT_COMPLETE)
9595
    add_jump(compiler, backtracks, partial);
9596
9597
  /* The shared compare helper is called; a non-zero TMP2 afterwards is
  treated as a mismatch. */
  add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
9598
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
9599
9600
  if (common->mode != PCRE2_JIT_COMPLETE)
9601
    {
9602
    nopartial = JUMP(SLJIT_JUMP);
9603
    JUMPHERE(partial);
9604
    /* TMP2 -= STR_END - STR_PTR */
    /* Only the part of the capture that fits before STR_END is compared;
    if it is empty the compare is skipped. */
9605
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
9606
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
9607
    partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
9608
    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
9609
    add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
9610
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
9611
    JUMPHERE(partial);
9612
    check_partial(common, FALSE);
9613
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
9614
    JUMPHERE(nopartial);
9615
    }
9616
  }
9617
9618
/* `jump` is the empty-capture jump taken above: it either backtracks or
lands here, depending on emptyfail. */
if (jump != NULL)
9619
  {
9620
  if (emptyfail)
9621
    add_jump(compiler, backtracks, jump);
9622
  else
9623
    JUMPHERE(jump);
9624
  }
9625
}
9626
9627
/* Emits the matching path for a back reference that is followed by a
repeat quantifier (OP_CRSTAR ... OP_CRMINRANGE).

A ref_iterator_backtrack record is pushed on parent. The greedy forms are
compiled by the first half of the function, the minimizing forms (odd
quantifier opcodes) by the second half. A max of 0 is handled as "no upper
limit" throughout.

Returns the pattern pointer following the quantifier, or NULL when the
backtrack record cannot be allocated. */
static SLJIT_INLINE PCRE2_SPTR compile_ref_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9628
{
9629
DEFINE_COMPILER;
9630
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
9631
backtrack_common *backtrack;
9632
PCRE2_UCHAR type;
9633
int offset = 0;
9634
struct sljit_label *label;
9635
struct sljit_jump *zerolength;
9636
struct sljit_jump *jump = NULL;
9637
PCRE2_SPTR ccbegin = cc;
9638
int min = 0, max = 0;
9639
BOOL minimize;
9640
9641
PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);
9642
9643
/* Non-numbered references carry one more IMM2 argument, which is skipped
here so that the quantifier sits at the same relative position. */
if (ref)
9644
  offset = GET2(cc, 1) << 1;
9645
else
9646
  cc += IMM2_SIZE;
9647
type = cc[1 + IMM2_SIZE];
9648
9649
/* Minimizing quantifiers are the odd opcodes. */
SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
9650
minimize = (type & 0x1) != 0;
9651
switch(type)
9652
  {
9653
  case OP_CRSTAR:
9654
  case OP_CRMINSTAR:
9655
  min = 0;
9656
  max = 0;
9657
  cc += 1 + IMM2_SIZE + 1;
9658
  break;
9659
  case OP_CRPLUS:
9660
  case OP_CRMINPLUS:
9661
  min = 1;
9662
  max = 0;
9663
  cc += 1 + IMM2_SIZE + 1;
9664
  break;
9665
  case OP_CRQUERY:
9666
  case OP_CRMINQUERY:
9667
  min = 0;
9668
  max = 1;
9669
  cc += 1 + IMM2_SIZE + 1;
9670
  break;
9671
  case OP_CRRANGE:
9672
  case OP_CRMINRANGE:
9673
  min = GET2(cc, 1 + IMM2_SIZE + 1);
9674
  max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
9675
  cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
9676
  break;
9677
  default:
9678
  SLJIT_UNREACHABLE();
9679
  break;
9680
  }
9681
9682
/* Greedy repeat. */
if (!minimize)
9683
  {
9684
  if (min == 0)
9685
    {
9686
    /* Two stack words: the current STR_PTR and a zero word. */
    allocate_stack(common, 2);
9687
    if (ref)
9688
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9689
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9690
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
9691
    /* Temporary release of STR_PTR. */
9692
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9693
    /* Handles both invalid and empty cases. Since the minimum repeat,
9694
    is zero the invalid case is basically the same as an empty case. */
9695
    if (ref)
9696
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9697
    else
9698
      {
9699
      /* The selected ovector pair address is kept in POSSESSIVE1 so it can
      be reloaded on every iteration. */
      compile_dnref_search(common, ccbegin, NULL);
9700
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9701
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
9702
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9703
      }
9704
    /* Restore if not zero length. */
9705
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9706
    }
9707
  else
9708
    {
9709
    /* min >= 1: one zero stack word; an unset group backtracks unless
    common->unset_backref is set. */
    allocate_stack(common, 1);
9710
    if (ref)
9711
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9712
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9713
9714
    if (ref)
9715
      {
9716
      if (!common->unset_backref)
9717
        add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9718
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9719
      }
9720
    else
9721
      {
9722
      compile_dnref_search(common, ccbegin, &backtrack->own_backtracks);
9723
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9724
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
9725
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9726
      }
9727
    }
9728
9729
  /* POSSESSIVE0 is the iteration counter when a count has to be kept. */
  if (min > 1 || max > 1)
9730
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
9731
9732
  /* Start of one iteration. */
  label = LABEL();
9733
  if (!ref)
9734
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
9735
  compile_ref_matchingpath(common, ccbegin, &backtrack->own_backtracks, FALSE, FALSE);
9736
9737
  if (min > 1 || max > 1)
9738
    {
9739
    /* Increment the counter; below min loop without saving a position, below
    max save STR_PTR on the stack and loop again. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
9740
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
9741
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
9742
    if (min > 1)
9743
      CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
9744
    if (max > 1)
9745
      {
9746
      jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
9747
      allocate_stack(common, 1);
9748
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9749
      JUMPTO(SLJIT_JUMP, label);
9750
      JUMPHERE(jump);
9751
      }
9752
    }
9753
9754
  if (max == 0)
9755
    {
9756
    /* Includes min > 1 case as well. */
9757
    allocate_stack(common, 1);
9758
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9759
    JUMPTO(SLJIT_JUMP, label);
9760
    }
9761
9762
  JUMPHERE(zerolength);
9763
  BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
9764
9765
  count_match(common);
9766
  return cc;
9767
  }
9768
9769
/* Minimizing repeat. STACK(0) and STACK(1) are initialised to zero below
(STACK(1), compared against max and min further down, serves as the
iteration counter); non-numbered references use a third word, STACK(2), for
the ovector pair address. */
allocate_stack(common, ref ? 2 : 3);
9770
if (ref)
9771
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9772
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9773
if (type != OP_CRMINSTAR)
9774
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
9775
9776
if (min == 0)
9777
  {
9778
  /* Handles both invalid and empty cases. Since the minimum repeat,
9779
  is zero the invalid case is basically the same as an empty case. */
9780
  if (ref)
9781
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9782
  else
9783
    {
9784
    compile_dnref_search(common, ccbegin, NULL);
9785
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9786
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
9787
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9788
    }
9789
  /* Length is non-zero, we can match real repeats. */
9790
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9791
  /* Zero iterations first: skip the iteration code emitted below. */
  jump = JUMP(SLJIT_JUMP);
9792
  }
9793
else
9794
  {
9795
  if (ref)
9796
    {
9797
    if (!common->unset_backref)
9798
      add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9799
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9800
    }
9801
  else
9802
    {
9803
    compile_dnref_search(common, ccbegin, &backtrack->own_backtracks);
9804
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9805
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
9806
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9807
    }
9808
  }
9809
9810
/* Start of one iteration; the label is stored in the backtrack record. */
BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
9811
if (max > 0)
9812
  add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
9813
9814
if (!ref)
9815
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
9816
compile_ref_matchingpath(common, ccbegin, &backtrack->own_backtracks, TRUE, TRUE);
9817
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9818
9819
/* Count the iteration; while below min, repeat immediately. */
if (min > 1)
9820
  {
9821
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9822
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
9823
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9824
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath);
9825
  }
9826
else if (max > 0)
9827
  OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
9828
9829
if (jump != NULL)
9830
  JUMPHERE(jump);
9831
JUMPHERE(zerolength);
9832
9833
count_match(common);
9834
return cc;
9835
}
9836
9837
/* Emits the matching path of a recursion item.

A recurse_backtrack record is pushed on parent. When get_framesize reports
no_stack for the target group, the group body is compiled inline.
Otherwise the recurse_entry for the target offset is looked up in
common->entries (and appended if missing) and a fast call to its entry
point is emitted, followed by a jump to the backtrack list when TMP1 is
zero after the call.

Returns the pattern pointer following the item, or NULL on a compiler
(allocation) error. */
static SLJIT_INLINE PCRE2_SPTR compile_recurse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
recurse_entry **link = &common->entries;
recurse_entry *entry;
sljit_sw start = GET(cc, 1);
PCRE2_SPTR target = common->start + start;
PCRE2_SPTR next_cc = cc + 1 + LINK_SIZE;
BOOL needs_control_head;

PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);

/* Simple patterns are inlined. */
if (get_framesize(common, target, NULL, TRUE, &needs_control_head) == no_stack)
  {
  compile_matchingpath(common, next_opcode(common, target), bracketend(target) - (1 + LINK_SIZE), backtrack);
  BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
  return next_cc;
  }

/* Find the entry of this target; link ends up at the slot holding it, or
at the terminating NULL slot of the list. */
while (*link != NULL && (*link)->start != start)
  link = &(*link)->next;

entry = *link;
if (entry == NULL)
  {
  entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;

  entry->start = start;
  entry->next = NULL;
  entry->entry_label = NULL;
  entry->backtrack_label = NULL;
  entry->entry_calls = NULL;
  entry->backtrack_calls = NULL;

  /* Append: either the previous entry's next field or common->entries. */
  *link = entry;
  }

BACKTRACK_AS(recurse_backtrack)->entry = entry;

/* Call the entry point directly when its label exists already, otherwise
queue the call on the entry. */
if (entry->entry_label != NULL)
  JUMPTO(SLJIT_FAST_CALL, entry->entry_label);
else
  add_jump(compiler, &entry->entry_calls, JUMP(SLJIT_FAST_CALL));

/* Leave if the match is failed. */
add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
BACKTRACK_AS(recurse_backtrack)->matchingpath = LABEL();
return next_cc;
}
9895
9896
/* Runtime helper called from JIT code to perform a callout.

On entry callout_block->capture_top holds the number of ovector pairs and
callout_block->offset_vector holds the current subject pointer (the JIT
code stores it there temporarily). The remaining fields are completed
here, the pointer pairs of jit_ovector are converted to offsets in the
array that directly follows the callout block, and the user callout is
invoked.

Returns 0 when no callout function is set, otherwise the value returned
by the callout function. */
static sljit_s32 SLJIT_FUNC do_callout_jit(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector)
{
PCRE2_SPTR subject;
PCRE2_SPTR *src;
PCRE2_SIZE *dst;
sljit_u32 remaining, pair_number;

if (arguments->callout == NULL)
  return 0;

SLJIT_COMPILE_ASSERT(sizeof (PCRE2_SIZE) <= sizeof (sljit_sw), pcre2_size_must_be_lower_than_sljit_sw_size);

subject = arguments->begin;
dst = (PCRE2_SIZE*)(callout_block + 1);
remaining = callout_block->capture_top;

SLJIT_ASSERT(remaining >= 1);

callout_block->version = 2;
callout_block->callout_flags = 0;

/* Subject related fields. current_position must be computed before
offset_vector is overwritten below. */
callout_block->subject = subject;
callout_block->subject_length = arguments->end - arguments->begin;
callout_block->start_match = jit_ovector[0] - subject;
callout_block->current_position = (PCRE2_SPTR)callout_block->offset_vector - subject;

/* The converted offsets are stored right after the callout block. */
callout_block->offset_vector = dst;
callout_block->capture_top = 1;

/* Pair 0 is always reported as unset. */
dst[0] = PCRE2_UNSET;
dst[1] = PCRE2_UNSET;

/* Convert the remaining pointer pairs to offsets; capture_top becomes one
more than the index of the highest pair whose start is set. */
src = jit_ovector + 2;
dst += 2;
pair_number = 1;

for (remaining--; remaining != 0; remaining--)
  {
  pair_number++;

  dst[0] = (PCRE2_SIZE)(src[0] - subject);
  dst[1] = (PCRE2_SIZE)(src[1] - subject);

  if (dst[0] != PCRE2_UNSET)
    callout_block->capture_top = pair_number;

  src += 2;
  dst += 2;
  }

return (arguments->callout)(callout_block, arguments->callout_data);
}
9949
9950
/* Byte offset of field `arg` inside pcre2_callout_block. */
#define CALLOUT_ARG_OFFSET(arg) SLJIT_OFFSETOF(pcre2_callout_block, arg)
9952
9953
/* Emits the code of a callout item (OP_CALLOUT or the string form).

Space for a pcre2_callout_block followed by an offset array is reserved on
the JIT stack, the block fields known at compile time are filled from
immediates, and do_callout_jit is called. A positive return value jumps to
the backtrack list, any other non-zero value jumps to the abort path, and
zero continues on the matching path.

Returns the pattern pointer following the callout item, or NULL when the
backtrack record cannot be allocated. */
static SLJIT_INLINE PCRE2_SPTR compile_callout_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9954
{
9955
DEFINE_COMPILER;
9956
backtrack_common *backtrack;
9957
sljit_s32 mov_opcode;
9958
unsigned int callout_length = (*cc == OP_CALLOUT)
9959
    ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2 * LINK_SIZE);
9960
sljit_sw value1;
9961
sljit_sw value2;
9962
sljit_sw value3;
9963
/* Bytes needed for (top_bracket + 1) pairs of machine words. */
sljit_uw callout_arg_size = (common->re->top_bracket + 1) * 2 * SSIZE_OF(sw);
9964
9965
PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
9966
9967
/* Add the callout block itself and convert the total to a count of
machine words, rounding up. */
callout_arg_size = (sizeof(pcre2_callout_block) + callout_arg_size + sizeof(sljit_sw) - 1) / sizeof(sljit_sw);
9968
9969
allocate_stack(common, callout_arg_size);
9970
9971
SLJIT_ASSERT(common->capture_last_ptr != 0);
9972
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
9973
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
9974
/* The callout number is 0 for the string form. */
value1 = (*cc == OP_CALLOUT) ? cc[1 + 2 * LINK_SIZE] : 0;
9975
OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, value1);
9976
OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
9977
OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_top), SLJIT_IMM, common->re->top_bracket + 1);
9978
9979
/* These pointer sized fields temporarily store internal variables. */
/* do_callout_jit reads offset_vector as the current subject pointer before
replacing it with the real offset array. */
9980
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
9981
9982
if (common->mark_ptr != 0)
9983
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
9984
/* PCRE2_SIZE fields are written with a store of matching width. */
mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
9985
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 1));
9986
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 1 + LINK_SIZE));
9987
9988
/* String related fields: all zero for OP_CALLOUT, otherwise taken from the
string callout item. */
if (*cc == OP_CALLOUT)
9989
  {
9990
  value1 = 0;
9991
  value2 = 0;
9992
  value3 = 0;
9993
  }
9994
else
9995
  {
9996
  value1 = (sljit_sw) (cc + (1 + 4*LINK_SIZE) + 1);
9997
  value2 = (callout_length - (1 + 4*LINK_SIZE + 2));
9998
  value3 = (sljit_sw) (GET(cc, 1 + 3*LINK_SIZE));
9999
  }
10000
10001
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string), SLJIT_IMM, value1);
10002
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_length), SLJIT_IMM, value2);
10003
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_offset), SLJIT_IMM, value3);
10004
/* mark = TMP2 (loaded above) when marks are in use, otherwise immediate 0. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
10005
10006
SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
10007
10008
/* Needed to save important temporary registers. */
10009
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0);
10010
/* SLJIT_R0 = arguments */
/* SLJIT_R1 = callout block on the JIT stack, SLJIT_R2 = address of the JIT
offset vector. */
10011
OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0);
10012
GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
10013
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(32, W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_callout_jit));
10014
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
10015
free_stack(common, callout_arg_size);
10016
10017
/* Check return value. */
/* Greater than zero: backtrack. Any remaining non-zero value: abort. */
10018
OP2U(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
10019
add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_SIG_GREATER));
10020
if (common->abort_label == NULL)
10021
  add_jump(compiler, &common->abort, JUMP(SLJIT_NOT_EQUAL) /* SIG_LESS */);
10022
else
10023
  JUMPTO(SLJIT_NOT_EQUAL /* SIG_LESS */, common->abort_label);
10024
return cc + callout_length;
10025
}
10026
10027
#undef CALLOUT_ARG_SIZE
10028
#undef CALLOUT_ARG_OFFSET
10029
10030
/* Emits the code that moves STR_PTR backwards for OP_REVERSE (fixed
distance, lmin == lmax) or OP_VREVERSE (distance range lmin..lmax).

The position is first moved back by lmin characters; reaching before the
subject start (jit_arguments.begin) jumps to the failure list. For the
variable form the position after the lmin step is stored in STACK(3), then
the position is moved back by up to lmax - lmin more characters, stopping
at the subject start, and the result is stored in STACK(2). A
vreverse_backtrack record is pushed for OP_VREVERSE only.

Returns the pattern pointer following the item, or NULL when the
backtrack record cannot be allocated. */
static PCRE2_SPTR compile_reverse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
10031
{
10032
DEFINE_COMPILER;
10033
backtrack_common *backtrack = NULL;
10034
jump_list **reverse_failed;
10035
unsigned int lmin, lmax;
10036
#ifdef SUPPORT_UNICODE
10037
struct sljit_jump *jump;
10038
struct sljit_label *label;
10039
#endif
10040
10041
SLJIT_ASSERT(parent->top == NULL);
10042
10043
if (*cc == OP_REVERSE)
10044
  {
10045
  /* Fixed distance: failures go straight to the parent's list. */
  reverse_failed = &parent->own_backtracks;
10046
  lmin = GET2(cc, 1);
10047
  lmax = lmin;
10048
  cc += 1 + IMM2_SIZE;
10049
10050
  SLJIT_ASSERT(lmin > 0);
10051
  }
10052
else
10053
  {
10054
  SLJIT_ASSERT(*cc == OP_VREVERSE);
10055
  PUSH_BACKTRACK(sizeof(vreverse_backtrack), cc, NULL);
10056
10057
  reverse_failed = &backtrack->own_backtracks;
10058
  lmin = GET2(cc, 1);
10059
  lmax = GET2(cc, 1 + IMM2_SIZE);
10060
  cc += 1 + 2 * IMM2_SIZE;
10061
10062
  SLJIT_ASSERT(lmin < lmax);
10063
  }
10064
10065
/* TMP2 = start of the subject. */
if (HAS_VIRTUAL_REGISTERS)
10066
  {
10067
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
10068
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
10069
  }
10070
else
10071
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
10072
10073
#ifdef SUPPORT_UNICODE
10074
if (common->utf)
10075
  {
10076
  /* UTF mode: step back one character at a time, counting down in TMP3. */
  if (lmin > 0)
10077
    {
10078
    OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, lmin);
10079
    label = LABEL();
10080
    add_jump(compiler, reverse_failed, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0));
10081
    move_back(common, reverse_failed, FALSE);
10082
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
10083
    JUMPTO(SLJIT_NOT_ZERO, label);
10084
    }
10085
10086
  if (lmin < lmax)
10087
    {
10088
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
10089
10090
    /* The optional part: reaching the subject start ends the loop (jump)
    rather than failing. */
    OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, lmax - lmin);
10091
    label = LABEL();
10092
    jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
10093
    move_back(common, reverse_failed, FALSE);
10094
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
10095
    JUMPTO(SLJIT_NOT_ZERO, label);
10096
10097
    JUMPHERE(jump);
10098
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);
10099
    }
10100
  }
10101
else
10102
#endif
10103
  {
10104
  /* Fixed-width characters: a single subtraction followed by a bounds
  check. */
  if (lmin > 0)
10105
    {
10106
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(lmin));
10107
    add_jump(compiler, reverse_failed, CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0));
10108
    }
10109
10110
  if (lmin < lmax)
10111
    {
10112
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
10113
10114
    /* Move back by the optional distance; the SELECT below replaces
    STR_PTR with TMP2 (the subject start) when the result is less than
    TMP2. */
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(lmax - lmin));
10115
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, STR_PTR, 0, TMP2, 0);
10116
    SELECT(SLJIT_LESS, STR_PTR, TMP2, 0, STR_PTR);
10117
10118
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);
10119
    }
10120
  }
10121
10122
check_start_used_ptr(common);
10123
10124
/* Only the variable form has a backtrack record to store the label in. */
if (lmin < lmax)
10125
  BACKTRACK_AS(vreverse_backtrack)->matchingpath = LABEL();
10126
10127
return cc;
10128
}
10129
10130
/* Scans the items starting at cc and reports whether anything other than
callouts, OP_ALT and the simple position assertions listed below occurs
before the first OP_KET. Returns FALSE when OP_KET is reached after only
such items, TRUE as soon as any other opcode is met. */
static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(PCRE2_SPTR cc)
{
for (;;)
  {
  if (*cc == OP_KET)
    return FALSE;

  /* The string callout stores its own length inside the item. */
  if (*cc == OP_CALLOUT_STR)
    {
    cc += GET(cc, 1 + 2*LINK_SIZE);
    continue;
    }

  switch (*cc)
    {
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    case OP_CALLOUT:
    case OP_ALT:
    case OP_NOT_UCP_WORD_BOUNDARY:
    case OP_UCP_WORD_BOUNDARY:
    /* Fixed-length items: skip using the opcode length table. */
    cc += PRIV(OP_lengths)[*cc];
    continue;

    default:
    return TRUE;
    }
  }
}
10161
10162
/* Generates the matching path of an assertion (opcodes OP_ASSERT up to
OP_ASSERTBACK_NOT), optionally preceded by OP_BRAZERO / OP_BRAMINZERO.

Arguments:
  common       the compiler state; several of its fields (quit / accept
               lists and labels, then_trap, in_positive_assertion) are
               saved on entry and restored before every return
  cc           points to the assertion opcode, or to the preceding
               OP_BRAZERO / OP_BRAMINZERO (not allowed when conditional)
  backtrack    receives framesize, private_data_ptr and the failure jumps
  conditional  when TRUE, failure jumps are collected in
               backtrack->condfailed instead of
               backtrack->common.own_backtracks

Returns:       pointer just after the closing bracket of the assertion
               (cc + 1 + LINK_SIZE after the last alternative), or NULL
               when the sljit compiler reported an error
*/
static PCRE2_SPTR compile_assert_matchingpath(compiler_common *common, PCRE2_SPTR cc, assert_backtrack *backtrack, BOOL conditional)
{
DEFINE_COMPILER;
int framesize;
int extrasize;
BOOL local_quit_available = FALSE;
BOOL needs_control_head;
/* Number of extra stack words reserved for a lookbehind that contains a
variable length reverse (0 or 3). This is a count, not a flag, so it is
declared as int. */
int end_block_size = 0;
BOOL has_vreverse;
int private_data_ptr;
backtrack_common altbacktrack;
PCRE2_SPTR ccbegin;
PCRE2_UCHAR opcode;
PCRE2_UCHAR bra = OP_BRA;
jump_list *tmp = NULL;
jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.own_backtracks;
jump_list **found;
/* Saving previous accept variables. */
BOOL save_local_quit_available = common->local_quit_available;
BOOL save_in_positive_assertion = common->in_positive_assertion;
then_trap_backtrack *save_then_trap = common->then_trap;
struct sljit_label *save_quit_label = common->quit_label;
struct sljit_label *save_accept_label = common->accept_label;
jump_list *save_quit = common->quit;
jump_list *save_positive_assertion_quit = common->positive_assertion_quit;
jump_list *save_accept = common->accept;
struct sljit_jump *jump;
struct sljit_jump *brajump = NULL;

/* Assert captures then. */
common->then_trap = NULL;

if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  SLJIT_ASSERT(!conditional);
  bra = *cc;
  cc++;
  }

private_data_ptr = PRIVATE_DATA(cc);
SLJIT_ASSERT(private_data_ptr != 0);
framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
backtrack->framesize = framesize;
backtrack->private_data_ptr = private_data_ptr;
opcode = *cc;
SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* For positive assertions a matching alternative jumps to the local "tmp"
list (success); for negative ones a matching alternative is a failure. */
found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
ccbegin = cc;
cc += GET(cc, 1);

if (bra == OP_BRAMINZERO)
  {
  /* This is a braminzero backtrack path. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

if ((opcode == OP_ASSERTBACK || opcode == OP_ASSERTBACK_NOT) && find_vreverse(ccbegin))
  end_block_size = 3;

if (framesize < 0)
  {
  /* No frame: only STR_PTR (when needed), the reverse block and the
  control head are stored. */
  extrasize = 1;
  if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
    extrasize = 0;

  extrasize += end_block_size;

  if (needs_control_head)
    extrasize++;

  if (framesize == no_frame)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);

  if (extrasize > 0)
    allocate_stack(common, extrasize);

  if (needs_control_head)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);

  if (extrasize > 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

  if (needs_control_head)
    {
    SLJIT_ASSERT(extrasize == end_block_size + 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP1, 0);
    }
  }
else
  {
  /* A frame is needed: the previous private data value is saved as well. */
  extrasize = (needs_control_head ? 3 : 2) + end_block_size;

  OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);
  allocate_stack(common, framesize + extrasize);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

  if (needs_control_head)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 2), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
    }
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP1, 0);

  init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize);
  }

if (end_block_size > 0)
  {
  /* Save STR_END in the reverse block and make the current position the
  temporary end of the subject. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_END, 0);
  OP1(SLJIT_MOV, STR_END, 0, STR_PTR, 0);
  }

memset(&altbacktrack, 0, sizeof(backtrack_common));
if (conditional || (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT))
  {
  /* Control verbs cannot escape from these asserts. */
  local_quit_available = TRUE;
  common->local_quit_available = TRUE;
  common->quit_label = NULL;
  common->quit = NULL;
  }

common->in_positive_assertion = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK);
common->positive_assertion_quit = NULL;

/* One iteration per alternative of the assertion. */
while (1)
  {
  common->accept_label = NULL;
  common->accept = NULL;
  altbacktrack.top = NULL;
  altbacktrack.own_backtracks = NULL;

  /* Later alternatives start from the saved subject position. */
  if (*ccbegin == OP_ALT && extrasize > 0)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  altbacktrack.cc = ccbegin;
  ccbegin += 1 + LINK_SIZE;

  has_vreverse = (*ccbegin == OP_VREVERSE);
  if (*ccbegin == OP_REVERSE || has_vreverse)
    ccbegin = compile_reverse_matchingpath(common, ccbegin, &altbacktrack);

  compile_matchingpath(common, ccbegin, cc, &altbacktrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    /* Restore the saved compiler state before reporting the error. */
    if (local_quit_available)
      {
      common->local_quit_available = save_local_quit_available;
      common->quit_label = save_quit_label;
      common->quit = save_quit;
      }
    common->in_positive_assertion = save_in_positive_assertion;
    common->then_trap = save_then_trap;
    common->accept_label = save_accept_label;
    common->positive_assertion_quit = save_positive_assertion_quit;
    common->accept = save_accept;
    return NULL;
    }

  if (has_vreverse)
    {
    /* The alternative must end at the temporary STR_END set above. */
    SLJIT_ASSERT(altbacktrack.top != NULL);
    add_jump(compiler, &altbacktrack.top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
    }

  common->accept_label = LABEL();
  if (common->accept != NULL)
    set_jumps(common->accept, common->accept_label);

  /* Reset stack. */
  if (framesize < 0)
    {
    if (framesize == no_frame)
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    else if (extrasize > 0)
      free_stack(common, extrasize);

    if (end_block_size > 0)
      OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));

    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
    }
  else
    {
    if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));

      if (end_block_size > 0)
        OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 2));

      if (needs_control_head)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
      }
    else
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);

      if (end_block_size > 0)
        OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize + 1));

      if (needs_control_head)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 2));
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
      }
    }

  if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
    {
    /* We know that STR_PTR was stored on the top of the stack. */
    if (conditional)
      {
      if (extrasize > 0)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-end_block_size - (needs_control_head ? 2 : 1)));
      }
    else if (bra == OP_BRAZERO)
      {
      if (framesize < 0)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
      else
        {
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize));
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
        }
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (framesize >= 0)
      {
      /* For OP_BRA and OP_BRAMINZERO. */
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
      }
    }
  add_jump(compiler, found, JUMP(SLJIT_JUMP));

  compile_backtrackingpath(common, altbacktrack.top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    /* Restore the saved compiler state before reporting the error. */
    if (local_quit_available)
      {
      common->local_quit_available = save_local_quit_available;
      common->quit_label = save_quit_label;
      common->quit = save_quit;
      }
    common->in_positive_assertion = save_in_positive_assertion;
    common->then_trap = save_then_trap;
    common->accept_label = save_accept_label;
    common->positive_assertion_quit = save_positive_assertion_quit;
    common->accept = save_accept;
    return NULL;
    }
  set_jumps(altbacktrack.own_backtracks, LABEL());

  if (*cc != OP_ALT)
    break;

  ccbegin = cc;
  cc += GET(cc, 1);
  }

if (local_quit_available)
  {
  SLJIT_ASSERT(common->positive_assertion_quit == NULL);
  /* Makes the check less complicated below. */
  common->positive_assertion_quit = common->quit;
  }

/* None of them matched. */
if (common->positive_assertion_quit != NULL)
  {
  jump = JUMP(SLJIT_JUMP);
  set_jumps(common->positive_assertion_quit, LABEL());
  SLJIT_ASSERT(framesize != no_stack);
  if (framesize < 0)
    OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
  else
    {
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (extrasize + 1) * sizeof(sljit_sw));
    }
  JUMPHERE(jump);
  }

/* Restore STR_END saved in the reverse block. */
if (end_block_size > 0)
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(1));

if (needs_control_head)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1));

if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
  {
  /* Assert has failed. */
  if ((conditional && extrasize > 0) || bra == OP_BRAZERO)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  if (framesize < 0)
    {
    /* The topmost item should be 0. */
    if (bra == OP_BRAZERO)
      {
      if (extrasize >= 2)
        free_stack(common, extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (extrasize > 0)
      free_stack(common, extrasize);
    }
  else
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
    /* The topmost item should be 0. */
    if (bra == OP_BRAZERO)
      {
      free_stack(common, framesize + extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, framesize + extrasize);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  jump = JUMP(SLJIT_JUMP);
  if (bra != OP_BRAZERO)
    add_jump(compiler, target, jump);

  /* Assert is successful. */
  set_jumps(tmp, LABEL());
  if (framesize < 0)
    {
    /* We know that STR_PTR was stored on the top of the stack. */
    if (extrasize > 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));

    /* Keep the STR_PTR on the top of the stack. */
    if (bra == OP_BRAZERO)
      {
      /* This allocation is always successful. */
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      if (extrasize >= 2)
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }
    else if (bra == OP_BRAMINZERO)
      {
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    }
  else
    {
    if (bra == OP_BRA)
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));
      }
    else
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + end_block_size + 2) * sizeof(sljit_sw));

      if (extrasize == 2 + end_block_size)
        {
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
        if (bra == OP_BRAMINZERO)
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
        }
      else
        {
        SLJIT_ASSERT(extrasize == 3 + end_block_size);
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
        }
      }
    }

  if (bra == OP_BRAZERO)
    {
    backtrack->matchingpath = LABEL();
    SET_LABEL(jump, backtrack->matchingpath);
    }
  else if (bra == OP_BRAMINZERO)
    {
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
    JUMPHERE(brajump);
    if (framesize >= 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
      }
    set_jumps(backtrack->common.own_backtracks, LABEL());
    }
  }
else
  {
  /* AssertNot is successful. */
  if (framesize < 0)
    {
    if (extrasize > 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

    if (bra != OP_BRA)
      {
      if (extrasize >= 2)
        free_stack(common, extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (extrasize > 0)
      free_stack(common, extrasize);
    }
  else
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
    /* The topmost item should be 0. */
    if (bra != OP_BRA)
      {
      free_stack(common, framesize + extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, framesize + extrasize);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }

  if (bra == OP_BRAZERO)
    backtrack->matchingpath = LABEL();
  else if (bra == OP_BRAMINZERO)
    {
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
    JUMPHERE(brajump);
    }

  if (bra != OP_BRA)
    {
    SLJIT_ASSERT(found == &backtrack->common.own_backtracks);
    set_jumps(backtrack->common.own_backtracks, LABEL());
    backtrack->common.own_backtracks = NULL;
    }
  }

/* Restore the compiler state saved on entry. */
if (local_quit_available)
  {
  common->local_quit_available = save_local_quit_available;
  common->quit_label = save_quit_label;
  common->quit = save_quit;
  }
common->in_positive_assertion = save_in_positive_assertion;
common->then_trap = save_then_trap;
common->accept_label = save_accept_label;
common->positive_assertion_quit = save_positive_assertion_quit;
common->accept = save_accept;
return cc + 1 + LINK_SIZE;
}
10632
10633
/* Emits the stack rewinding code for a bracket whose frame size and
private data slot are given. When needs_control_head is TRUE, a value is
loaded from the stack into TMP1 and written to the control head slot. */
static SLJIT_INLINE void match_once_common(compiler_common *common, PCRE2_UCHAR ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
{
DEFINE_COMPILER;
/* TRUE for repeated brackets and for brackets with alternatives: one more
stack word is accounted for in these cases. */
const BOOL extra_word = (ket != OP_KET || has_alternatives);
int words;

if (framesize >= 0)
  {
  /* STACK_TOP is recomputed from the value kept in the private data slot. */
  words = framesize + (extra_word ? 2 : 1);
  OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, words * sizeof(sljit_sw));

  if (needs_control_head)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));

  /* TMP2 is loaded here for the OP_KETRMAX handling done by the caller. */
  if (ket == OP_KETRMAX)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  }
else
  {
  if (framesize != no_frame)
    {
    words = (needs_control_head ? 1 : 0) + (extra_word ? 1 : 0);
    if (words > 0)
      free_stack(common, words);
    }
  else
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);

  if (needs_control_head)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), extra_word ? STACK(-2) : STACK(-1));

  switch (ket)
    {
    case OP_KETRMAX:
    /* TMP2 is loaded here for the OP_KETRMAX handling done by the caller. */
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
    break;

    case OP_KETRMIN:
    /* Move the STR_PTR to the private_data_ptr. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
    break;

    default:
    break;
    }
  }

if (needs_control_head)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
}
10680
10681
/* Emits code that pushes the previous capture state to the stack, starting
at stack slot "stacksize", and installs the new values. Returns the index of
the first stack slot that is still unused. */
static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
{
DEFINE_COMPILER;
const int bracket_index = offset >> 1;
int next_slot = stacksize;

if (common->capture_last_ptr != 0)
  {
  /* Save the old "last capture" value and replace it with this bracket. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, bracket_index);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(next_slot), TMP1, 0);
  next_slot += 1;
  }

/* Nothing more is stored for optimized capturing brackets. */
if (common->optimized_cbracket[bracket_index] != 0)
  return next_slot;

/* Push the old OVECTOR pair, then set the pair to the value held in the
private data slot and the current STR_PTR. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(next_slot), TMP1, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(next_slot + 1), TMP2, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);

return next_slot + 2;
}
10705
10706
/* Helper called from the generated code: runs the script run check on
[ptr, endptr) with the last argument FALSE. Returns endptr when the check
succeeds, NULL otherwise. */
static PCRE2_SPTR SLJIT_FUNC do_script_run(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
{
  return PRIV(script_run)(ptr, endptr, FALSE) ? endptr : NULL;
}
10712
10713
#ifdef SUPPORT_UNICODE
10714
10715
/* Same as do_script_run, but passes TRUE as the last argument of the
script run check (selected by the caller when common->utf is set). Returns
endptr when the check succeeds, NULL otherwise. */
static PCRE2_SPTR SLJIT_FUNC do_script_run_utf(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
{
  return PRIV(script_run)(ptr, endptr, TRUE) ? endptr : NULL;
}
10721
10722
#endif /* SUPPORT_UNICODE */
10723
10724
/* Emits a call to the script run helper. The value stored in the private
data slot is passed in TMP1 and the current STR_PTR as the second argument.
The result is moved to STR_PTR, and a zero (NULL) result jumps to the
backtrack list of the parent. */
static void match_script_run_common(compiler_common *common, int private_data_ptr, backtrack_common *parent)
{
DEFINE_COMPILER;
jump_list **fail_list;
sljit_sw helper_addr = SLJIT_FUNC_ADDR(do_script_run);

#ifdef SUPPORT_UNICODE
if (common->utf)
  helper_addr = SLJIT_FUNC_ADDR(do_script_run_utf);
#endif

/* The argument registers of the call must match TMP1 and STR_PTR. */
SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);

if (parent->top != NULL)
  fail_list = &parent->top->simple_backtracks;
else
  fail_list = &parent->own_backtracks;

OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, helper_addr);

OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
add_jump(compiler, fail_list, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
}
10741
10742
/*
10743
  Handling bracketed expressions is probably the most complex part.
10744
10745
  Stack layout naming characters:
10746
    S - Push the current STR_PTR
10747
    0 - Push a 0 (NULL)
10748
    A - Push the current STR_PTR. Needed for restoring the STR_PTR
10749
        before the next alternative. Not pushed if there are no alternatives.
10750
    M - Any values pushed by the current alternative. Can be empty, or anything.
10751
    C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
10752
    L - Push the previous local (pointed by localptr) to the stack
10753
   () - optional values stored on the stack
10754
  ()* - optional, can be stored multiple times
10755
10756
  The following list shows the regular expression templates, their PCRE byte codes
10757
  and stack layout supported by pcre-sljit.
10758
10759
  (?:)                     OP_BRA     | OP_KET                A M
10760
  ()                       OP_CBRA    | OP_KET                C M
10761
  (?:)+                    OP_BRA     | OP_KETRMAX        0   A M S   ( A M S )*
10762
                           OP_SBRA    | OP_KETRMAX        0   L M S   ( L M S )*
10763
  (?:)+?                   OP_BRA     | OP_KETRMIN        0   A M S   ( A M S )*
10764
                           OP_SBRA    | OP_KETRMIN        0   L M S   ( L M S )*
10765
  ()+                      OP_CBRA    | OP_KETRMAX        0   C M S   ( C M S )*
10766
                           OP_SCBRA   | OP_KETRMAX        0   C M S   ( C M S )*
10767
  ()+?                     OP_CBRA    | OP_KETRMIN        0   C M S   ( C M S )*
10768
                           OP_SCBRA   | OP_KETRMIN        0   C M S   ( C M S )*
10769
  (?:)?    OP_BRAZERO    | OP_BRA     | OP_KET            S ( A M 0 )
10770
  (?:)??   OP_BRAMINZERO | OP_BRA     | OP_KET            S ( A M 0 )
10771
  ()?      OP_BRAZERO    | OP_CBRA    | OP_KET            S ( C M 0 )
10772
  ()??     OP_BRAMINZERO | OP_CBRA    | OP_KET            S ( C M 0 )
10773
  (?:)*    OP_BRAZERO    | OP_BRA     | OP_KETRMAX      S 0 ( A M S )*
10774
           OP_BRAZERO    | OP_SBRA    | OP_KETRMAX      S 0 ( L M S )*
10775
  (?:)*?   OP_BRAMINZERO | OP_BRA     | OP_KETRMIN      S 0 ( A M S )*
10776
           OP_BRAMINZERO | OP_SBRA    | OP_KETRMIN      S 0 ( L M S )*
10777
  ()*      OP_BRAZERO    | OP_CBRA    | OP_KETRMAX      S 0 ( C M S )*
10778
           OP_BRAZERO    | OP_SCBRA   | OP_KETRMAX      S 0 ( C M S )*
10779
  ()*?     OP_BRAMINZERO | OP_CBRA    | OP_KETRMIN      S 0 ( C M S )*
10780
           OP_BRAMINZERO | OP_SCBRA   | OP_KETRMIN      S 0 ( C M S )*
10781
10782
10783
  Stack layout naming characters:
10784
    A - Push the alternative index (starting from 0) on the stack.
10785
        Not pushed if there are no alternatives.
10786
    M - Any values pushed by the current alternative. Can be empty, or anything.
10787
10788
  The next list shows the possible content of a bracket:
10789
  (|)     OP_*BRA    | OP_ALT ...         M A
10790
  (?()|)  OP_*COND   | OP_ALT             M A
10791
  (?>|)   OP_ONCE    | OP_ALT ...         [stack trace] M A
10792
                                          Or nothing, if trace is unnecessary
10793
*/
10794
10795
/* Emits the matching-path code for a bracket group that starts at cc. The
group may be preceded by OP_BRAZERO or OP_BRAMINZERO. A bracket_backtrack
record is set up through PUSH_BACKTRACK, the code of the first alternative
(or, for constant conditions, of the selected branch) is generated, and the
remaining alternatives are only skipped and counted here.

Returns the position just after the closing ket plus repeat_length, or NULL
when the sljit compiler reports an error. In the visible body, parent is
referenced only for OP_BRAMINZERO, where it is cast to braminzero_backtrack
to reach its matchingpath label; PUSH_BACKTRACK presumably uses it as well
(macro not shown here - confirm). */
static PCRE2_SPTR compile_bracket_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
10796
{
10797
DEFINE_COMPILER;
10798
backtrack_common *backtrack;
10799
PCRE2_UCHAR opcode;
10800
int private_data_ptr = 0;
10801
int offset = 0;
10802
int i, stacksize;
10803
int repeat_ptr = 0, repeat_length = 0;
10804
int repeat_type = 0, repeat_count = 0;
10805
PCRE2_SPTR ccbegin;
10806
PCRE2_SPTR matchingpath;
10807
PCRE2_SPTR slot;
10808
PCRE2_UCHAR bra = OP_BRA;
10809
PCRE2_UCHAR ket;
10810
assert_backtrack *assert;
10811
BOOL has_alternatives;
10812
BOOL needs_control_head = FALSE;
10813
BOOL has_vreverse = FALSE;
10814
struct sljit_jump *jump;
10815
struct sljit_jump *skip;
10816
struct sljit_label *rmax_label = NULL;
10817
struct sljit_jump *braminzero = NULL;
10818
10819
PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
10820
10821
/* An optional OP_BRAZERO / OP_BRAMINZERO prefix is remembered in bra and
skipped; bra stays OP_BRA when there is no prefix. */
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
10822
  {
10823
  bra = *cc;
10824
  cc++;
10825
  /* NOTE(review): redundant store, opcode is assigned again right after
  this block. */
  opcode = *cc;
10826
  }
10827
10828
opcode = *cc;
10829
ccbegin = cc;
10830
/* matchingpath temporarily addresses the closing ket opcode of the group. */
matchingpath = bracketend(cc) - 1 - LINK_SIZE;
10831
ket = *matchingpath;
10832
/* A plain OP_KET with non-zero private data describes a counted repeat:
four consecutive private data entries hold the counter location, the
length added to the returned position, the repeat type and the count. */
if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
10833
  {
10834
  repeat_ptr = PRIVATE_DATA(matchingpath);
10835
  repeat_length = PRIVATE_DATA(matchingpath + 1);
10836
  repeat_type = PRIVATE_DATA(matchingpath + 2);
10837
  repeat_count = PRIVATE_DATA(matchingpath + 3);
10838
  SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
10839
  /* OP_UPTO / OP_MINUPTO are compiled like repeating kets; any other
  repeat type keeps ket == OP_KET. */
  if (repeat_type == OP_UPTO)
10840
    ket = OP_KETRMAX;
10841
  if (repeat_type == OP_MINUPTO)
10842
    ket = OP_KETRMIN;
10843
  }
10844
10845
/* From here on matchingpath is the start of the bracket content, just
after the opcode and its link field. */
matchingpath = ccbegin + 1 + LINK_SIZE;
10846
SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
10847
SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
10848
/* Advance cc by the link value of the bracket; the opcode found there is
tested for OP_ALT and the ket opcodes below. */
cc += GET(cc, 1);
10849
10850
has_alternatives = *cc == OP_ALT;
10851
if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
10852
  {
10853
  SLJIT_COMPILE_ASSERT(OP_DNRREF == OP_RREF + 1 && OP_FALSE == OP_RREF + 2 && OP_TRUE == OP_RREF + 3,
10854
    compile_time_checks_must_be_grouped_together);
10855
  /* Conditions in the OP_RREF..OP_TRUE range and OP_FAIL are resolved at
  compile time below, so they are treated as having no alternatives. */
  has_alternatives = ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL) ? FALSE : TRUE;
10856
  }
10857
10858
/* A conditional group closed by a repeating ket is handled as OP_SCOND. */
if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
10859
  opcode = OP_SCOND;
10860
10861
if (opcode == OP_CBRA || opcode == OP_SCBRA)
10862
  {
10863
  /* Capturing brackets have a pre-allocated space. */
10864
  offset = GET2(ccbegin, 1 + LINK_SIZE);
10865
  if (common->optimized_cbracket[offset] == 0)
10866
    {
10867
    private_data_ptr = OVECTOR_PRIV(offset);
10868
    offset <<= 1;
10869
    }
10870
  else
10871
    {
10872
    offset <<= 1;
10873
    private_data_ptr = OVECTOR(offset);
10874
    }
10875
  BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
10876
  /* Skip the capture number stored after the link field. */
  matchingpath += IMM2_SIZE;
10877
  }
10878
else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_ONCE || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
10879
  {
10880
  /* Other brackets simply allocate the next entry. */
10881
  private_data_ptr = PRIVATE_DATA(ccbegin);
10882
  SLJIT_ASSERT(private_data_ptr != 0);
10883
  BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
10884
  if (opcode == OP_ONCE)
10885
    BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
10886
  }
10887
10888
/* Instructions before the first alternative. The slots are counted first
and then filled in the same order after the allocation. */
10889
stacksize = 0;
10890
if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
10891
  stacksize++;
10892
if (bra == OP_BRAZERO)
10893
  stacksize++;
10894
10895
if (stacksize > 0)
10896
  allocate_stack(common, stacksize);
10897
10898
stacksize = 0;
10899
if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
10900
  {
10901
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10902
  stacksize++;
10903
  }
10904
10905
if (bra == OP_BRAZERO)
10906
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10907
10908
if (bra == OP_BRAMINZERO)
10909
  {
10910
  /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
10911
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10912
  if (ket != OP_KETRMIN)
10913
    {
10914
    free_stack(common, 1);
10915
    braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10916
    }
10917
  else if (opcode == OP_ONCE || opcode >= OP_SBRA)
10918
    {
10919
    jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10920
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10921
    /* Nothing stored during the first run. */
10922
    skip = JUMP(SLJIT_JUMP);
10923
    JUMPHERE(jump);
10924
    /* Checking zero-length iteration. */
10925
    if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
10926
      {
10927
      /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
10928
      braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10929
      }
10930
    else
10931
      {
10932
      /* Except when the whole stack frame must be saved. */
10933
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10934
      braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-BACKTRACK_AS(bracket_backtrack)->u.framesize - 2));
10935
      }
10936
    JUMPHERE(skip);
10937
    }
10938
  else
10939
    {
10940
    jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10941
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10942
    JUMPHERE(jump);
10943
    }
10944
  }
10945
10946
/* Counted repeats start by loading the counter; OP_EXACT loops back to
rmax_label from the decrement emitted near the end of this function. */
if (repeat_type != 0)
10947
  {
10948
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
10949
  if (repeat_type == OP_EXACT)
10950
    rmax_label = LABEL();
10951
  }
10952
10953
if (ket == OP_KETRMIN)
10954
  BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
10955
10956
if (ket == OP_KETRMAX)
10957
  {
10958
  rmax_label = LABEL();
10959
  if (has_alternatives && opcode >= OP_BRA && opcode < OP_SBRA && repeat_type == 0)
10960
    BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
10961
  }
10962
10963
/* Handling capturing brackets and alternatives. */
10964
if (opcode == OP_ONCE)
10965
  {
10966
  stacksize = 0;
10967
  if (needs_control_head)
10968
    {
10969
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10970
    stacksize++;
10971
    }
10972
10973
  if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
10974
    {
10975
    /* Neither capturing brackets nor recursions are found in the block. */
10976
    if (ket == OP_KETRMIN)
10977
      {
10978
      stacksize += 2;
10979
      if (!needs_control_head)
10980
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10981
      }
10982
    else
10983
      {
10984
      if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
10985
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
10986
      if (ket == OP_KETRMAX || has_alternatives)
10987
        stacksize++;
10988
      }
10989
10990
    if (stacksize > 0)
10991
      allocate_stack(common, stacksize);
10992
10993
    stacksize = 0;
10994
    if (needs_control_head)
10995
      {
10996
      stacksize++;
10997
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10998
      }
10999
11000
    if (ket == OP_KETRMIN)
11001
      {
11002
      /* TMP2 held the control head until it was stored above, so the
      previous private data value is loaded only now in that case. */
      if (needs_control_head)
11003
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11004
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
11005
      if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
11006
        OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
11007
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
11008
      }
11009
    else if (ket == OP_KETRMAX || has_alternatives)
11010
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
11011
    }
11012
  else
11013
    {
11014
    if (ket != OP_KET || has_alternatives)
11015
      stacksize++;
11016
11017
    stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
11018
    allocate_stack(common, stacksize);
11019
11020
    if (needs_control_head)
11021
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
11022
11023
    /* TMP1 keeps the previous private data value, TMP2 becomes the new one
    (STACK_TOP advanced by the size of the allocation). */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11024
    OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
11025
11026
    stacksize = needs_control_head ? 1 : 0;
11027
    if (ket != OP_KET || has_alternatives)
11028
      {
11029
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
11030
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
11031
      stacksize++;
11032
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
11033
      }
11034
    else
11035
      {
11036
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
11037
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
11038
      }
11039
    init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1);
11040
    }
11041
  }
11042
else if (opcode == OP_CBRA || opcode == OP_SCBRA)
11043
  {
11044
  /* Saving the previous values. */
11045
  if (common->optimized_cbracket[offset >> 1] != 0)
11046
    {
11047
    SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
11048
    allocate_stack(common, 2);
11049
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11050
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
11051
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
11052
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
11053
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
11054
    }
11055
  else
11056
    {
11057
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11058
    allocate_stack(common, 1);
11059
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
11060
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
11061
    }
11062
  }
11063
else if (opcode == OP_ASSERTBACK_NA && PRIVATE_DATA(ccbegin + 1))
11064
  {
11065
  /* Both private data words are saved on the stack; they then receive
  STR_PTR and the original STR_END, and STR_END is set to the current
  position until it is restored after the alternative (see the switch
  below). */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11066
  allocate_stack(common, 4);
11067
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
11068
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
11069
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), STR_END, 0);
11070
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
11071
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
11072
  OP1(SLJIT_MOV, STR_END, 0, STR_PTR, 0);
11073
11074
  has_vreverse = (*matchingpath == OP_VREVERSE);
11075
  if (*matchingpath == OP_REVERSE || has_vreverse)
11076
    matchingpath = compile_reverse_matchingpath(common, matchingpath, backtrack);
11077
  }
11078
else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
11079
  {
11080
  /* Saving the previous value. */
11081
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11082
  allocate_stack(common, 1);
11083
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
11084
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
11085
11086
  if (*matchingpath == OP_REVERSE)
11087
    matchingpath = compile_reverse_matchingpath(common, matchingpath, backtrack);
11088
  }
11089
else if (has_alternatives)
11090
  {
11091
  /* Pushing the starting string pointer. */
11092
  allocate_stack(common, 1);
11093
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11094
  }
11095
11096
/* Generating code for the first alternative. */
11097
if (opcode == OP_COND || opcode == OP_SCOND)
11098
  {
11099
  if (*matchingpath == OP_CREF)
11100
    {
11101
    SLJIT_ASSERT(has_alternatives);
11102
    /* The condition fails when the referenced ovector entry equals the
    value stored at OVECTOR(1). */
    add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
11103
      CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
11104
    matchingpath += 1 + IMM2_SIZE;
11105
    }
11106
  else if (*matchingpath == OP_DNCREF)
11107
    {
11108
    SLJIT_ASSERT(has_alternatives);
11109
11110
    /* i is the number of name table entries to test. The differences to
    OVECTOR(1) are OR-ed together in TMP2, so the zero flag is set only
    when every tested entry equals OVECTOR(1). STR_PTR serves as a scratch
    register and is preserved in TMP3. */
    i = GET2(matchingpath, 1 + IMM2_SIZE);
11111
    slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
11112
    OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
11113
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
11114
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
11115
    slot += common->name_entry_size;
11116
    i--;
11117
    while (i-- > 0)
11118
      {
11119
      OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
11120
      OP2(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, STR_PTR, 0);
11121
      slot += common->name_entry_size;
11122
      }
11123
    OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
11124
    add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO));
11125
    matchingpath += 1 + 2 * IMM2_SIZE;
11126
    }
11127
  else if ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL)
11128
    {
11129
    /* Never has other case. */
11130
    BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
11131
    SLJIT_ASSERT(!has_alternatives);
11132
11133
    /* In this branch stacksize is reused as a compile-time truth value:
    non-zero selects the first branch, zero the "else" branch. */
    if (*matchingpath == OP_TRUE)
11134
      {
11135
      stacksize = 1;
11136
      matchingpath++;
11137
      }
11138
    else if (*matchingpath == OP_FALSE || *matchingpath == OP_FAIL)
11139
      stacksize = 0;
11140
    else if (*matchingpath == OP_RREF)
11141
      {
11142
      stacksize = GET2(matchingpath, 1);
11143
      if (common->currententry == NULL)
11144
        stacksize = 0;
11145
      else if (stacksize == RREF_ANY)
11146
        stacksize = 1;
11147
      else if (common->currententry->start == 0)
11148
        stacksize = stacksize == 0;
11149
      else
11150
        stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
11151
11152
      if (stacksize != 0)
11153
        matchingpath += 1 + IMM2_SIZE;
11154
      }
11155
    else
11156
      {
11157
      if (common->currententry == NULL || common->currententry->start == 0)
11158
        stacksize = 0;
11159
      else
11160
        {
11161
        /* Search the name table entries for the number read from the
        current entry; stacksize reaches zero when none of them matches. */
        stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
11162
        slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
11163
        i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
11164
        while (stacksize > 0)
11165
          {
11166
          if ((int)GET2(slot, 0) == i)
11167
            break;
11168
          slot += common->name_entry_size;
11169
          stacksize--;
11170
          }
11171
        }
11172
11173
      if (stacksize != 0)
11174
        matchingpath += 1 + 2 * IMM2_SIZE;
11175
      }
11176
11177
      /* The stacksize == 0 is a common "else" case. */
11178
      if (stacksize == 0)
11179
        {
11180
        if (*cc == OP_ALT)
11181
          {
11182
          matchingpath = cc + 1 + LINK_SIZE;
11183
          cc += GET(cc, 1);
11184
          }
11185
        else
11186
          matchingpath = cc;
11187
        }
11188
    }
11189
  else
11190
    {
11191
    SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
11192
    /* Similar code as PUSH_BACKTRACK macro. */
11193
    assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
11194
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11195
      return NULL;
11196
    memset(assert, 0, sizeof(assert_backtrack));
11197
    assert->common.cc = matchingpath;
11198
    BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
11199
    matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
11200
    }
11201
  }
11202
11203
compile_matchingpath(common, matchingpath, cc, backtrack);
11204
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11205
  return NULL;
11206
11207
/* Opcode specific code emitted right after the alternative. */
switch (opcode)
11208
  {
11209
  case OP_ASSERTBACK_NA:
11210
    if (has_vreverse)
11211
      {
11212
      SLJIT_ASSERT(backtrack->top != NULL && PRIVATE_DATA(ccbegin + 1));
11213
      add_jump(compiler, &backtrack->top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
11214
      }
11215
11216
    if (PRIVATE_DATA(ccbegin + 1))
11217
      OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
11218
    break;
11219
  case OP_ASSERT_NA:
11220
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11221
    break;
11222
  case OP_ONCE:
11223
    match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
11224
    break;
11225
  case OP_SCRIPT_RUN:
11226
    match_script_run_common(common, private_data_ptr, backtrack);
11227
    break;
11228
  }
11229
11230
/* Count the stack slots needed after the alternative first; they are
filled in the same order after the allocation. */
stacksize = 0;
11231
if (repeat_type == OP_MINUPTO)
11232
  {
11233
  /* We need to preserve the counter. TMP2 will be used below. */
11234
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
11235
  stacksize++;
11236
  }
11237
if (ket != OP_KET || bra != OP_BRA)
11238
  stacksize++;
11239
if (offset != 0)
11240
  {
11241
  if (common->capture_last_ptr != 0)
11242
    stacksize++;
11243
  if (common->optimized_cbracket[offset >> 1] == 0)
11244
    stacksize += 2;
11245
  }
11246
if (has_alternatives && opcode != OP_ONCE)
11247
  stacksize++;
11248
11249
if (stacksize > 0)
11250
  allocate_stack(common, stacksize);
11251
11252
stacksize = 0;
11253
if (repeat_type == OP_MINUPTO)
11254
  {
11255
  /* TMP2 was set above. */
11256
  OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
11257
  stacksize++;
11258
  }
11259
11260
if (ket != OP_KET || bra != OP_BRA)
11261
  {
11262
  if (ket != OP_KET)
11263
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
11264
  else
11265
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
11266
  stacksize++;
11267
  }
11268
11269
if (offset != 0)
11270
  stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
11271
11272
/* Skip and count the other alternatives. */
11273
i = 1;
11274
while (*cc == OP_ALT)
11275
  {
11276
  cc += GET(cc, 1);
11277
  i++;
11278
  }
11279
11280
if (has_alternatives)
11281
  {
11282
  if (opcode != OP_ONCE)
11283
    {
11284
    /* With at most three alternatives an immediate 0 is stored in the
    slot; otherwise a patchable address is emitted and remembered in
    matching_mov_addr. */
    if (i <= 3)
11285
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
11286
    else
11287
      BACKTRACK_AS(bracket_backtrack)->u.matching_mov_addr = sljit_emit_mov_addr(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
11288
    }
11289
  if (ket != OP_KETRMAX)
11290
    BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
11291
  }
11292
11293
/* Must be after the matchingpath label. */
11294
if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
11295
  {
11296
  SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
11297
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
11298
  }
11299
11300
if (ket == OP_KETRMAX)
11301
  {
11302
  if (repeat_type != 0)
11303
    {
11304
    if (has_alternatives)
11305
      BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
11306
    /* Decrease the counter and repeat while it is not zero. */
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
11307
    JUMPTO(SLJIT_NOT_ZERO, rmax_label);
11308
    /* Drop STR_PTR for greedy plus quantifier. */
11309
    if (opcode != OP_ONCE)
11310
      free_stack(common, 1);
11311
    }
11312
  else if (opcode < OP_BRA || opcode >= OP_SBRA)
11313
    {
11314
    if (has_alternatives)
11315
      BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
11316
11317
    /* Checking zero-length iteration. */
11318
    if (opcode != OP_ONCE)
11319
      {
11320
      /* This case includes opcodes such as OP_SCRIPT_RUN. */
11321
      CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
11322
      /* Drop STR_PTR for greedy plus quantifier. */
11323
      if (bra != OP_BRAZERO)
11324
        free_stack(common, 1);
11325
      }
11326
    else
11327
      /* TMP2 must contain the starting STR_PTR. */
11328
      CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
11329
    }
11330
  else
11331
    JUMPTO(SLJIT_JUMP, rmax_label);
11332
  BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
11333
  }
11334
11335
if (repeat_type == OP_EXACT)
11336
  {
11337
  count_match(common);
11338
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
11339
  JUMPTO(SLJIT_NOT_ZERO, rmax_label);
11340
  }
11341
else if (repeat_type == OP_UPTO)
11342
  {
11343
  /* We need to preserve the counter. */
11344
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
11345
  allocate_stack(common, 1);
11346
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
11347
  }
11348
11349
if (bra == OP_BRAZERO)
11350
  BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
11351
11352
if (bra == OP_BRAMINZERO)
11353
  {
11354
  /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
11355
  JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
11356
  if (braminzero != NULL)
11357
    {
11358
    JUMPHERE(braminzero);
11359
    /* We need to release the end pointer to perform the
11360
    backtrack for the zero-length iteration. When
11361
    framesize is < 0, OP_ONCE will do the release itself. */
11362
    if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
11363
      {
11364
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11365
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
11366
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
11367
      }
11368
    else if (ket == OP_KETRMIN && opcode != OP_ONCE)
11369
      free_stack(common, 1);
11370
    }
11371
  /* Continue to the normal backtrack. */
11372
  }
11373
11374
if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO || (has_alternatives && repeat_type != OP_EXACT))
11375
  count_match(common);
11376
11377
/* Step over the closing ket and its link field. */
cc += 1 + LINK_SIZE;
11378
11379
if (opcode == OP_ONCE)
11380
  {
11381
  int data;
11382
  int framesize = BACKTRACK_AS(bracket_backtrack)->u.framesize;
11383
11384
  SLJIT_ASSERT(SHRT_MIN <= framesize && framesize < SHRT_MAX/2);
11385
  /* We temporarily encode the needs_control_head in the lowest bit.
11386
     The real value should be short enough for this operation to work
11387
     without triggering Undefined Behaviour. */
11388
  data = (int)((short)((unsigned short)framesize << 1) | (needs_control_head ? 1 : 0));
11389
  BACKTRACK_AS(bracket_backtrack)->u.framesize = data;
11390
  }
11391
/* repeat_length is non-zero only when the counted repeat data was found
at the closing ket above. */
return cc + repeat_length;
11392
}
11393
11394
/* Emits the matching-path code for the OP_BRAPOS, OP_SBRAPOS, OP_CBRAPOS
and OP_SCBRAPOS brackets, optionally preceded by OP_BRAPOSZERO. A
bracketpos_backtrack record is set up through PUSH_BACKTRACK, then each
alternative up to the closing OP_KETRPOS is compiled together with its
backtracking path; a successful alternative jumps back to the loop label.

Returns the position just after the OP_KETRPOS and its link field, or NULL
when the sljit compiler reports an error. */
static PCRE2_SPTR compile_bracketpos_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11395
{
11396
DEFINE_COMPILER;
11397
backtrack_common *backtrack;
11398
PCRE2_UCHAR opcode;
11399
int private_data_ptr;
11400
int cbraprivptr = 0;
11401
BOOL needs_control_head;
11402
int framesize;
11403
int stacksize;
11404
int offset = 0;
11405
BOOL zero = FALSE;
11406
PCRE2_SPTR ccbegin = NULL;
11407
int stack; /* Also contains the offset of control head. */
11408
struct sljit_label *loop = NULL;
11409
struct jump_list *emptymatch = NULL;
11410
11411
PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
11412
/* An OP_BRAPOSZERO prefix is remembered in zero and skipped. */
if (*cc == OP_BRAPOSZERO)
11413
  {
11414
  zero = TRUE;
11415
  cc++;
11416
  }
11417
11418
opcode = *cc;
11419
private_data_ptr = PRIVATE_DATA(cc);
11420
SLJIT_ASSERT(private_data_ptr != 0);
11421
BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
11422
/* ccbegin is set to the start of the first alternative; capturing
variants also read the capture number (offset becomes twice that number). */
switch(opcode)
11423
  {
11424
  case OP_BRAPOS:
11425
  case OP_SBRAPOS:
11426
  ccbegin = cc + 1 + LINK_SIZE;
11427
  break;
11428
11429
  case OP_CBRAPOS:
11430
  case OP_SCBRAPOS:
11431
  offset = GET2(cc, 1 + LINK_SIZE);
11432
  /* This case cannot be optimized in the same way as
11433
  normal capturing brackets. */
11434
  SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
11435
  cbraprivptr = OVECTOR_PRIV(offset);
11436
  offset <<= 1;
11437
  ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
11438
  break;
11439
11440
  default:
11441
  SLJIT_UNREACHABLE();
11442
  break;
11443
  }
11444
11445
/* A negative framesize selects the layout without a saved frame; the
no_frame value is additionally tested where STACK_TOP is recorded. */
framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
11446
BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
11447
if (framesize < 0)
11448
  {
11449
  /* Slot count: two ovector values (plus capture_last when it is in use)
  for capturing brackets, otherwise one slot for STR_PTR; one more for the
  control head and one more for the marker slot used when !zero. */
  if (offset != 0)
11450
    {
11451
    stacksize = 2;
11452
    if (common->capture_last_ptr != 0)
11453
      stacksize++;
11454
    }
11455
  else
11456
    stacksize = 1;
11457
11458
  if (needs_control_head)
11459
    stacksize++;
11460
  if (!zero)
11461
    stacksize++;
11462
11463
  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
11464
  allocate_stack(common, stacksize);
11465
  if (framesize == no_frame)
11466
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
11467
11468
  stack = 0;
11469
  if (offset != 0)
11470
    {
11471
    stack = 2;
11472
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
11473
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
11474
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
11475
    if (common->capture_last_ptr != 0)
11476
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
11477
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
11478
    if (needs_control_head)
11479
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
11480
    if (common->capture_last_ptr != 0)
11481
      {
11482
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
11483
      stack = 3;
11484
      }
11485
    }
11486
  else
11487
    {
11488
    if (needs_control_head)
11489
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
11490
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11491
    stack = 1;
11492
    }
11493
11494
  /* The marker slot (initialized to 1 when !zero) follows the control head
  slot; stack is stepped back afterwards so that it ends up as the index of
  the control head slot. */
  if (needs_control_head)
11495
    stack++;
11496
  if (!zero)
11497
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
11498
  if (needs_control_head)
11499
    {
11500
    stack--;
11501
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
11502
    }
11503
  }
11504
else
11505
  {
11506
  stacksize = framesize + 1;
11507
  if (!zero)
11508
    stacksize++;
11509
  if (needs_control_head)
11510
    stacksize++;
11511
  if (offset == 0)
11512
    stacksize++;
11513
  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
11514
11515
  allocate_stack(common, stacksize);
11516
  /* TMP1 keeps the previous private data value; the private data entry is
  then set to STACK_TOP advanced by the size of the allocation. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11517
  if (needs_control_head)
11518
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
11519
  OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
11520
11521
  stack = 0;
11522
  if (!zero)
11523
    {
11524
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
11525
    stack = 1;
11526
    }
11527
  if (needs_control_head)
11528
    {
11529
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
11530
    stack++;
11531
    }
11532
  if (offset == 0)
11533
    {
11534
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
11535
    stack++;
11536
    }
11537
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
11538
  init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize);
11539
  /* Step stack back over the STR_PTR slot (when present) and one more
  slot; it is used below as the index for reloading the control head. */
  stack -= 1 + (offset == 0);
11540
  }
11541
11542
if (offset != 0)
11543
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
11544
11545
/* One pass of the loop below per alternative: cc is advanced by the link
value and the range from ccbegin to cc is compiled. */
loop = LABEL();
11546
while (*cc != OP_KETRPOS)
11547
  {
11548
  backtrack->top = NULL;
11549
  backtrack->own_backtracks = NULL;
11550
  cc += GET(cc, 1);
11551
11552
  compile_matchingpath(common, ccbegin, cc, backtrack);
11553
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11554
    return NULL;
11555
11556
  if (framesize < 0)
11557
    {
11558
    if (framesize == no_frame)
11559
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11560
11561
    if (offset != 0)
11562
      {
11563
      /* TMP1 receives the start of this iteration (also used by the empty
      match test below); the ovector pair is updated and cbraprivptr is
      set to the current position. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
11564
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
11565
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
11566
      if (common->capture_last_ptr != 0)
11567
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
11568
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
11569
      }
11570
    else
11571
      {
11572
      if (opcode == OP_SBRAPOS)
11573
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
11574
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11575
      }
11576
11577
    /* Even if the match is empty, we need to reset the control head. */
11578
    if (needs_control_head)
11579
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
11580
11581
    /* An iteration that did not advance STR_PTR leaves the loop. */
    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
11582
      add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
11583
11584
    /* Clear the marker slot that was initialized to 1. */
    if (!zero)
11585
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
11586
    }
11587
  else
11588
    {
11589
    if (offset != 0)
11590
      {
11591
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
11592
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
11593
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
11594
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
11595
      if (common->capture_last_ptr != 0)
11596
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
11597
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
11598
      }
11599
    else
11600
      {
11601
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11602
      OP2(SLJIT_SUB, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
11603
      if (opcode == OP_SBRAPOS)
11604
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
11605
      OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(-framesize - 2), STR_PTR, 0);
11606
      }
11607
11608
    /* Even if the match is empty, we need to reset the control head. */
11609
    if (needs_control_head)
11610
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
11611
11612
    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
11613
      add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
11614
11615
    if (!zero)
11616
      {
11617
      /* NOTE(review): this code is inside the else branch of the
      framesize < 0 test above and framesize is not modified in between,
      so the first branch below cannot be taken. */
      if (framesize < 0)
11618
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
11619
      else
11620
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
11621
      }
11622
    }
11623
11624
  /* The alternative matched: try another iteration of the group. */
  JUMPTO(SLJIT_JUMP, loop);
11625
  flush_stubs(common);
11626
11627
  /* The backtracking path of this alternative is emitted right here; its
  own_backtracks jumps are bound to the label that follows it. */
  compile_backtrackingpath(common, backtrack->top);
11628
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11629
    return NULL;
11630
  set_jumps(backtrack->own_backtracks, LABEL());
11631
11632
  /* Reload STR_PTR from the location where the code above stored the
  current position after a successful iteration. */
  if (framesize < 0)
11633
    {
11634
    if (offset != 0)
11635
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
11636
    else
11637
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
11638
    }
11639
  else
11640
    {
11641
    if (offset != 0)
11642
      {
11643
      /* Last alternative. */
11644
      if (*cc == OP_KETRPOS)
11645
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11646
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
11647
      }
11648
    else
11649
      {
11650
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11651
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
11652
      }
11653
    }
11654
11655
  if (*cc == OP_KETRPOS)
11656
    break;
11657
  /* Continue with the alternative that follows this OP_ALT. */
  ccbegin = cc + 1 + LINK_SIZE;
11658
  }
11659
11660
/* We don't have to restore the control head in case of a failed match. */
11661
11662
backtrack->own_backtracks = NULL;
11663
/* Without the zero prefix, a marker slot that is still non-zero (it is
cleared after each successful iteration) takes the backtrack path. */
if (!zero)
11664
  {
11665
  if (framesize < 0)
11666
    add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
11667
  else /* TMP2 is set to [private_data_ptr] above. */
11668
    add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0));
11669
  }
11670
11671
/* None of them matched. */
11672
set_jumps(emptymatch, LABEL());
11673
count_match(common);
11674
return cc + 1 + LINK_SIZE;
11675
}
11676
11677
/* Decodes the single-item iterator starting at cc into a normalized form.

Arguments:
  common   compiler state (its utf flag is read, and it is passed on to
           next_opcode)
  cc       points to the iterator opcode (or to a class opcode)
  opcode   receives the repeat opcode translated into the OP_STAR ...
           OP_POSUPTO / OP_EXACT family; the PLUS variants are reported as
           the corresponding STAR variant with *exact set to 1
  type     receives the repeated item: OP_CHAR, OP_CHARI, OP_NOT, OP_NOTI,
           the opcode that follows an OP_TYPE* repeat, or the class opcode
  max      written only by the UPTO forms and by class ranges (for a class
           range it is the upper bound minus the lower bound, or 0 when the
           upper bound is 0); otherwise left untouched
  exact    receives the mandatory repeat count (0 when there is none)
  end      receives the address that follows the whole iterator

Returns:   for classes the address right after the class opcode, for
           OP_TYPE* repeats the address after the item opcode, otherwise
           the address of the repeated character
*/

static SLJIT_INLINE PCRE2_SPTR get_iterator_parameters(compiler_common *common, PCRE2_SPTR cc, PCRE2_UCHAR *opcode, PCRE2_UCHAR *type, sljit_u32 *max, sljit_u32 *exact, PCRE2_SPTR *end)
{
PCRE2_UCHAR op = *cc;
PCRE2_UCHAR item_type;
PCRE2_SPTR item_end;
sljit_u32 lower, upper, span;
int class_len;
BOOL possessive;

*exact = 0;

/* Map the five opcode families onto the OP_STAR family. */
if (op >= OP_STAR && op <= OP_POSUPTO)
  item_type = OP_CHAR;
else if (op >= OP_STARI && op <= OP_POSUPTOI)
  {
  item_type = OP_CHARI;
  op -= OP_STARI - OP_STAR;
  }
else if (op >= OP_NOTSTAR && op <= OP_NOTPOSUPTO)
  {
  item_type = OP_NOT;
  op -= OP_NOTSTAR - OP_STAR;
  }
else if (op >= OP_NOTSTARI && op <= OP_NOTPOSUPTOI)
  {
  item_type = OP_NOTI;
  op -= OP_NOTSTARI - OP_STAR;
  }
else if (op >= OP_TYPESTAR && op <= OP_TYPEPOSUPTO)
  {
  /* OP_END is a placeholder: the real type is read after the count. */
  item_type = OP_END;
  op -= OP_TYPESTAR - OP_STAR;
  }
else
  {
  /* Character class: the repeat opcode is stored after the class data. */
  SLJIT_ASSERT(op == OP_CLASS || op == OP_NCLASS || op == OP_XCLASS);
  *type = op;
  cc++;
  class_len = (op < OP_XCLASS) ? (int)(1 + (32 / sizeof(PCRE2_UCHAR))) : GET(cc, 0);
  op = cc[class_len - 1];

  if (op >= OP_CRSTAR && op <= OP_CRMINQUERY)
    {
    op -= OP_CRSTAR - OP_STAR;
    item_end = cc + class_len;

    if (op == OP_PLUS || op == OP_MINPLUS)
      {
      *exact = 1;
      op -= OP_PLUS - OP_STAR;
      }
    }
  else if (op >= OP_CRPOSSTAR && op <= OP_CRPOSQUERY)
    {
    op -= OP_CRPOSSTAR - OP_POSSTAR;
    item_end = cc + class_len;

    if (op == OP_POSPLUS)
      {
      *exact = 1;
      op = OP_POSSTAR;
      }
    }
  else
    {
    SLJIT_ASSERT(op == OP_CRRANGE || op == OP_CRMINRANGE || op == OP_CRPOSRANGE);
    possessive = (op == OP_CRPOSRANGE);
    upper = GET2(cc, (class_len + IMM2_SIZE));
    lower = GET2(cc, class_len);
    *exact = lower;

    if (upper == 0)
      {
      /* An upper bound of zero is handled as a STAR after the fixed part. */
      *max = 0;
      if (possessive)
        op = OP_POSSTAR;
      else
        op -= OP_CRRANGE - OP_STAR;
      }
    else
      {
      span = upper - lower;
      *max = span;

      if (span == 0)
        op = OP_EXACT;
      else if (possessive)
        op = (span == 1) ? OP_POSQUERY : OP_POSUPTO;
      else if (span == 1)
        op -= OP_CRRANGE - OP_QUERY;
      else
        op -= OP_CRRANGE - OP_UPTO;
      }

    item_end = cc + class_len + 2 * IMM2_SIZE;
    }

  *opcode = op;
  *end = item_end;
  return cc;
  }

/* Every non-class family skips its opcode. */
cc++;

switch(op)
  {
  case OP_EXACT:
  *exact = GET2(cc, 0);
  cc += IMM2_SIZE;
  break;

  case OP_UPTO:
  case OP_MINUPTO:
  case OP_POSUPTO:
  *max = GET2(cc, 0);
  cc += IMM2_SIZE;
  break;

  case OP_PLUS:
  case OP_MINPLUS:
  *exact = 1;
  op -= OP_PLUS - OP_STAR;
  break;

  case OP_POSPLUS:
  *exact = 1;
  op = OP_POSSTAR;
  break;

  default:
  break;
  }

*opcode = op;

if (item_type == OP_END)
  {
  /* OP_TYPE* repeat: the item opcode follows the (optional) count. */
  *type = *cc;
  *end = next_opcode(common, cc);
  return cc + 1;
  }

*type = item_type;
item_end = cc + 1;
#ifdef SUPPORT_UNICODE
if (common->utf && HAS_EXTRALEN(*cc)) item_end += GET_EXTRALEN(*cc);
#endif
*end = item_end;
return cc;
}
11821
11822
/* Emits the matching path of a single-item iterator (a repeated character,
character type or character class).

The iterator at cc is decoded by get_iterator_parameters() into a normalized
opcode, an item type and the exact / max counts. A char_iterator_backtrack
record is pushed on 'parent'. Code for the fixed ('exact') repetitions is
emitted first, then the code selected by the normalized opcode.

Returns the 'end' address produced by get_iterator_parameters(), advanced by
two code units when the charpos optimization consumes a following character.
PUSH_BACKTRACK is passed NULL as its last argument - presumably the value
returned when the allocation fails (confirm against the macro). */
static PCRE2_SPTR compile_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11823
{
11824
DEFINE_COMPILER;
11825
backtrack_common *backtrack;
11826
PCRE2_UCHAR opcode;
11827
PCRE2_UCHAR type;
11828
sljit_u32 max = 0, exact;
11829
sljit_s32 early_fail_ptr = PRIVATE_DATA(cc + 1);
11830
sljit_s32 early_fail_type;
11831
BOOL charpos_enabled;
11832
PCRE2_UCHAR charpos_char;
11833
unsigned int charpos_othercasebit;
11834
PCRE2_SPTR end;
11835
jump_list *no_match = NULL;
11836
jump_list *no_char1_match = NULL;
11837
struct sljit_jump *jump = NULL;
11838
struct sljit_label *label;
11839
/* Without a private data slot the iterator values are kept in STACK(0) and
STACK(1) relative to STACK_TOP; otherwise they are kept at private_data_ptr
(and the following word) relative to SLJIT_SP. */
int private_data_ptr = PRIVATE_DATA(cc);
11840
int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
11841
int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
11842
int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + SSIZE_OF(sw);
11843
int tmp_base, tmp_offset;
11844
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11845
BOOL use_tmp;
11846
#endif
11847
11848
PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL);
11849
11850
/* PRIVATE_DATA(cc + 1) packs the early fail type into its low three bits;
the remaining bits are an offset that is used relative to SLJIT_SP below. */
early_fail_type = (early_fail_ptr & 0x7);
11851
early_fail_ptr >>= 3;
11852
11853
/* During recursion, these optimizations are disabled. */
11854
if (common->early_fail_start_ptr == 0 && common->fast_forward_bc_ptr == NULL)
11855
  {
11856
  early_fail_ptr = 0;
11857
  early_fail_type = type_skip;
11858
  }
11859
11860
SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || early_fail_ptr == 0
11861
  || (early_fail_ptr >= common->early_fail_start_ptr && early_fail_ptr <= common->early_fail_end_ptr));
11862
11863
/* type_fail: backtrack at once when STR_PTR is not beyond the stored value. */
if (early_fail_type == type_fail)
11864
  add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr));
11865
11866
cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
11867
11868
/* Scratch location used below as a repeat counter or as a saved position:
TMP3 in general, the POSSESSIVE0 slot (relative to SLJIT_SP) for OP_EXTUNI. */
if (type != OP_EXTUNI)
11869
  {
11870
  tmp_base = TMP3;
11871
  tmp_offset = 0;
11872
  }
11873
else
11874
  {
11875
  tmp_base = SLJIT_MEM1(SLJIT_SP);
11876
  tmp_offset = POSSESSIVE0;
11877
  }
11878
11879
/* Handle fixed part first. */
11880
if (exact > 1)
11881
  {
11882
  SLJIT_ASSERT(early_fail_ptr == 0);
11883
11884
  /* In complete mode without UTF, and for items other than OP_ANYNL and
  OP_EXTUNI, a single up-front comparison of STR_PTR + exact against STR_END
  is emitted and compile_char1_matchingpath receives FALSE as its last
  argument; the other branch passes TRUE. Presumably that argument enables
  the per-item end of subject check - confirm against
  compile_char1_matchingpath. */
  if (common->mode == PCRE2_JIT_COMPLETE
11885
#ifdef SUPPORT_UNICODE
11886
      && !common->utf
11887
#endif
11888
      && type != OP_ANYNL && type != OP_EXTUNI)
11889
    {
11890
    OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
11891
    add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0));
11892
    OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
11893
    label = LABEL();
11894
    compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, FALSE);
11895
    OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11896
    JUMPTO(SLJIT_NOT_ZERO, label);
11897
    }
11898
  else
11899
    {
11900
    OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
11901
    label = LABEL();
11902
    compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE);
11903
    OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11904
    JUMPTO(SLJIT_NOT_ZERO, label);
11905
    }
11906
  }
11907
else if (exact == 1)
11908
  compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE);
11909
11910
if (early_fail_type == type_fail_range)
11911
  {
11912
  /* Range end first, followed by range start. */
11913
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr);
11914
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + SSIZE_OF(sw));
11915
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, TMP2, 0);
11916
  OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, TMP2, 0);
11917
  add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_LESS_EQUAL, TMP2, 0, TMP1, 0));
11918
11919
  /* Both words of the stored range are reset to the current position. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11920
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + SSIZE_OF(sw), STR_PTR, 0);
11921
  }
11922
11923
switch(opcode)
11924
  {
11925
  case OP_STAR:
11926
  case OP_UPTO:
11927
  SLJIT_ASSERT(early_fail_ptr == 0 || opcode == OP_STAR);
11928
11929
  if (type == OP_ANYNL || type == OP_EXTUNI)
11930
    {
11931
    SLJIT_ASSERT(private_data_ptr == 0);
11932
    SLJIT_ASSERT(early_fail_ptr == 0);
11933
11934
    /* The starting position and a zero word are pushed first; afterwards
    every successful iteration pushes the new STR_PTR. For OP_UPTO the
    remaining count is kept in the POSSESSIVE0 slot. */
    allocate_stack(common, 2);
11935
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11936
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
11937
11938
    if (opcode == OP_UPTO)
11939
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, max);
11940
11941
    label = LABEL();
11942
    compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
11943
    if (opcode == OP_UPTO)
11944
      {
11945
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
11946
      OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
11947
      jump = JUMP(SLJIT_ZERO);
11948
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
11949
      }
11950
11951
    /* We cannot use TMP3 because of allocate_stack. */
11952
    allocate_stack(common, 1);
11953
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11954
    JUMPTO(SLJIT_JUMP, label);
11955
    if (jump != NULL)
11956
      JUMPHERE(jump);
11957
    BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11958
    break;
11959
    }
11960
#ifdef SUPPORT_UNICODE
11961
  else if (type == OP_ALLANY && !common->invalid_utf)
11962
#else
11963
  else if (type == OP_ALLANY)
11964
#endif
11965
    {
11966
    /* No per-character matching code is emitted for OP_ALLANY here. For
    OP_STAR the pointer is moved straight to STR_END; for OP_UPTO without
    UTF it is advanced by max code units. offset1 keeps the starting
    position and offset0 the position that was reached. OP_UPTO in UTF mode
    falls through to the generic code below. */
    if (opcode == OP_STAR)
11967
      {
11968
      if (private_data_ptr == 0)
11969
        allocate_stack(common, 2);
11970
11971
      OP1(SLJIT_MOV, base, offset0, STR_END, 0);
11972
      OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11973
11974
      OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
11975
      process_partial_match(common);
11976
11977
      if (early_fail_ptr != 0)
11978
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
11979
      BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11980
      break;
11981
      }
11982
#ifdef SUPPORT_UNICODE
11983
    else if (!common->utf)
11984
#else
11985
    else
11986
#endif
11987
      {
11988
      if (private_data_ptr == 0)
11989
        allocate_stack(common, 2);
11990
11991
      OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11992
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));
11993
11994
      /* Complete mode: presumably SELECT limits STR_PTR to STR_END when it
      went past it (confirm against the SELECT macro). Otherwise
      process_partial_match is reached only when STR_PTR > STR_END. */
      if (common->mode == PCRE2_JIT_COMPLETE)
11995
        {
11996
        OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
11997
        SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
11998
        }
11999
      else
12000
        {
12001
        jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
12002
        process_partial_match(common);
12003
        JUMPHERE(jump);
12004
        }
12005
12006
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12007
12008
      if (early_fail_ptr != 0)
12009
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
12010
      BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
12011
      break;
12012
      }
12013
    }
12014
12015
  charpos_enabled = FALSE;
12016
  charpos_char = 0;
12017
  charpos_othercasebit = 0;
12018
12019
  /* charpos optimization: considered when the repeated item is not a
  literal character itself and the iterator is directly followed by OP_CHAR
  or OP_CHARI. It is turned off in UTF mode when that character has extra
  code units, and for a caseless character that has another case but for
  which char_get_othercase_bit returns 0. */
  if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI))
12020
    {
12021
#ifdef SUPPORT_UNICODE
12022
    charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]);
12023
#else
12024
    charpos_enabled = TRUE;
12025
#endif
12026
    if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1))
12027
      {
12028
      charpos_othercasebit = char_get_othercase_bit(common, end + 1);
12029
      if (charpos_othercasebit == 0)
12030
        charpos_enabled = FALSE;
12031
      }
12032
12033
    if (charpos_enabled)
12034
      {
12035
      charpos_char = end[1];
12036
      /* Consume the OP_CHAR opcode. */
12037
      end += 2;
12038
#if PCRE2_CODE_UNIT_WIDTH == 8
12039
      SLJIT_ASSERT((charpos_othercasebit >> 8) == 0);
12040
#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
12041
      SLJIT_ASSERT((charpos_othercasebit >> 9) == 0);
12042
      if ((charpos_othercasebit & 0x100) != 0)
12043
        charpos_othercasebit = (charpos_othercasebit & 0xff) << 8;
12044
#endif
12045
      if (charpos_othercasebit != 0)
12046
        charpos_char |= charpos_othercasebit;
12047
12048
      /* The backtrack record receives the same character data. */
      BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE;
12049
      BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = charpos_char;
12050
      BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = charpos_othercasebit;
12051
      }
12052
    }
12053
12054
  if (charpos_enabled)
12055
    {
12056
    if (opcode == OP_UPTO)
12057
      OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max + 1);
12058
12059
    /* Search the first instance of charpos_char. */
12060
    jump = JUMP(SLJIT_JUMP);
12061
    label = LABEL();
12062
    if (opcode == OP_UPTO)
12063
      {
12064
      OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
12065
      add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_ZERO));
12066
      }
12067
    compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, FALSE);
12068
    if (early_fail_ptr != 0)
12069
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
12070
    JUMPHERE(jump);
12071
12072
    detect_partial_match(common, &backtrack->own_backtracks);
12073
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
12074
    if (charpos_othercasebit != 0)
12075
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
12076
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
12077
12078
    /* Both slots start at the first position where charpos_char follows. */
    if (private_data_ptr == 0)
12079
      allocate_stack(common, 2);
12080
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12081
    OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
12082
12083
    if (opcode == OP_UPTO)
12084
      {
12085
      OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
12086
      add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
12087
      }
12088
12089
    /* Search the last instance of charpos_char. */
12090
    label = LABEL();
12091
    compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
12092
    if (early_fail_ptr != 0)
12093
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
12094
    detect_partial_match(common, &no_match);
12095
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
12096
    if (charpos_othercasebit != 0)
12097
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
12098
12099
    if (opcode == OP_STAR)
12100
      {
12101
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
12102
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12103
      JUMPTO(SLJIT_JUMP, label);
12104
      }
12105
    else
12106
      {
12107
      jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char);
12108
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12109
      JUMPHERE(jump);
12110
      OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
12111
      JUMPTO(SLJIT_NOT_ZERO, label);
12112
      }
12113
12114
    /* Continue one code unit after the last recorded position, i.e. after
    the consumed character. */
    set_jumps(no_match, LABEL());
12115
    OP2(SLJIT_ADD, STR_PTR, 0, base, offset0, SLJIT_IMM, IN_UCHARS(1));
12116
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12117
    }
12118
  else
12119
    {
12120
    /* Generic greedy loop: offset1 keeps the starting position and offset0
    receives the position reached when the loop ends. */
    if (private_data_ptr == 0)
12121
      allocate_stack(common, 2);
12122
12123
    OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
12124
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
12125
    use_tmp = (!HAS_VIRTUAL_REGISTERS && opcode == OP_STAR);
12126
    SLJIT_ASSERT(!use_tmp || tmp_base == TMP3);
12127
12128
    if (common->utf)
12129
      OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0);
12130
#endif
12131
    if (opcode == OP_UPTO)
12132
      OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
12133
12134
    detect_partial_match(common, &no_match);
12135
    label = LABEL();
12136
    compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
12137
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
12138
    if (common->utf)
12139
      OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0);
12140
#endif
12141
12142
    if (opcode == OP_UPTO)
12143
      {
12144
      OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
12145
      add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
12146
      }
12147
12148
    detect_partial_match_to(common, label);
12149
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
12150
12151
    set_jumps(no_char1_match, LABEL());
12152
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
12153
    /* In UTF mode the position saved after the last successful item is
    restored; otherwise STR_PTR is stepped back by one code unit on the
    no_char1_match path. */
    if (common->utf)
12154
      {
12155
      set_jumps(no_match, LABEL());
12156
      if (use_tmp)
12157
        {
12158
        OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
12159
        OP1(SLJIT_MOV, base, offset0, TMP3, 0);
12160
        }
12161
      else
12162
        OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12163
      }
12164
    else
12165
#endif
12166
      {
12167
      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
12168
      set_jumps(no_match, LABEL());
12169
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12170
      }
12171
12172
    if (early_fail_ptr != 0)
12173
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
12174
    }
12175
12176
  BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
12177
  break;
12178
12179
  /* The lazy forms only record the current position (and for OP_MINUPTO
  the value max + 1) here; no item matching code is emitted for them in
  this function. */
  case OP_MINSTAR:
12180
  if (private_data_ptr == 0)
12181
    allocate_stack(common, 1);
12182
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12183
  BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
12184
  if (early_fail_ptr != 0)
12185
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
12186
  break;
12187
12188
  case OP_MINUPTO:
12189
  SLJIT_ASSERT(early_fail_ptr == 0);
12190
  if (private_data_ptr == 0)
12191
    allocate_stack(common, 2);
12192
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12193
  OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1);
12194
  BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
12195
  break;
12196
12197
  /* OP_QUERY tries the item once after saving the position; OP_MINQUERY
  only saves the position. */
  case OP_QUERY:
12198
  case OP_MINQUERY:
12199
  SLJIT_ASSERT(early_fail_ptr == 0);
12200
  if (private_data_ptr == 0)
12201
    allocate_stack(common, 1);
12202
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12203
  if (opcode == OP_QUERY)
12204
    compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
12205
  BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
12206
  break;
12207
12208
  /* The fixed repetitions were already emitted above. */
  case OP_EXACT:
12209
  break;
12210
12211
  /* The possessive forms below do not call allocate_stack and do not set
  the matchingpath label of the backtrack record. */
  case OP_POSSTAR:
12212
#if defined SUPPORT_UNICODE
12213
  if (type == OP_ALLANY && !common->invalid_utf)
12214
#else
12215
  if (type == OP_ALLANY)
12216
#endif
12217
    {
12218
    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
12219
    process_partial_match(common);
12220
    if (early_fail_ptr != 0)
12221
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
12222
    break;
12223
    }
12224
12225
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
12226
  if (type == OP_EXTUNI || common->utf)
12227
    {
12228
    /* The position after the last successful item is kept in the scratch
    location and restored when the loop ends. */
    OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
12229
    detect_partial_match(common, &no_match);
12230
    label = LABEL();
12231
    compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
12232
    OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
12233
    detect_partial_match_to(common, label);
12234
12235
    set_jumps(no_match, LABEL());
12236
    OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
12237
    if (early_fail_ptr != 0)
12238
      {
12239
      if (!HAS_VIRTUAL_REGISTERS && tmp_base == TMP3)
12240
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, TMP3, 0);
12241
      else
12242
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
12243
      }
12244
    break;
12245
    }
12246
#endif
12247
12248
  detect_partial_match(common, &no_match);
12249
  label = LABEL();
12250
  compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
12251
  detect_partial_match_to(common, label);
12252
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
12253
12254
  set_jumps(no_char1_match, LABEL());
12255
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
12256
  set_jumps(no_match, LABEL());
12257
  if (early_fail_ptr != 0)
12258
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
12259
  break;
12260
12261
  case OP_POSUPTO:
12262
  SLJIT_ASSERT(early_fail_ptr == 0);
12263
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
12264
  if (common->utf)
12265
    {
12266
    /* The last good position is kept in the POSSESSIVE1 slot, the remaining
    count in the scratch location. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
12267
    OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
12268
12269
    detect_partial_match(common, &no_match);
12270
    label = LABEL();
12271
    compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
12272
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
12273
    OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
12274
    add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
12275
    detect_partial_match_to(common, label);
12276
12277
    set_jumps(no_match, LABEL());
12278
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
12279
    break;
12280
    }
12281
#endif
12282
12283
  if (type == OP_ALLANY)
12284
    {
12285
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));
12286
12287
    if (common->mode == PCRE2_JIT_COMPLETE)
12288
      {
12289
      OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
12290
      SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
12291
      }
12292
    else
12293
      {
12294
      jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
12295
      process_partial_match(common);
12296
      JUMPHERE(jump);
12297
      }
12298
    break;
12299
    }
12300
12301
  OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
12302
12303
  detect_partial_match(common, &no_match);
12304
  label = LABEL();
12305
  compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
12306
  OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
12307
  add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
12308
  detect_partial_match_to(common, label);
12309
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
12310
12311
  set_jumps(no_char1_match, LABEL());
12312
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
12313
  set_jumps(no_match, LABEL());
12314
  break;
12315
12316
  /* The position is saved before the single attempt, updated after a
  successful one, and copied back to STR_PTR in both cases. */
  case OP_POSQUERY:
12317
  SLJIT_ASSERT(early_fail_ptr == 0);
12318
  OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
12319
  compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
12320
  OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
12321
  set_jumps(no_match, LABEL());
12322
  OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
12323
  break;
12324
12325
  default:
12326
  SLJIT_UNREACHABLE();
12327
  break;
12328
  }
12329
12330
count_match(common);
12331
return end;
12332
}
12333
12334
/* Emits the code for the OP_FAIL, OP_ACCEPT and OP_ASSERT_ACCEPT items.

OP_FAIL jumps unconditionally to the backtracking path. The accept items
jump to the accept target (common->accept_label when it is already known,
otherwise the jump is queued on common->accept). Unless the item is
OP_ASSERT_ACCEPT, a recursion is being compiled, or the pattern cannot match
an empty string, the PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART option bits of
the run-time arguments are tested first when the match would be empty.

Returns cc + 1. PUSH_BACKTRACK is passed NULL as its last argument. */

static SLJIT_INLINE PCRE2_SPTR compile_fail_accept_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
PCRE2_SPTR next = cc + 1;
PCRE2_UCHAR verb;
BOOL in_recursion;
sljit_s32 args_reg = HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS;

PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);

verb = *cc;
in_recursion = (common->currententry != NULL);

if (verb == OP_FAIL)
  {
  add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_JUMP));
  return next;
  }

/* An end anchored pattern may only accept at the end of the subject. */
if (verb == OP_ACCEPT && !in_recursion && (common->re->overall_options & PCRE2_ENDANCHORED) != 0)
  add_jump(compiler, &common->restart_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));

if (verb == OP_ASSERT_ACCEPT || in_recursion || !common->might_be_empty)
  {
  /* The notempty options need not be tested. */
  if (common->accept_label != NULL)
    JUMPTO(SLJIT_JUMP, common->accept_label);
  else
    add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
  return next;
  }

/* Accept immediately when STR_PTR differs from the value in OVECTOR(0). */
if (common->accept_label != NULL)
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
else
  add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));

/* Load the options word of the run-time arguments into TMP2. */
if (HAS_VIRTUAL_REGISTERS)
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, options));

/* PCRE2_NOTEMPTY set: backtrack. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_NOT_ZERO));

/* PCRE2_NOTEMPTY_ATSTART clear: accept. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
if (common->accept_label != NULL)
  JUMPTO(SLJIT_ZERO, common->accept_label);
else
  add_jump(compiler, &common->accept, JUMP(SLJIT_ZERO));

/* Accept when STR_PTR differs from the str field of the arguments,
otherwise backtrack. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, str));
if (common->accept_label != NULL)
  CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
else
  add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));

add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_JUMP));
return next;
}
12389
12390
/* Emits the code for a close item whose 2-byte argument at cc + 1 is a
capture group number.

STR_PTR is stored in the second output vector word of the group. Unless
common->optimized_cbracket marks the group as optimized, the value held in
the group's OVECTOR_PRIV slot is also copied into the first word. Nothing is
emitted while a recursion is being compiled (common->currententry != NULL).

Returns the address that follows the item. */

static SLJIT_INLINE PCRE2_SPTR compile_close_matchingpath(compiler_common *common, PCRE2_SPTR cc)
{
DEFINE_COMPILER;
PCRE2_SPTR next = cc + 1 + IMM2_SIZE;
int group = GET2(cc, 1);
int pair = group << 1;
BOOL optimized_cbracket = common->optimized_cbracket[group] != 0;

/* Data will be discarded anyway... */
if (common->currententry != NULL)
  return next;

if (optimized_cbracket)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(pair + 1), STR_PTR, 0);
  return next;
  }

/* The private value is loaded first, then both words are written. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(group));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(pair + 1), STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(pair), TMP1, 0);
return next;
}
12408
12409
/* Emits the matching path code of a backtracking control verb.

A backtrack_common record is pushed on 'parent' for every verb. OP_SKIP
additionally pushes the current STR_PTR on the stack. OP_COMMIT_ARG,
OP_PRUNE_ARG and OP_THEN_ARG store the address of their argument (cc + 2)
both in the common->mark_ptr slot and in the mark_ptr field of the run-time
arguments. All other verbs emit nothing here.

Returns the address that follows the verb; for the *_ARG forms this skips
2 + cc[1] further code units. PUSH_BACKTRACK is passed NULL as its last
argument. */

static SLJIT_INLINE PCRE2_SPTR compile_control_verb_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
PCRE2_UCHAR opcode = *cc;
BOOL sets_mark = (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG);
BOOL has_argument = (sets_mark || opcode == OP_SKIP_ARG);
PCRE2_SPTR ccend = cc + 1;
sljit_s32 args_reg = HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS;

if (has_argument)
  ccend += 2 + cc[1];

PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);

if (opcode == OP_SKIP)
  {
  allocate_stack(common, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  return ccend;
  }

if (!sets_mark)
  return ccend;

/* Record the argument address as the current mark. */
if (HAS_VIRTUAL_REGISTERS)
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);

return ccend;
}
12440
12441
/* One-element code buffer holding OP_THEN_TRAP. compile_then_trap_matchingpath
points the cc field of its backtrack record at this buffer. */
static PCRE2_UCHAR then_trap_opcode[1] = { OP_THEN_TRAP };
12442
12443
/* Pushes a then_trap_backtrack record for the code range cc .. ccend and
emits the code that builds the matching control entry on the stack.

The record becomes common->then_trap; its cc field points to
then_trap_opcode, 'start' is the offset of cc from common->start and
'framesize' is the result of get_framesize(). Three stack words are written
above the optional frame: the previous control head value, type_then_trap and
'start'. The control head slot is then pointed at these three words. When
framesize is not negative, init_frame() is called as well. */

static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
then_trap_backtrack *trap;
BOOL needs_control_head;
int framesize;
int size;

PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
trap = BACKTRACK_AS(then_trap_backtrack);

common->then_trap = trap;
trap->common.cc = then_trap_opcode;
trap->start = (sljit_sw)(cc - common->start);
trap->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);

/* Three control words plus the frame, if there is one. */
framesize = trap->framesize;
size = (framesize < 0) ? 3 : 3 + framesize;

/* Keep the previous control head in TMP2 while the stack is extended. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
allocate_stack(common, size);

if (size <= 3)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
else
  OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));

OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, trap->start);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);

framesize = trap->framesize;
if (framesize >= 0)
  init_frame(common, cc, ccend, framesize - 1, 0);
}
12473
12474
/* Compiles the matching path for the opcodes in the range [cc, ccend).

Each opcode is dispatched to the helper that emits its matching code. The
helpers return the position of the next opcode, which becomes the new cc; a
NULL result ends the compilation of this range immediately.

When the pattern uses THEN (common->has_then) and common->then_offsets is
non-zero for the starting opcode, a then trap is installed before the loop.
After the loop a second then_trap_backtrack record is pushed that refers to
the trap, and the previous value of common->then_trap is restored.

Arguments:
  common      compiler state
  cc          first opcode to compile
  ccend       end of the range (OP_END, or an opcode between OP_ALT and
              OP_KETRPOS, as asserted below)
  parent      backtrack record that receives the new backtrack entries */
static void compile_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
12475
{
12476
DEFINE_COMPILER;
12477
backtrack_common *backtrack;
12478
BOOL has_then_trap = FALSE;
12479
then_trap_backtrack *save_then_trap = NULL;
12480
12481
SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));
12482
12483
if (common->has_then && common->then_offsets[cc - common->start] != 0)
12484
  {
12485
  SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
12486
  has_then_trap = TRUE;
12487
  save_then_trap = common->then_trap;
12488
  /* Tail item on backtrack. */
12489
  compile_then_trap_matchingpath(common, cc, ccend, parent);
12490
  }
12491
12492
while (cc < ccend)
12493
  {
12494
  switch(*cc)
12495
    {
12496
    case OP_SOD:
12497
    case OP_SOM:
12498
    case OP_NOT_WORD_BOUNDARY:
12499
    case OP_WORD_BOUNDARY:
12500
    case OP_EODN:
12501
    case OP_EOD:
12502
    case OP_DOLL:
12503
    case OP_DOLLM:
12504
    case OP_CIRC:
12505
    case OP_CIRCM:
12506
    case OP_NOT_UCP_WORD_BOUNDARY:
12507
    case OP_UCP_WORD_BOUNDARY:
12508
    /* Failure jumps go to the most recent child backtrack's simple_backtracks
    list when one exists, otherwise to the parent's own_backtracks list. The
    same selection is used by the other simple matchers below. */
    cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks);
12509
    break;
12510
12511
    case OP_NOT_DIGIT:
12512
    case OP_DIGIT:
12513
    case OP_NOT_WHITESPACE:
12514
    case OP_WHITESPACE:
12515
    case OP_NOT_WORDCHAR:
12516
    case OP_WORDCHAR:
12517
    case OP_ANY:
12518
    case OP_ALLANY:
12519
    case OP_ANYBYTE:
12520
    case OP_NOTPROP:
12521
    case OP_PROP:
12522
    case OP_ANYNL:
12523
    case OP_NOT_HSPACE:
12524
    case OP_HSPACE:
12525
    case OP_NOT_VSPACE:
12526
    case OP_VSPACE:
12527
    case OP_EXTUNI:
12528
    case OP_NOT:
12529
    case OP_NOTI:
12530
    cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);
12531
    break;
12532
12533
    case OP_SET_SOM:
12534
    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
12535
    /* Save the current OVECTOR(0) on the stack, then replace it with STR_PTR. */
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
12536
    allocate_stack(common, 1);
12537
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
12538
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
12539
    cc++;
12540
    break;
12541
12542
    case OP_CHAR:
12543
    case OP_CHARI:
12544
    /* The multi-character matcher is only used in PCRE2_JIT_COMPLETE mode; the
    other modes match one character at a time. */
    if (common->mode == PCRE2_JIT_COMPLETE)
12545
      cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks);
12546
    else
12547
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);
12548
    break;
12549
12550
    case OP_STAR:
12551
    case OP_MINSTAR:
12552
    case OP_PLUS:
12553
    case OP_MINPLUS:
12554
    case OP_QUERY:
12555
    case OP_MINQUERY:
12556
    case OP_UPTO:
12557
    case OP_MINUPTO:
12558
    case OP_EXACT:
12559
    case OP_POSSTAR:
12560
    case OP_POSPLUS:
12561
    case OP_POSQUERY:
12562
    case OP_POSUPTO:
12563
    case OP_STARI:
12564
    case OP_MINSTARI:
12565
    case OP_PLUSI:
12566
    case OP_MINPLUSI:
12567
    case OP_QUERYI:
12568
    case OP_MINQUERYI:
12569
    case OP_UPTOI:
12570
    case OP_MINUPTOI:
12571
    case OP_EXACTI:
12572
    case OP_POSSTARI:
12573
    case OP_POSPLUSI:
12574
    case OP_POSQUERYI:
12575
    case OP_POSUPTOI:
12576
    case OP_NOTSTAR:
12577
    case OP_NOTMINSTAR:
12578
    case OP_NOTPLUS:
12579
    case OP_NOTMINPLUS:
12580
    case OP_NOTQUERY:
12581
    case OP_NOTMINQUERY:
12582
    case OP_NOTUPTO:
12583
    case OP_NOTMINUPTO:
12584
    case OP_NOTEXACT:
12585
    case OP_NOTPOSSTAR:
12586
    case OP_NOTPOSPLUS:
12587
    case OP_NOTPOSQUERY:
12588
    case OP_NOTPOSUPTO:
12589
    case OP_NOTSTARI:
12590
    case OP_NOTMINSTARI:
12591
    case OP_NOTPLUSI:
12592
    case OP_NOTMINPLUSI:
12593
    case OP_NOTQUERYI:
12594
    case OP_NOTMINQUERYI:
12595
    case OP_NOTUPTOI:
12596
    case OP_NOTMINUPTOI:
12597
    case OP_NOTEXACTI:
12598
    case OP_NOTPOSSTARI:
12599
    case OP_NOTPOSPLUSI:
12600
    case OP_NOTPOSQUERYI:
12601
    case OP_NOTPOSUPTOI:
12602
    case OP_TYPESTAR:
12603
    case OP_TYPEMINSTAR:
12604
    case OP_TYPEPLUS:
12605
    case OP_TYPEMINPLUS:
12606
    case OP_TYPEQUERY:
12607
    case OP_TYPEMINQUERY:
12608
    case OP_TYPEUPTO:
12609
    case OP_TYPEMINUPTO:
12610
    case OP_TYPEEXACT:
12611
    case OP_TYPEPOSSTAR:
12612
    case OP_TYPEPOSPLUS:
12613
    case OP_TYPEPOSQUERY:
12614
    case OP_TYPEPOSUPTO:
12615
    cc = compile_iterator_matchingpath(common, cc, parent);
12616
    break;
12617
12618
    case OP_CLASS:
12619
    case OP_NCLASS:
12620
    /* The code unit after the opcode and its 32 bytes of data (presumably the
    class bitmap) tells whether a repeat opcode follows; if so the class is
    compiled as an iterator. */
    if (cc[1 + (32 / sizeof(PCRE2_UCHAR))] >= OP_CRSTAR && cc[1 + (32 / sizeof(PCRE2_UCHAR))] <= OP_CRPOSRANGE)
12621
      cc = compile_iterator_matchingpath(common, cc, parent);
12622
    else
12623
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);
12624
    break;
12625
12626
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
12627
    case OP_XCLASS:
12628
    /* GET(cc, 1) is used as the distance to the opcode that follows the class;
    a repeat opcode there selects the iterator path. */
    if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE)
12629
      cc = compile_iterator_matchingpath(common, cc, parent);
12630
    else
12631
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);
12632
    break;
12633
#endif
12634
12635
    case OP_REF:
12636
    case OP_REFI:
12637
    /* A back reference followed by a repeat opcode is compiled as an iterator;
    otherwise it is matched once and cc is advanced past its operand. */
    if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE)
12638
      cc = compile_ref_iterator_matchingpath(common, cc, parent);
12639
    else
12640
      {
12641
      compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE, FALSE);
12642
      cc += 1 + IMM2_SIZE;
12643
      }
12644
    break;
12645
12646
    case OP_DNREF:
12647
    case OP_DNREFI:
12648
    /* Same as OP_REF, except that the opcode has two IMM2 operands and
    compile_dnref_search() is emitted before the reference is matched. */
    if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE)
12649
      cc = compile_ref_iterator_matchingpath(common, cc, parent);
12650
    else
12651
      {
12652
      compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks);
12653
      compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE, FALSE);
12654
      cc += 1 + 2 * IMM2_SIZE;
12655
      }
12656
    break;
12657
12658
    case OP_RECURSE:
12659
    cc = compile_recurse_matchingpath(common, cc, parent);
12660
    break;
12661
12662
    case OP_CALLOUT:
12663
    case OP_CALLOUT_STR:
12664
    cc = compile_callout_matchingpath(common, cc, parent);
12665
    break;
12666
12667
    case OP_ASSERT:
12668
    case OP_ASSERT_NOT:
12669
    case OP_ASSERTBACK:
12670
    case OP_ASSERTBACK_NOT:
12671
    PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
12672
    cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
12673
    break;
12674
12675
    case OP_BRAMINZERO:
12676
    PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
12677
    /* The group itself is not compiled here: cc jumps to the end of the
    bracket. One stack word (STR_PTR) is saved, or two words (0 and STR_PTR)
    when the bracket is closed by OP_KETRMIN. */
    cc = bracketend(cc + 1);
12678
    if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
12679
      {
12680
      allocate_stack(common, 1);
12681
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
12682
      }
12683
    else
12684
      {
12685
      allocate_stack(common, 2);
12686
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
12687
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
12688
      }
12689
    BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
12690
    count_match(common);
12691
    break;
12692
12693
    case OP_ASSERT_NA:
12694
    case OP_ASSERTBACK_NA:
12695
    case OP_ONCE:
12696
    case OP_SCRIPT_RUN:
12697
    case OP_BRA:
12698
    case OP_CBRA:
12699
    case OP_COND:
12700
    case OP_SBRA:
12701
    case OP_SCBRA:
12702
    case OP_SCOND:
12703
    cc = compile_bracket_matchingpath(common, cc, parent);
12704
    break;
12705
12706
    case OP_BRAZERO:
12707
    /* The opcode after OP_BRAZERO selects the compiler: values above
    OP_ASSERTBACK_NOT are handled as brackets, the rest as assertions. */
    if (cc[1] > OP_ASSERTBACK_NOT)
12708
      cc = compile_bracket_matchingpath(common, cc, parent);
12709
    else
12710
      {
12711
      PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
12712
      cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
12713
      }
12714
    break;
12715
12716
    case OP_BRAPOS:
12717
    case OP_CBRAPOS:
12718
    case OP_SBRAPOS:
12719
    case OP_SCBRAPOS:
12720
    case OP_BRAPOSZERO:
12721
    cc = compile_bracketpos_matchingpath(common, cc, parent);
12722
    break;
12723
12724
    case OP_MARK:
12725
    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
12726
    SLJIT_ASSERT(common->mark_ptr != 0);
12727
    /* The previous mark value is saved on the stack (slot 4 of a five word
    entry when common->has_skip_arg is set, otherwise the only slot). Then
    cc + 2 is stored both in the local mark slot and in the mark_ptr field of
    the jit_arguments structure. */
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
12728
    allocate_stack(common, common->has_skip_arg ? 5 : 1);
12729
    if (HAS_VIRTUAL_REGISTERS)
12730
      OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
12731
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
12732
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
12733
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
12734
    OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
12735
    if (common->has_skip_arg)
12736
      {
12737
      /* Link a type_mark entry into the control head chain: slot 0 holds the
      previous control head, slots 1-3 the tag, cc + 2 and STR_PTR. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
12738
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
12739
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
12740
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
12741
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
12742
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
12743
      }
12744
    /* cc[1] is used as a length here (presumably that of the mark name). */
    cc += 1 + 2 + cc[1];
12745
    break;
12746
12747
    case OP_PRUNE:
12748
    case OP_PRUNE_ARG:
12749
    case OP_SKIP:
12750
    case OP_SKIP_ARG:
12751
    case OP_THEN:
12752
    case OP_THEN_ARG:
12753
    case OP_COMMIT:
12754
    case OP_COMMIT_ARG:
12755
    cc = compile_control_verb_matchingpath(common, cc, parent);
12756
    break;
12757
12758
    case OP_FAIL:
12759
    case OP_ACCEPT:
12760
    case OP_ASSERT_ACCEPT:
12761
    cc = compile_fail_accept_matchingpath(common, cc, parent);
12762
    break;
12763
12764
    case OP_CLOSE:
12765
    cc = compile_close_matchingpath(common, cc);
12766
    break;
12767
12768
    case OP_SKIPZERO:
12769
    /* Nothing is emitted; the group after OP_SKIPZERO is stepped over. */
    cc = bracketend(cc + 1);
12770
    break;
12771
12772
    default:
12773
    SLJIT_UNREACHABLE();
12774
    return;
12775
    }
12776
  /* A helper returned NULL: stop compiling this range. */
  if (cc == NULL)
12777
    return;
12778
  }
12779
12780
if (has_then_trap)
12781
  {
12782
  /* Head item on backtrack. */
12783
  PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
12784
  BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
12785
  BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
12786
  common->then_trap = save_then_trap;
12787
  }
12788
SLJIT_ASSERT(cc == ccend);
12789
}
12790
12791
/* The helper macros of the matching path compilers are dropped here. */
#undef PUSH_BACKTRACK
12792
#undef PUSH_BACKTRACK_NOVALUE
12793
#undef BACKTRACK_AS
12794
12795
/* Calls compile_backtrackingpath() for the given backtrack record and returns
from the enclosing function (which must return void) when the sljit compiler
reports an error. Expects `common` and `compiler` to be in scope. */
#define COMPILE_BACKTRACKINGPATH(current) \
12796
  do \
12797
    { \
12798
    compile_backtrackingpath(common, (current)); \
12799
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
12800
      return; \
12801
    } \
12802
  while (0)
12803
12804
/* Casts the variable named `current` to a pointer to the given backtrack
type. */
#define CURRENT_AS(type) ((type *)current)
12805
12806
/* Emits the backtracking path of a character iterator.

The iterator parameters (opcode, type, max, exact, end) are decoded by
get_iterator_parameters(). The state of the iterator is addressed through
base/offset0/offset1: two stack words, STACK(0) and STACK(1), when the
iterator has no private data (private_data_ptr == 0), otherwise two
consecutive private data words relative to SLJIT_SP.

Each case tries to produce another match candidate and jumps back to the
matchingpath label stored in the backtrack record. When no candidate is left,
stack words used for the state are released (only in the stack based variant)
and control reaches the final label, where current->own_backtracks is bound. */
static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12807
{
12808
DEFINE_COMPILER;
12809
PCRE2_SPTR cc = current->cc;
12810
PCRE2_UCHAR opcode;
12811
PCRE2_UCHAR type;
12812
sljit_u32 max = 0, exact;
12813
struct sljit_label *label = NULL;
12814
struct sljit_jump *jump = NULL;
12815
jump_list *jumplist = NULL;
12816
PCRE2_SPTR end;
12817
int private_data_ptr = PRIVATE_DATA(cc);
12818
int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
12819
int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
12820
int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + SSIZE_OF(sw);
12821
12822
cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
12823
12824
switch(opcode)
12825
  {
12826
  case OP_STAR:
12827
  case OP_UPTO:
12828
  if (type == OP_ANYNL || type == OP_EXTUNI)
12829
    {
12830
    /* Pop a saved STR_PTR from the stack and resume matching unless the
    popped value is zero. */
    SLJIT_ASSERT(private_data_ptr == 0);
12831
    set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
12832
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12833
    free_stack(common, 1);
12834
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12835
    }
12836
  else
12837
    {
12838
    if (CURRENT_AS(char_iterator_backtrack)->u.charpos.enabled)
12839
      {
12840
      /* Step backwards from the position in offset0 down to the limit in
      offset1 (kept in TMP2). Matching resumes at a position only when the
      character before it equals charpos.chr, after OR-ing in othercasebit
      when that is non-zero. */
      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12841
      OP1(SLJIT_MOV, TMP2, 0, base, offset1);
12842
      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
12843
12844
      jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
12845
      label = LABEL();
12846
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
12847
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12848
      if (CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit != 0)
12849
        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit);
12850
      CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.chr, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12851
      move_back(common, NULL, TRUE);
12852
      CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP2, 0, label);
12853
      }
12854
    else
12855
      {
12856
      /* Give back one character: if the saved position is still above the
      limit in offset1, move back, store the new position and retry. */
      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12857
      jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
12858
      move_back(common, NULL, TRUE);
12859
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12860
      JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12861
      }
12862
    /* Reached when the limit was hit: nothing more to give back. */
    JUMPHERE(jump);
12863
    if (private_data_ptr == 0)
12864
      free_stack(common, 2);
12865
    }
12866
  break;
12867
12868
  case OP_MINSTAR:
12869
  /* Restore the saved position and try to match one more character. On
  success the new position is saved and matching resumes; the failure jumps
  collected in jumplist land after the resume jump. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12870
  compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
12871
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12872
  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12873
  set_jumps(jumplist, LABEL());
12874
  if (private_data_ptr == 0)
12875
    free_stack(common, 1);
12876
  break;
12877
12878
  case OP_MINUPTO:
12879
  /* Like OP_MINSTAR, with a counter in offset1 that is decremented first;
  when it becomes zero no further character is tried. */
  OP1(SLJIT_MOV, TMP1, 0, base, offset1);
12880
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12881
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
12882
  add_jump(compiler, &jumplist, JUMP(SLJIT_ZERO));
12883
12884
  OP1(SLJIT_MOV, base, offset1, TMP1, 0);
12885
  compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
12886
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12887
  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12888
12889
  set_jumps(jumplist, LABEL());
12890
  if (private_data_ptr == 0)
12891
    free_stack(common, 2);
12892
  break;
12893
12894
  case OP_QUERY:
12895
  /* Two entry points share the same steps: load the saved position from
  offset0 and clear the slot. On normal backtracking entry matching resumes
  only if the loaded position is non-zero; the u.backtracks jumps bound below
  always resume. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12896
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
12897
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12898
  jump = JUMP(SLJIT_JUMP);
12899
  set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
12900
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12901
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
12902
  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12903
  JUMPHERE(jump);
12904
  if (private_data_ptr == 0)
12905
    free_stack(common, 1);
12906
  break;
12907
12908
  case OP_MINQUERY:
12909
  /* Load the saved position and clear the slot. A zero position skips the
  attempt; otherwise one character is matched and matching resumes. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12910
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
12911
  jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
12912
  compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
12913
  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12914
  set_jumps(jumplist, LABEL());
12915
  JUMPHERE(jump);
12916
  if (private_data_ptr == 0)
12917
    free_stack(common, 1);
12918
  break;
12919
12920
  /* No backtracking code is emitted for these opcodes. */
  case OP_EXACT:
12921
  case OP_POSSTAR:
12922
  case OP_POSQUERY:
12923
  case OP_POSUPTO:
12924
  break;
12925
12926
  default:
12927
  SLJIT_UNREACHABLE();
12928
  break;
12929
  }
12930
12931
set_jumps(current->own_backtracks, LABEL());
12932
}
12933
12934
/* Emits the backtracking path of a repeated back reference.

The repeat opcode follows the reference operand(s): one IMM2 value for
OP_REF / OP_REFI, two for the other reference opcodes handled here. Its
lowest bit selects between the two code shapes below. */
static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
ref_iterator_backtrack *iter = CURRENT_AS(ref_iterator_backtrack);
PCRE2_SPTR ref_code = current->cc;
BOOL single_ref = (*ref_code == OP_REF || *ref_code == OP_REFI);
PCRE2_UCHAR repeat_op = ref_code[single_ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];

if ((repeat_op & 0x1) != 0)
  {
  /* Not the maximize case: read the saved STR_PTR without popping it and
  resume matching while it is non-zero. Only when no retry is left are the
  failure jumps bound and the stack words (2 for a single reference,
  3 otherwise) released. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, iter->matchingpath);
  set_jumps(current->own_backtracks, LABEL());
  free_stack(common, single_ref ? 2 : 3);
  return;
  }

/* Maximize case: the failure jumps enter here, a saved STR_PTR is popped,
and matching resumes unless the popped value is zero. */
set_jumps(current->own_backtracks, LABEL());
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
free_stack(common, 1);
CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, iter->matchingpath);
}
12958
12959
/* Emits the backtracking path of a recursion.

An inlined recursion is handled by compiling the backtracking path of the
nested backtrack records. Otherwise a fast call to the backtrack entry of the
recursed group is emitted; when TMP1 is non-zero after that call, execution
continues at the matching path label of this recursion. In both cases the
failure jumps of this record are bound at the end. */
static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
recurse_backtrack *recurse = CURRENT_AS(recurse_backtrack);
recurse_entry *target;

if (recurse->inlined_pattern)
  {
  compile_backtrackingpath(common, current->top);
  }
else
  {
  target = recurse->entry;
  if (target->backtrack_label != NULL)
    {
    JUMPTO(SLJIT_FAST_CALL, target->backtrack_label);
    }
  else
    {
    /* The label does not exist yet: queue the call so it can be bound later. */
    add_jump(compiler, &target->backtrack_calls, JUMP(SLJIT_FAST_CALL));
    }
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, recurse->matchingpath);
  }

set_jumps(current->own_backtracks, LABEL());
}
12978
12979
/* Emits the backtracking path of an assertion (OP_ASSERT, OP_ASSERT_NOT,
OP_ASSERTBACK, OP_ASSERTBACK_NOT), optionally preceded by OP_BRAZERO.

With OP_BRAZERO a stack word at STACK(0) holds a saved STR_PTR; a non-zero
value means the assertion can be retried from the recorded matchingpath label.
When the assertion owns a frame (framesize >= 0) and is a positive one, the
frame is reverted through the common->revertframes routine and the private
data word of the assertion is restored from the stack. */
static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12980
{
12981
DEFINE_COMPILER;
12982
PCRE2_SPTR cc = current->cc;
12983
PCRE2_UCHAR bra = OP_BRA;
12984
struct sljit_jump *brajump = NULL;
12985
12986
SLJIT_ASSERT(*cc != OP_BRAMINZERO);
12987
if (*cc == OP_BRAZERO)
12988
  {
12989
  bra = *cc;
12990
  cc++;
12991
  }
12992
12993
if (bra == OP_BRAZERO)
12994
  {
12995
  SLJIT_ASSERT(current->own_backtracks == NULL);
12996
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12997
  }
12998
12999
/* No frame was created for this assertion. */
if (CURRENT_AS(assert_backtrack)->framesize < 0)
13000
  {
13001
  set_jumps(current->own_backtracks, LABEL());
13002
13003
  if (bra == OP_BRAZERO)
13004
    {
13005
    /* Clear the saved slot, retry when the loaded STR_PTR was non-zero, and
    release the slot otherwise. */
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
13006
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
13007
    free_stack(common, 1);
13008
    }
13009
  return;
13010
  }
13011
13012
if (bra == OP_BRAZERO)
13013
  {
13014
  /* Negative assertions need no frame handling here: same sequence as in the
  frameless case above. */
  if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
13015
    {
13016
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
13017
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
13018
    free_stack(common, 1);
13019
    return;
13020
    }
13021
  free_stack(common, 1);
13022
  /* Taken when the saved STR_PTR was zero; bound at the end of the function. */
  brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
13023
  }
13024
13025
if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
13026
  {
13027
  /* Reset STACK_TOP from the private data word, call the frame reverting
  routine, then restore the private data word from STACK(-2) and move
  STACK_TOP by framesize - 1 words. */
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr);
13028
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13029
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
13030
  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(assert_backtrack)->framesize - 1) * sizeof(sljit_sw));
13031
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr, TMP1, 0);
13032
13033
  set_jumps(current->own_backtracks, LABEL());
13034
  }
13035
else
13036
  set_jumps(current->own_backtracks, LABEL());
13037
13038
if (bra == OP_BRAZERO)
13039
  {
13040
  /* We know there is enough place on the stack. */
13041
  /* Take back one stack word, store zero in it and retry the matching path. */
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
13042
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
13043
  JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_backtrack)->matchingpath);
13044
  JUMPHERE(brajump);
13045
  }
13046
}
13047
13048
static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13049
{
13050
DEFINE_COMPILER;
13051
int opcode, stacksize, alt_count, alt_max;
13052
int offset = 0;
13053
int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
13054
int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
13055
PCRE2_SPTR cc = current->cc;
13056
PCRE2_SPTR ccbegin;
13057
PCRE2_SPTR ccprev;
13058
PCRE2_UCHAR bra = OP_BRA;
13059
PCRE2_UCHAR ket;
13060
assert_backtrack *assert;
13061
BOOL has_alternatives;
13062
BOOL needs_control_head = FALSE;
13063
BOOL has_vreverse;
13064
struct sljit_jump *brazero = NULL;
13065
struct sljit_jump *next_alt = NULL;
13066
struct sljit_jump *once = NULL;
13067
struct sljit_jump *cond = NULL;
13068
struct sljit_label *rmin_label = NULL;
13069
struct sljit_label *exact_label = NULL;
13070
struct sljit_jump *mov_addr = NULL;
13071
13072
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
13073
  {
13074
  bra = *cc;
13075
  cc++;
13076
  }
13077
13078
opcode = *cc;
13079
ccbegin = bracketend(cc) - 1 - LINK_SIZE;
13080
ket = *ccbegin;
13081
if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
13082
  {
13083
  repeat_ptr = PRIVATE_DATA(ccbegin);
13084
  repeat_type = PRIVATE_DATA(ccbegin + 2);
13085
  repeat_count = PRIVATE_DATA(ccbegin + 3);
13086
  SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
13087
  if (repeat_type == OP_UPTO)
13088
    ket = OP_KETRMAX;
13089
  if (repeat_type == OP_MINUPTO)
13090
    ket = OP_KETRMIN;
13091
  }
13092
ccbegin = cc;
13093
cc += GET(cc, 1);
13094
has_alternatives = *cc == OP_ALT;
13095
if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
13096
  has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
13097
if (opcode == OP_CBRA || opcode == OP_SCBRA)
13098
  offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
13099
if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
13100
  opcode = OP_SCOND;
13101
13102
alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;
13103
13104
/* Decoding the needs_control_head in framesize. */
13105
if (opcode == OP_ONCE)
13106
  {
13107
  needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
13108
  CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
13109
  }
13110
13111
if (ket != OP_KET && repeat_type != 0)
13112
  {
13113
  /* TMP1 is used in OP_KETRMIN below. */
13114
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13115
  free_stack(common, 1);
13116
  if (repeat_type == OP_UPTO)
13117
    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
13118
  else
13119
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
13120
  }
13121
13122
if (ket == OP_KETRMAX)
13123
  {
13124
  if (bra == OP_BRAZERO)
13125
    {
13126
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13127
    free_stack(common, 1);
13128
    brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
13129
    }
13130
  }
13131
else if (ket == OP_KETRMIN)
13132
  {
13133
  if (bra != OP_BRAMINZERO)
13134
    {
13135
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13136
    if (repeat_type != 0)
13137
      {
13138
      /* TMP1 was set a few lines above. */
13139
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
13140
      /* Drop STR_PTR for non-greedy plus quantifier. */
13141
      if (opcode != OP_ONCE)
13142
        free_stack(common, 1);
13143
      }
13144
    else if (opcode >= OP_SBRA || opcode == OP_ONCE)
13145
      {
13146
      /* Checking zero-length iteration. */
13147
      if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
13148
        CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
13149
      else
13150
        {
13151
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
13152
        CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 2), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
13153
        }
13154
      /* Drop STR_PTR for non-greedy plus quantifier. */
13155
      if (opcode != OP_ONCE)
13156
        free_stack(common, 1);
13157
      }
13158
    else
13159
      JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
13160
    }
13161
  rmin_label = LABEL();
13162
  if (repeat_type != 0)
13163
    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
13164
  }
13165
else if (bra == OP_BRAZERO)
13166
  {
13167
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13168
  free_stack(common, 1);
13169
  brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
13170
  }
13171
else if (repeat_type == OP_EXACT)
13172
  {
13173
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
13174
  exact_label = LABEL();
13175
  }
13176
13177
if (offset != 0)
13178
  {
13179
  if (common->capture_last_ptr != 0)
13180
    {
13181
    SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
13182
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13183
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13184
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
13185
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
13186
    free_stack(common, 3);
13187
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
13188
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
13189
    }
13190
  else if (common->optimized_cbracket[offset >> 1] == 0)
13191
    {
13192
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13193
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13194
    free_stack(common, 2);
13195
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
13196
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
13197
    }
13198
  }
13199
13200
if (SLJIT_UNLIKELY(opcode == OP_ONCE))
13201
  {
13202
  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
13203
    {
13204
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
13205
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13206
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
13207
    }
13208
  once = JUMP(SLJIT_JUMP);
13209
  }
13210
else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
13211
  {
13212
  if (has_alternatives)
13213
    {
13214
    /* Always exactly one alternative. */
13215
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13216
    free_stack(common, 1);
13217
13218
    alt_max = 2;
13219
    next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
13220
    }
13221
  }
13222
else if (has_alternatives)
13223
  {
13224
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13225
  free_stack(common, 1);
13226
13227
  if (alt_max > 3)
13228
    {
13229
    sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
13230
13231
    SLJIT_ASSERT(CURRENT_AS(bracket_backtrack)->u.matching_mov_addr);
13232
    sljit_set_label(CURRENT_AS(bracket_backtrack)->u.matching_mov_addr, LABEL());
13233
    sljit_emit_op0(compiler, SLJIT_ENDBR);
13234
    }
13235
  else
13236
    next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
13237
  }
13238
13239
COMPILE_BACKTRACKINGPATH(current->top);
13240
if (current->own_backtracks)
13241
  set_jumps(current->own_backtracks, LABEL());
13242
13243
if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
13244
  {
13245
  /* Conditional block always has at most one alternative. */
13246
  if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
13247
    {
13248
    SLJIT_ASSERT(has_alternatives);
13249
    assert = CURRENT_AS(bracket_backtrack)->u.assert;
13250
    if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
13251
      {
13252
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
13253
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13254
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
13255
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
13256
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
13257
      }
13258
    cond = JUMP(SLJIT_JUMP);
13259
    set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
13260
    }
13261
  else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
13262
    {
13263
    SLJIT_ASSERT(has_alternatives);
13264
    cond = JUMP(SLJIT_JUMP);
13265
    set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
13266
    }
13267
  else
13268
    SLJIT_ASSERT(!has_alternatives);
13269
  }
13270
13271
if (has_alternatives)
13272
  {
13273
  alt_count = 1;
13274
  do
13275
    {
13276
    current->top = NULL;
13277
    current->own_backtracks = NULL;
13278
    current->simple_backtracks = NULL;
13279
    /* Conditional blocks always have an additional alternative, even if it is empty. */
13280
    if (*cc == OP_ALT)
13281
      {
13282
      ccprev = cc + 1 + LINK_SIZE;
13283
      cc += GET(cc, 1);
13284
13285
      has_vreverse = FALSE;
13286
      if (opcode == OP_ASSERTBACK || opcode == OP_ASSERTBACK_NA)
13287
        {
13288
        SLJIT_ASSERT(private_data_ptr != 0);
13289
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
13290
13291
        has_vreverse = (*ccprev == OP_VREVERSE);
13292
        if (*ccprev == OP_REVERSE || has_vreverse)
13293
          ccprev = compile_reverse_matchingpath(common, ccprev, current);
13294
        }
13295
      else if (opcode != OP_COND && opcode != OP_SCOND)
13296
        {
13297
        if (opcode != OP_ONCE)
13298
          {
13299
          if (private_data_ptr != 0)
13300
            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
13301
          else
13302
            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13303
          }
13304
        else
13305
          OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
13306
        }
13307
13308
      compile_matchingpath(common, ccprev, cc, current);
13309
      if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13310
        return;
13311
13312
      switch (opcode)
13313
        {
13314
        case OP_ASSERTBACK_NA:
13315
          if (has_vreverse)
13316
            {
13317
            SLJIT_ASSERT(current->top != NULL && PRIVATE_DATA(ccbegin + 1));
13318
            add_jump(compiler, &current->top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
13319
            }
13320
13321
          if (PRIVATE_DATA(ccbegin + 1))
13322
            OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
13323
          break;
13324
        case OP_ASSERT_NA:
13325
          OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
13326
          break;
13327
        case OP_SCRIPT_RUN:
13328
          match_script_run_common(common, private_data_ptr, current);
13329
          break;
13330
        }
13331
      }
13332
13333
    /* Instructions after the current alternative is successfully matched. */
13334
    /* There is a similar code in compile_bracket_matchingpath. */
13335
    if (opcode == OP_ONCE)
13336
      match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
13337
13338
    stacksize = 0;
13339
    if (repeat_type == OP_MINUPTO)
13340
      {
13341
      /* We need to preserve the counter. TMP2 will be used below. */
13342
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
13343
      stacksize++;
13344
      }
13345
    if (ket != OP_KET || bra != OP_BRA)
13346
      stacksize++;
13347
    if (offset != 0)
13348
      {
13349
      if (common->capture_last_ptr != 0)
13350
        stacksize++;
13351
      if (common->optimized_cbracket[offset >> 1] == 0)
13352
        stacksize += 2;
13353
      }
13354
    if (opcode != OP_ONCE)
13355
      stacksize++;
13356
13357
    if (stacksize > 0)
13358
      allocate_stack(common, stacksize);
13359
13360
    stacksize = 0;
13361
    if (repeat_type == OP_MINUPTO)
13362
      {
13363
      /* TMP2 was set above. */
13364
      OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
13365
      stacksize++;
13366
      }
13367
13368
    if (ket != OP_KET || bra != OP_BRA)
13369
      {
13370
      if (ket != OP_KET)
13371
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
13372
      else
13373
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
13374
      stacksize++;
13375
      }
13376
13377
    if (offset != 0)
13378
      stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
13379
13380
    if (opcode != OP_ONCE)
13381
      {
13382
      if (alt_max <= 3)
13383
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);
13384
      else
13385
        mov_addr = sljit_emit_mov_addr(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
13386
      }
13387
13388
    if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
13389
      {
13390
      /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
13391
      SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
13392
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
13393
      }
13394
13395
    JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);
13396
13397
    if (opcode != OP_ONCE)
13398
      {
13399
      if (alt_max <= 3)
13400
        {
13401
        JUMPHERE(next_alt);
13402
        alt_count++;
13403
        if (alt_count < alt_max)
13404
          {
13405
          SLJIT_ASSERT(alt_count == 2 && alt_max == 3);
13406
          next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
13407
          }
13408
        }
13409
      else
13410
        {
13411
        sljit_set_label(mov_addr, LABEL());
13412
        sljit_emit_op0(compiler, SLJIT_ENDBR);
13413
        }
13414
      }
13415
13416
    COMPILE_BACKTRACKINGPATH(current->top);
13417
    if (current->own_backtracks)
13418
      set_jumps(current->own_backtracks, LABEL());
13419
    SLJIT_ASSERT(!current->simple_backtracks);
13420
    }
13421
  while (*cc == OP_ALT);
13422
13423
  if (cond != NULL)
13424
    {
13425
    SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
13426
    assert = CURRENT_AS(bracket_backtrack)->u.assert;
13427
    if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
13428
      {
13429
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
13430
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13431
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
13432
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
13433
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
13434
      }
13435
    JUMPHERE(cond);
13436
    }
13437
13438
  /* Free the STR_PTR. */
13439
  if (private_data_ptr == 0)
13440
    free_stack(common, 1);
13441
  }
13442
13443
if (offset != 0)
13444
  {
13445
  /* Using both tmp register is better for instruction scheduling. */
13446
  if (common->optimized_cbracket[offset >> 1] != 0)
13447
    {
13448
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13449
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13450
    free_stack(common, 2);
13451
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
13452
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
13453
    }
13454
  else
13455
    {
13456
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13457
    free_stack(common, 1);
13458
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
13459
    }
13460
  }
13461
else if (opcode == OP_ASSERTBACK_NA && PRIVATE_DATA(ccbegin + 1))
13462
  {
13463
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13464
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13465
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
13466
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
13467
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), TMP2, 0);
13468
  free_stack(common, 4);
13469
  }
13470
else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
13471
  {
13472
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
13473
  free_stack(common, 1);
13474
  }
13475
else if (opcode == OP_ONCE)
13476
  {
13477
  cc = ccbegin + GET(ccbegin, 1);
13478
  stacksize = needs_control_head ? 1 : 0;
13479
13480
  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
13481
    {
13482
    /* Reset head and drop saved frame. */
13483
    stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
13484
    }
13485
  else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
13486
    {
13487
    /* The STR_PTR must be released. */
13488
    stacksize++;
13489
    }
13490
13491
  if (stacksize > 0)
13492
    free_stack(common, stacksize);
13493
13494
  JUMPHERE(once);
13495
  /* Restore previous private_data_ptr */
13496
  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
13497
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 1));
13498
  else if (ket == OP_KETRMIN)
13499
    {
13500
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13501
    /* See the comment below. */
13502
    free_stack(common, 2);
13503
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
13504
    }
13505
  }
13506
13507
if (repeat_type == OP_EXACT)
13508
  {
13509
  OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
13510
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
13511
  CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
13512
  }
13513
else if (ket == OP_KETRMAX)
13514
  {
13515
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13516
  if (bra != OP_BRAZERO)
13517
    free_stack(common, 1);
13518
13519
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
13520
  if (bra == OP_BRAZERO)
13521
    {
13522
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13523
    JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
13524
    JUMPHERE(brazero);
13525
    free_stack(common, 1);
13526
    }
13527
  }
13528
else if (ket == OP_KETRMIN)
13529
  {
13530
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13531
13532
  /* OP_ONCE removes everything in case of a backtrack, so we don't
13533
  need to explicitly release the STR_PTR. The extra release would
13534
  affect badly the free_stack(2) above. */
13535
  if (opcode != OP_ONCE)
13536
    free_stack(common, 1);
13537
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
13538
  if (opcode == OP_ONCE)
13539
    free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
13540
  else if (bra == OP_BRAMINZERO)
13541
    free_stack(common, 1);
13542
  }
13543
else if (bra == OP_BRAZERO)
13544
  {
13545
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13546
  JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
13547
  JUMPHERE(brazero);
13548
  }
13549
}
13550
13551
/* Emits the backtracking path of a bracket from the OP_BRAPOS family
(OP_BRAPOS, OP_CBRAPOS, OP_SBRAPOS, OP_SCBRAPOS, optionally prefixed by
OP_BRAPOSZERO).

Arguments:
  common    the shared JIT compiler state
  current   the backtrack record of the bracket; it is accessed as a
            bracketpos_backtrack through CURRENT_AS()

Two code shapes are produced, selected by the recorded framesize:
  framesize < 0   no frame was saved: the capture values (if any) are
                  reloaded from the stack and all stacksize slots are freed.
  framesize >= 0  the saved frame is reverted through common->revertframes
                  and the previous private data value is restored. */
static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13552
{
13553
DEFINE_COMPILER;
13554
int offset;
13555
struct sljit_jump *jump;
13556
PCRE2_SPTR cc;
13557
13558
/* No retry on backtrack, just drop everything. */
13559
if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
13560
  {
13561
  cc = current->cc;
13562
13563
  /* Step over the optional OP_BRAPOSZERO prefix to reach the bracket opcode. */
  if (*cc == OP_BRAPOSZERO)
13564
    cc++;
13565
13566
  if (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS)
13567
    {
13568
    /* The capture number is doubled to index the ovector pair. */
    offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
13569
    /* Stack slots 0 and 1 are written back to the ovector pair. The loads
    and stores are interleaved, and TMP1 is reused for slot 2 (written to
    capture_last_ptr) when capture_last_ptr is in use. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13570
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13571
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
13572
    if (common->capture_last_ptr != 0)
13573
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
13574
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
13575
    if (common->capture_last_ptr != 0)
13576
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
13577
    }
13578
  /* Pending backtrack jumps land after the restore code, so they only
  release the stack slots. */
  set_jumps(current->own_backtracks, LABEL());
13579
  free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
13580
  return;
13581
  }
13582
13583
/* A frame was saved: reload STACK_TOP from the private data slot, call the
shared revertframes routine, then move STACK_TOP by (framesize - 1) words. */
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
13584
add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13585
OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracketpos_backtrack)->framesize - 1) * sizeof(sljit_sw));
13586
13587
if (current->own_backtracks)
13588
  {
13589
  /* The fall-through path skips the free_stack() below; only the pending
  own_backtracks jumps execute it. */
  jump = JUMP(SLJIT_JUMP);
13590
  set_jumps(current->own_backtracks, LABEL());
13591
  /* Drop the stack frame. */
13592
  free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
13593
  JUMPHERE(jump);
13594
  }
13595
/* Restore the private data slot from the value stored framesize + 1 slots
below the current stack top. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracketpos_backtrack)->framesize - 1));
13596
}
13597
13598
/* Emits the backtracking path of OP_BRAMINZERO. The item that follows the
OP_BRAMINZERO opcode is compiled here, on the backtracking path: either a
bracket (matching path followed by its own backtracking path) or an
assertion.

Arguments:
  common    the shared JIT compiler state
  current   the backtrack record of the OP_BRAMINZERO item; it is accessed
            as a braminzero_backtrack through CURRENT_AS() */
static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13599
{
13600
assert_backtrack backtrack;
13601
13602
/* Start from empty lists: the record is reused as the parent of the code
compiled below. */
current->top = NULL;
13603
current->own_backtracks = NULL;
13604
current->simple_backtracks = NULL;
13605
/* current->cc[1] is the opcode after OP_BRAMINZERO. The same comparison
against OP_ASSERTBACK_NOT separates brackets from assertions in the
OP_BRAZERO case of compile_backtrackingpath. */
if (current->cc[1] > OP_ASSERTBACK_NOT)
13606
  {
13607
  /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
13608
  compile_bracket_matchingpath(common, current->cc, current);
13609
  compile_bracket_backtrackingpath(common, current->top);
13610
  }
13611
else
13612
  {
13613
  /* A zero-initialised local assert_backtrack is used; only its cc and
  matchingpath fields are filled in before the call. */
  memset(&backtrack, 0, sizeof(backtrack));
13614
  backtrack.common.cc = current->cc;
13615
  backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
13616
  /* Manual call of compile_assert_matchingpath. */
13617
  compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
13618
  }
13619
SLJIT_ASSERT(!current->simple_backtracks && !current->own_backtracks);
13620
}
13621
13622
/* Emits the backtracking path of a control verb. compile_backtrackingpath
calls this for OP_THEN, OP_THEN_ARG, OP_PRUNE, OP_PRUNE_ARG, OP_SKIP and
OP_SKIP_ARG.

Arguments:
  common    the shared JIT compiler state
  current   the backtrack record of the verb; *current->cc is the opcode

The cases are tried in this order, and the first that applies ends the
function:
  1. THEN with an active then trap: search the control head chain for the
     trap entry and jump to the quit list of the trap.
  2. THEN inside a positive assertion without local quit: jump to
     positive_assertion_quit.
  3. Local quit is available: jump to the quit label / quit list.
  4. OP_SKIP_ARG: call do_search_mark() at run time and jump to reset_match
     when it returns a non-zero value.
  5. Anything else: load STR_PTR (from the stack for OP_SKIP, zero
     otherwise) and jump to reset_match. */
static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13623
{
13624
DEFINE_COMPILER;
13625
PCRE2_UCHAR opcode = *current->cc;
13626
struct sljit_label *loop;
13627
struct sljit_jump *jump;
13628
13629
if (opcode == OP_THEN || opcode == OP_THEN_ARG)
13630
  {
13631
  if (common->then_trap != NULL)
13632
    {
13633
    SLJIT_ASSERT(common->control_head_ptr != 0);
13634
13635
    /* Walk the chain that starts at control_head_ptr. TMP1 and TMP2 hold the
    values that slots 1 and 2 of the wanted entry must contain. */
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
13636
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
13637
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
13638
    /* The first iteration skips the "follow the link" instruction. */
    jump = JUMP(SLJIT_JUMP);
13639
13640
    loop = LABEL();
13641
    /* Slot 0 of an entry links to the next entry of the chain. */
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13642
    JUMPHERE(jump);
13643
    CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0, loop);
13644
    CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0, loop);
13645
    /* Entry found: STACK_TOP now points at it. Continue in the code bound
    to the quit list of the trap. */
    add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
13646
    return;
13647
    }
13648
  else if (!common->local_quit_available && common->in_positive_assertion)
13649
    {
13650
    add_jump(compiler, &common->positive_assertion_quit, JUMP(SLJIT_JUMP));
13651
    return;
13652
    }
13653
  }
13654
13655
if (common->local_quit_available)
13656
  {
13657
  /* Abort match with a fail. */
13658
  /* Use the quit label when it is already known, otherwise queue the jump
  on the quit list. */
  if (common->quit_label == NULL)
13659
    add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
13660
  else
13661
    JUMPTO(SLJIT_JUMP, common->quit_label);
13662
  return;
13663
  }
13664
13665
if (opcode == OP_SKIP_ARG)
13666
  {
13667
  /* The assert documents that TMP1 and STR_PTR are the first two argument
  registers (R0, R1) of the call emitted below. */
  SLJIT_ASSERT(common->control_head_ptr != 0 && TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
13668
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
13669
  /* Second argument: address of current->cc + 2, embedded as an immediate
  (presumably the start of the verb's name argument). */
  OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
13670
  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_search_mark));
13671
13672
  /* The return value becomes the new STR_PTR; reset_match is only taken
  when it is non-zero. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0);
13673
  add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0));
13674
  return;
13675
  }
13676
13677
/* OP_SKIP passes the value saved in stack slot 0 to reset_match; every
other verb reaching this point passes zero. */
if (opcode == OP_SKIP)
13678
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13679
else
13680
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
13681
add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
13682
}
13683
13684
/* Emits the backtracking path of OP_VREVERSE. The generated code reloads the
position saved in stack slot 2 and, while it is below the value in stack
slot 3, advances it with skip_valid_char(), stores it back and re-enters
the matching path. Once the limit is reached, control continues at the
label that also receives the item's own backtrack jumps.

Arguments:
  common    the shared JIT compiler state
  current   the backtrack record of the item; it is accessed as a
            vreverse_backtrack through CURRENT_AS() */
static SLJIT_INLINE void compile_vreverse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13685
{
13686
DEFINE_COMPILER;
13687
struct sljit_jump *jump;
13688
struct sljit_label *label;
13689
13690
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
13691
/* No further position to try when the saved position has reached the
limit stored in slot 3. */
jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(3));
13692
/* NOTE(review): skip_valid_char() presumably advances STR_PTR by one
character - confirm against its definition. */
skip_valid_char(common);
13693
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);
13694
JUMPTO(SLJIT_JUMP, CURRENT_AS(vreverse_backtrack)->matchingpath);
13695
13696
/* Shared failure label for the limit check above and for own_backtracks. */
label = LABEL();
13697
sljit_set_label(jump, label);
13698
set_jumps(current->own_backtracks, label);
13699
}
13700
13701
/* Emits the backtracking path of the virtual OP_THEN_TRAP opcode.

Arguments:
  common    the shared JIT compiler state
  current   the backtrack record of the trap; it is accessed as a
            then_trap_backtrack through CURRENT_AS()

When the record's then_trap field is set, no code is emitted and only
common->then_trap is restored from it. Otherwise two paths are generated
that both end by writing TMP1 to control_head_ptr:
  - normal backtrack: reload the saved value and free the trap's stack
    area (3 slots plus the frame, if one was saved);
  - arrival through the trap's quit list (taken by THEN): revert the frame
    if one exists, reload the saved value and free 3 slots. */
static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13702
{
13703
DEFINE_COMPILER;
13704
struct sljit_jump *jump;
13705
int size;
13706
13707
if (CURRENT_AS(then_trap_backtrack)->then_trap)
13708
  {
13709
  common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
13710
  return;
13711
  }
13712
13713
/* Slots used by the trap: 3, plus the frame size when it is not negative. */
size = CURRENT_AS(then_trap_backtrack)->framesize;
13714
size = 3 + (size < 0 ? 0 : size);
13715
13716
/* Normal backtrack: the value to restore is in slot size - 3. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
13717
free_stack(common, size);
13718
jump = JUMP(SLJIT_JUMP);
13719
13720
set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
13721
/* STACK_TOP is set by THEN. */
13722
if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
13723
  {
13724
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13725
  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(then_trap_backtrack)->framesize - 1) * sizeof(sljit_sw));
13726
  }
13727
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13728
free_stack(common, 3);
13729
13730
/* Both paths join here with the value to restore in TMP1. */
JUMPHERE(jump);
13731
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
13732
}
13733
13734
/* Emits the backtracking paths for a chain of backtrack records. Starting at
"current", the chain is followed through the prev links; for each record
the pending simple_backtracks jumps are bound to the current position and
the opcode at record->cc selects the emitter for that item.

Arguments:
  common    the shared JIT compiler state
  current   the first record to process; may be NULL (nothing is emitted)

common->then_trap is saved on entry and restored on exit, because
processing an OP_THEN_TRAP record can overwrite it. */
static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13735
{
13736
DEFINE_COMPILER;
13737
then_trap_backtrack *save_then_trap = common->then_trap;
13738
13739
while (current)
13740
  {
13741
  if (current->simple_backtracks != NULL)
13742
    set_jumps(current->simple_backtracks, LABEL());
13743
  switch(*current->cc)
13744
    {
13745
    case OP_SET_SOM:
13746
    /* Write the value saved in stack slot 0 back to OVECTOR(0). */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13747
    free_stack(common, 1);
13748
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
13749
    break;
13750
13751
    /* Repeat opcodes and character classes share one emitter. */
    case OP_STAR:
13752
    case OP_MINSTAR:
13753
    case OP_PLUS:
13754
    case OP_MINPLUS:
13755
    case OP_QUERY:
13756
    case OP_MINQUERY:
13757
    case OP_UPTO:
13758
    case OP_MINUPTO:
13759
    case OP_EXACT:
13760
    case OP_POSSTAR:
13761
    case OP_POSPLUS:
13762
    case OP_POSQUERY:
13763
    case OP_POSUPTO:
13764
    case OP_STARI:
13765
    case OP_MINSTARI:
13766
    case OP_PLUSI:
13767
    case OP_MINPLUSI:
13768
    case OP_QUERYI:
13769
    case OP_MINQUERYI:
13770
    case OP_UPTOI:
13771
    case OP_MINUPTOI:
13772
    case OP_EXACTI:
13773
    case OP_POSSTARI:
13774
    case OP_POSPLUSI:
13775
    case OP_POSQUERYI:
13776
    case OP_POSUPTOI:
13777
    case OP_NOTSTAR:
13778
    case OP_NOTMINSTAR:
13779
    case OP_NOTPLUS:
13780
    case OP_NOTMINPLUS:
13781
    case OP_NOTQUERY:
13782
    case OP_NOTMINQUERY:
13783
    case OP_NOTUPTO:
13784
    case OP_NOTMINUPTO:
13785
    case OP_NOTEXACT:
13786
    case OP_NOTPOSSTAR:
13787
    case OP_NOTPOSPLUS:
13788
    case OP_NOTPOSQUERY:
13789
    case OP_NOTPOSUPTO:
13790
    case OP_NOTSTARI:
13791
    case OP_NOTMINSTARI:
13792
    case OP_NOTPLUSI:
13793
    case OP_NOTMINPLUSI:
13794
    case OP_NOTQUERYI:
13795
    case OP_NOTMINQUERYI:
13796
    case OP_NOTUPTOI:
13797
    case OP_NOTMINUPTOI:
13798
    case OP_NOTEXACTI:
13799
    case OP_NOTPOSSTARI:
13800
    case OP_NOTPOSPLUSI:
13801
    case OP_NOTPOSQUERYI:
13802
    case OP_NOTPOSUPTOI:
13803
    case OP_TYPESTAR:
13804
    case OP_TYPEMINSTAR:
13805
    case OP_TYPEPLUS:
13806
    case OP_TYPEMINPLUS:
13807
    case OP_TYPEQUERY:
13808
    case OP_TYPEMINQUERY:
13809
    case OP_TYPEUPTO:
13810
    case OP_TYPEMINUPTO:
13811
    case OP_TYPEEXACT:
13812
    case OP_TYPEPOSSTAR:
13813
    case OP_TYPEPOSPLUS:
13814
    case OP_TYPEPOSQUERY:
13815
    case OP_TYPEPOSUPTO:
13816
    case OP_CLASS:
13817
    case OP_NCLASS:
13818
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
13819
    case OP_XCLASS:
13820
#endif
13821
    compile_iterator_backtrackingpath(common, current);
13822
    break;
13823
13824
    /* Back references. */
    case OP_REF:
13825
    case OP_REFI:
13826
    case OP_DNREF:
13827
    case OP_DNREFI:
13828
    compile_ref_iterator_backtrackingpath(common, current);
13829
    break;
13830
13831
    case OP_RECURSE:
13832
    compile_recurse_backtrackingpath(common, current);
13833
    break;
13834
13835
    case OP_ASSERT:
13836
    case OP_ASSERT_NOT:
13837
    case OP_ASSERTBACK:
13838
    case OP_ASSERTBACK_NOT:
13839
    compile_assert_backtrackingpath(common, current);
13840
    break;
13841
13842
    /* These opcodes all go through the generic bracket emitter. */
    case OP_ASSERT_NA:
13843
    case OP_ASSERTBACK_NA:
13844
    case OP_ONCE:
13845
    case OP_SCRIPT_RUN:
13846
    case OP_BRA:
13847
    case OP_CBRA:
13848
    case OP_COND:
13849
    case OP_SBRA:
13850
    case OP_SCBRA:
13851
    case OP_SCOND:
13852
    compile_bracket_backtrackingpath(common, current);
13853
    break;
13854
13855
    case OP_BRAZERO:
13856
    /* The opcode following OP_BRAZERO decides whether the item is a bracket
    or an assertion. */
    if (current->cc[1] > OP_ASSERTBACK_NOT)
13857
      compile_bracket_backtrackingpath(common, current);
13858
    else
13859
      compile_assert_backtrackingpath(common, current);
13860
    break;
13861
13862
    case OP_BRAPOS:
13863
    case OP_CBRAPOS:
13864
    case OP_SBRAPOS:
13865
    case OP_SCBRAPOS:
13866
    case OP_BRAPOSZERO:
13867
    compile_bracketpos_backtrackingpath(common, current);
13868
    break;
13869
13870
    case OP_BRAMINZERO:
13871
    compile_braminzero_backtrackingpath(common, current);
13872
    break;
13873
13874
    case OP_MARK:
13875
    /* Restore mark_ptr from the stack. With has_skip_arg the saved area is 5
    slots: slot 4 holds the mark value and slot 0 the value restored to
    control_head_ptr. Without it, only one slot (the mark value) is used. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0));
13876
    if (common->has_skip_arg)
13877
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13878
    free_stack(common, common->has_skip_arg ? 5 : 1);
13879
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
13880
    if (common->has_skip_arg)
13881
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
13882
    break;
13883
13884
    case OP_THEN:
13885
    case OP_THEN_ARG:
13886
    case OP_PRUNE:
13887
    case OP_PRUNE_ARG:
13888
    case OP_SKIP:
13889
    case OP_SKIP_ARG:
13890
    compile_control_verb_backtrackingpath(common, current);
13891
    break;
13892
13893
    case OP_COMMIT:
13894
    case OP_COMMIT_ARG:
13895
    /* Backtracking onto COMMIT leaves through the quit path. The return
    register is set to PCRE2_ERROR_NOMATCH only when no local quit is
    available. */
    if (!common->local_quit_available)
13896
      OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
13897
    if (common->quit_label == NULL)
13898
      add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
13899
    else
13900
      JUMPTO(SLJIT_JUMP, common->quit_label);
13901
    break;
13902
13903
    /* These items emit no code of their own on the backtracking path; their
    pending jumps are simply bound to the current position. */
    case OP_CALLOUT:
13904
    case OP_CALLOUT_STR:
13905
    case OP_FAIL:
13906
    case OP_ACCEPT:
13907
    case OP_ASSERT_ACCEPT:
13908
    set_jumps(current->own_backtracks, LABEL());
13909
    break;
13910
13911
    case OP_VREVERSE:
13912
    compile_vreverse_backtrackingpath(common, current);
13913
    break;
13914
13915
    case OP_THEN_TRAP:
13916
    /* A virtual opcode for then traps. */
13917
    compile_then_trap_backtrackingpath(common, current);
13918
    break;
13919
13920
    default:
13921
    SLJIT_UNREACHABLE();
13922
    break;
13923
    }
13924
  current = current->prev;
13925
  }
13926
common->then_trap = save_then_trap;
13927
}
13928
13929
/* Emits the out-of-line code for the recursion target described by
common->currententry. The target is the bracket at common->start +
currententry->start.

Argument:
  common    the shared JIT compiler state

The generated code has two entry points, both entered with a fast call:
  entry_label      starts matching the bracket (matching path);
  backtrack_label  re-enters after a successful return, to try another
                   way of matching (backtracking path).
Every exit is a fast return with TMP1 set to 1 when an alternative matched
and to 0 otherwise. The return address is kept in stack slot
local_size - 1 of the area allocated at entry.

The function returns early, leaving the code incomplete, when the sljit
compiler reports an error. */
static SLJIT_INLINE void compile_recurse(compiler_common *common)
13930
{
13931
DEFINE_COMPILER;
13932
PCRE2_SPTR cc = common->start + common->currententry->start;
13933
/* First opcode inside the bracket; every target except OP_BRA carries an
additional IMM2_SIZE field. */
PCRE2_SPTR ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
13934
PCRE2_SPTR ccend = bracketend(cc) - (1 + LINK_SIZE);
13935
uint32_t recurse_flags = 0;
13936
int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &recurse_flags);
13937
int alt_count, alt_max, local_size;
13938
backtrack_common altbacktrack;
13939
jump_list *match = NULL;
13940
struct sljit_jump *next_alt = NULL;
13941
struct sljit_jump *accept_exit = NULL;
13942
struct sljit_label *quit;
13943
struct sljit_jump *mov_addr = NULL;
13944
13945
/* Recurse captures then. */
13946
common->then_trap = NULL;
13947
13948
SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
13949
13950
alt_max = no_alternatives(cc);
13951
alt_count = 0;
13952
13953
/* Matching path. */
13954
SLJIT_ASSERT(common->currententry->entry_label == NULL && common->recursive_head_ptr != 0);
13955
common->currententry->entry_label = LABEL();
13956
set_jumps(common->currententry->entry_calls, common->currententry->entry_label);
13957
13958
/* The return address of the fast call is received in TMP2. */
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, TMP2, 0);
13959
count_match(common);
13960
13961
/* One local slot for the return address, plus one for the saved STR_PTR
when there are several alternatives. */
local_size = (alt_max > 1) ? 2 : 1;
13962
13963
/* (Reversed) stack layout:
13964
   [private data][return address][optional: str ptr] ... [optional: alternative index][recursive_head_ptr] */
13965
13966
allocate_stack(common, private_data_size + local_size);
13967
/* Save return address. */
13968
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP2, 0);
13969
13970
copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, local_size, private_data_size + local_size, recurse_flags);
13971
13972
/* This variable is saved and restored all time when we enter or exit from a recursive context. */
13973
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);
13974
13975
if (recurse_flags & recurse_flag_control_head_found)
13976
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
13977
13978
/* With several alternatives, STR_PTR is saved in slot 0 so that each later
alternative can restart from the same position. */
if (alt_max > 1)
13979
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
13980
13981
memset(&altbacktrack, 0, sizeof(backtrack_common));
13982
/* The quit and accept targets are local to this recursion; they are bound
after the alternatives have been compiled. */
common->quit_label = NULL;
13983
common->accept_label = NULL;
13984
common->quit = NULL;
13985
common->accept = NULL;
13986
altbacktrack.cc = ccbegin;
13987
cc += GET(cc, 1);
13988
/* One iteration per alternative: its matching path, the success exit, and
then its backtracking path. */
while (1)
13989
  {
13990
  altbacktrack.top = NULL;
13991
  altbacktrack.own_backtracks = NULL;
13992
13993
  /* Every alternative except the first reloads the saved STR_PTR. */
  if (altbacktrack.cc != ccbegin)
13994
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13995
13996
  compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
13997
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13998
    return;
13999
14000
  /* The alternative matched. Push one slot (filled with the recursion head
  at the common "match" exit below), plus a second slot identifying the
  alternative when there are several alternatives or an ACCEPT was found. */
  allocate_stack(common, (alt_max > 1 || (recurse_flags & recurse_flag_accept_found)) ? 2 : 1);
14001
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
14002
14003
  if (alt_max > 1 || (recurse_flags & recurse_flag_accept_found))
14004
    {
14005
    /* More than three alternatives: store the address of the alternative's
    backtracking code (for an indirect jump). Otherwise store its index. */
    if (alt_max > 3)
14006
      mov_addr = sljit_emit_mov_addr(compiler, SLJIT_MEM1(STACK_TOP), STACK(1));
14007
    else
14008
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count);
14009
    }
14010
14011
  add_jump(compiler, &match, JUMP(SLJIT_JUMP));
14012
14013
  if (alt_count == 0)
14014
    {
14015
    /* Backtracking path entry. */
14016
    SLJIT_ASSERT(common->currententry->backtrack_label == NULL);
14017
    common->currententry->backtrack_label = LABEL();
14018
    set_jumps(common->currententry->backtrack_calls, common->currententry->backtrack_label);
14019
14020
    /* On this entry the return address is received in TMP1. */
    sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, TMP1, 0);
14021
14022
    /* A marker of -1 in slot 1 means the recursion ended through ACCEPT;
    that case is handled by the accept_exit code further below. */
    if (recurse_flags & recurse_flag_accept_found)
14023
      accept_exit = CMP(SLJIT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);
14024
14025
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
14026
    /* Save return address. */
14027
    OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(local_size - 1), TMP1, 0);
14028
14029
    copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, recurse_flags);
14030
14031
    /* Dispatch to the backtracking code of the alternative that matched. */
    if (alt_max > 1)
14032
      {
14033
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
14034
      free_stack(common, 2);
14035
14036
      if (alt_max > 3)
14037
        {
14038
        sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
14039
        sljit_set_label(mov_addr, LABEL());
14040
        sljit_emit_op0(compiler, SLJIT_ENDBR);
14041
        }
14042
      else
14043
        next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
14044
      }
14045
    else
14046
      free_stack(common, (recurse_flags & recurse_flag_accept_found) ? 2 : 1);
14047
    }
14048
  else if (alt_max > 3)
14049
    {
14050
    /* Target of the indirect jump for this alternative. */
    sljit_set_label(mov_addr, LABEL());
14051
    sljit_emit_op0(compiler, SLJIT_ENDBR);
14052
    }
14053
  else
14054
    {
14055
    /* Compare chain for two or three alternatives: the previous test falls
    through to here when the index did not match. */
    JUMPHERE(next_alt);
14056
    if (alt_count + 1 < alt_max)
14057
      {
14058
      SLJIT_ASSERT(alt_count == 1 && alt_max == 3);
14059
      next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
14060
      }
14061
    }
14062
14063
  alt_count++;
14064
14065
  compile_backtrackingpath(common, altbacktrack.top);
14066
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
14067
    return;
14068
  set_jumps(altbacktrack.own_backtracks, LABEL());
14069
14070
  if (*cc != OP_ALT)
14071
    break;
14072
14073
  altbacktrack.cc = cc + 1 + LINK_SIZE;
14074
  cc += GET(cc, 1);
14075
  }
14076
14077
/* No alternative is matched. */
14078
14079
quit = LABEL();
14080
14081
copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, local_size, private_data_size + local_size, recurse_flags);
14082
14083
/* Reload the return address, release the whole area and return with
TMP1 = 0. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
14084
free_stack(common, private_data_size + local_size);
14085
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
14086
OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
14087
14088
if (common->quit != NULL)
14089
  {
14090
  SLJIT_ASSERT(recurse_flags & recurse_flag_quit_found);
14091
14092
  /* Jumps queued on the quit list arrive with an arbitrary STACK_TOP: reset
  it from recursive_head_ptr, then continue at the common failure exit. */
  set_jumps(common->quit, LABEL());
14093
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
14094
  copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, local_size, private_data_size + local_size, recurse_flags);
14095
  JUMPTO(SLJIT_JUMP, quit);
14096
  }
14097
14098
if (recurse_flags & recurse_flag_accept_found)
14099
  {
14100
  /* Backtrack entry after an ACCEPT exit (marker -1 in slot 1): drop the two
  slots, restore the data and return with TMP1 = 0. */
  JUMPHERE(accept_exit);
14101
  free_stack(common, 2);
14102
14103
  /* Save return address. */
14104
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP1, 0);
14105
14106
  copy_recurse_data(common, ccbegin, ccend, recurse_copy_kept_shared_to_global, local_size, private_data_size + local_size, recurse_flags);
14107
14108
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
14109
  free_stack(common, private_data_size + local_size);
14110
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
14111
  OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
14112
  }
14113
14114
if (common->accept != NULL)
14115
  {
14116
  SLJIT_ASSERT(recurse_flags & recurse_flag_accept_found);
14117
14118
  set_jumps(common->accept, LABEL());
14119
14120
  /* ACCEPT inside the recursion: reset STACK_TOP to the recursion head,
  push two slots and mark slot 1 with -1. Execution then falls through to
  the common "match" exit below, with the recursion head in TMP2. */
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
14121
  OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);
14122
14123
  allocate_stack(common, 2);
14124
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);
14125
  }
14126
14127
/* Common success exit: TMP2 holds the recursion head on every path that
reaches this label. */
set_jumps(match, LABEL());
14128
14129
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
14130
14131
copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, recurse_flags);
14132
14133
/* Fetch the return address relative to the recursion head and return with
TMP1 = 1. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), STACK(local_size - 1));
14134
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
14135
OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
14136
}
14137
14138
/* The helper macros used by the backtracking path emitters are removed
here. */
#undef COMPILE_BACKTRACKINGPATH
14139
#undef CURRENT_AS
14140
14141
/* Bits of the jit_compile() mode argument that configure the compilation
rather than select a compile mode. jit_compile() inspects them first and
then clears them from the mode value. */
#define PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS \
14142
  (PCRE2_JIT_INVALID_UTF)
14143
14144
/* Generates the machine code for one matching mode of a compiled pattern and
stores the result in the executable_functions descriptor that hangs off
re->executable_jit (the descriptor is allocated here if it does not exist yet).

Arguments:
  code          the compiled pattern (used as a pcre2_real_code)
  mode          one of PCRE2_JIT_COMPLETE, PCRE2_JIT_PARTIAL_SOFT or
                PCRE2_JIT_PARTIAL_HARD, optionally combined with the bits of
                PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS

Returns:        0 on success
                PCRE2_ERROR_INTERNAL when the newline convention is unknown
                PCRE2_ERROR_NOMEMORY for every other failure in this function
                  (allocation failure, check_opcode_types() rejecting the
                  pattern, private data larger than 65536, a compiler error,
                  or sljit_generate_code() returning NULL)
*/
static int jit_compile(pcre2_code *code, sljit_u32 mode)
14145
{
14146
pcre2_real_code *re = (pcre2_real_code *)code;
14147
struct sljit_compiler *compiler;
14148
backtrack_common rootbacktrack;
14149
compiler_common common_data;
14150
compiler_common *common = &common_data;
14151
const sljit_u8 *tables = re->tables;
14152
void *allocator_data = &re->memctl;
14153
int private_data_size;
14154
PCRE2_SPTR ccend;
14155
executable_functions *functions;
14156
void *executable_func;
14157
sljit_uw executable_size;
14158
sljit_uw total_length;
14159
struct sljit_label *mainloop_label = NULL;
14160
struct sljit_label *continue_match_label;
14161
struct sljit_label *empty_match_found_label = NULL;
14162
struct sljit_label *empty_match_backtrack_label = NULL;
14163
struct sljit_label *reset_match_label;
14164
struct sljit_label *quit_label;
14165
struct sljit_jump *jump;
14166
struct sljit_jump *minlength_check_failed = NULL;
14167
struct sljit_jump *empty_match = NULL;
14168
struct sljit_jump *end_anchor_failed = NULL;
14169
jump_list *reqcu_not_found = NULL;
14170
14171
SLJIT_ASSERT(tables);
14172
14173
#if HAS_VIRTUAL_REGISTERS == 1
14174
SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) < 0 && sljit_get_register_index(SLJIT_GP_REGISTER, ARGUMENTS) < 0 && sljit_get_register_index(SLJIT_GP_REGISTER, RETURN_ADDR) < 0);
14175
#elif HAS_VIRTUAL_REGISTERS == 0
14176
SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) >= 0 && sljit_get_register_index(SLJIT_GP_REGISTER, ARGUMENTS) >= 0 && sljit_get_register_index(SLJIT_GP_REGISTER, RETURN_ADDR) >= 0);
14177
#else
14178
#error "Invalid value for HAS_VIRTUAL_REGISTERS"
14179
#endif
14180
14181
memset(&rootbacktrack, 0, sizeof(backtrack_common));
14182
memset(common, 0, sizeof(compiler_common));
14183
common->re = re;
14184
common->name_table = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
14185
rootbacktrack.cc = common->name_table + re->name_count * re->name_entry_size;
14186
14187
#ifdef SUPPORT_UNICODE
14188
common->invalid_utf = (mode & PCRE2_JIT_INVALID_UTF) != 0;
14189
#endif /* SUPPORT_UNICODE */
14190
/* Strip the configuration bits so that "mode" can be compared directly with
the PCRE2_JIT_* mode selectors from here on. */
mode &= ~PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS;
14191
14192
common->start = rootbacktrack.cc;
14193
common->read_only_data_head = NULL;
14194
common->fcc = tables + fcc_offset;
14195
common->lcc = (sljit_sw)(tables + lcc_offset);
14196
common->mode = mode;
14197
common->might_be_empty = (re->minlength == 0) || (re->flags & PCRE2_MATCH_EMPTY);
14198
common->allow_empty_partial = (re->max_lookbehind > 0) || (re->flags & PCRE2_MATCH_EMPTY);
14199
common->nltype = NLTYPE_FIXED;
14200
switch(re->newline_convention)
14201
  {
14202
  case PCRE2_NEWLINE_CR: common->newline = CHAR_CR; break;
14203
  case PCRE2_NEWLINE_LF: common->newline = CHAR_NL; break;
14204
  case PCRE2_NEWLINE_CRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
14205
  case PCRE2_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
14206
  case PCRE2_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
14207
  case PCRE2_NEWLINE_NUL: common->newline = CHAR_NUL; break;
14208
  default: return PCRE2_ERROR_INTERNAL;
14209
  }
14210
common->nlmax = READ_CHAR_MAX;
14211
common->nlmin = 0;
14212
if (re->bsr_convention == PCRE2_BSR_UNICODE)
14213
  common->bsr_nltype = NLTYPE_ANY;
14214
else if (re->bsr_convention == PCRE2_BSR_ANYCRLF)
14215
  common->bsr_nltype = NLTYPE_ANYCRLF;
14216
else
14217
  {
14218
#ifdef BSR_ANYCRLF
14219
  common->bsr_nltype = NLTYPE_ANYCRLF;
14220
#else
14221
  common->bsr_nltype = NLTYPE_ANY;
14222
#endif
14223
  }
14224
common->bsr_nlmax = READ_CHAR_MAX;
14225
common->bsr_nlmin = 0;
14226
common->endonly = (re->overall_options & PCRE2_DOLLAR_ENDONLY) != 0;
14227
common->ctypes = (sljit_sw)(tables + ctypes_offset);
14228
common->name_count = re->name_count;
14229
common->name_entry_size = re->name_entry_size;
14230
common->unset_backref = (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) != 0;
14231
common->alt_circumflex = (re->overall_options & PCRE2_ALT_CIRCUMFLEX) != 0;
14232
#ifdef SUPPORT_UNICODE
14233
/* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
14234
common->utf = (re->overall_options & PCRE2_UTF) != 0;
14235
common->ucp = (re->overall_options & PCRE2_UCP) != 0;
14236
if (common->utf)
14237
  {
14238
  if (common->nltype == NLTYPE_ANY)
14239
    common->nlmax = 0x2029;
14240
  else if (common->nltype == NLTYPE_ANYCRLF)
14241
    common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
14242
  else
14243
    {
14244
    /* We only care about the first newline character. */
14245
    common->nlmax = common->newline & 0xff;
14246
    }
14247
14248
  if (common->nltype == NLTYPE_FIXED)
14249
    common->nlmin = common->newline & 0xff;
14250
  else
14251
    common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
14252
14253
  if (common->bsr_nltype == NLTYPE_ANY)
14254
    common->bsr_nlmax = 0x2029;
14255
  else
14256
    common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
14257
  common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
14258
  }
14259
else
14260
  common->invalid_utf = FALSE;
14261
#endif /* SUPPORT_UNICODE */
14262
ccend = bracketend(common->start);
14263
14264
/* Calculate the local space size on the stack. */
14265
common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
14266
common->optimized_cbracket = (sljit_u8 *)SLJIT_MALLOC(re->top_bracket + 1, allocator_data);
14267
if (!common->optimized_cbracket)
14268
  return PCRE2_ERROR_NOMEMORY;
14269
#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
14270
memset(common->optimized_cbracket, 0, re->top_bracket + 1);
14271
#else
14272
memset(common->optimized_cbracket, 1, re->top_bracket + 1);
14273
#endif
14274
14275
SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
14276
#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
14277
common->capture_last_ptr = common->ovector_start;
14278
common->ovector_start += sizeof(sljit_sw);
14279
#endif
14280
if (!check_opcode_types(common, common->start, ccend))
14281
  {
14282
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
14283
  return PCRE2_ERROR_NOMEMORY;
14284
  }
14285
14286
/* Checking flags and updating ovector_start. */
14287
if (mode == PCRE2_JIT_COMPLETE && (re->flags & PCRE2_LASTSET) != 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
14288
  {
14289
  common->req_char_ptr = common->ovector_start;
14290
  common->ovector_start += sizeof(sljit_sw);
14291
  }
14292
if (mode != PCRE2_JIT_COMPLETE)
14293
  {
14294
  common->start_used_ptr = common->ovector_start;
14295
  common->ovector_start += sizeof(sljit_sw);
14296
  if (mode == PCRE2_JIT_PARTIAL_SOFT)
14297
    {
14298
    common->hit_start = common->ovector_start;
14299
    common->ovector_start += sizeof(sljit_sw);
14300
    }
14301
  }
14302
if ((re->overall_options & (PCRE2_FIRSTLINE | PCRE2_USE_OFFSET_LIMIT)) != 0)
14303
  {
14304
  common->match_end_ptr = common->ovector_start;
14305
  common->ovector_start += sizeof(sljit_sw);
14306
  }
14307
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
14308
common->control_head_ptr = 1;
14309
#endif
14310
if (common->control_head_ptr != 0)
14311
  {
14312
  common->control_head_ptr = common->ovector_start;
14313
  common->ovector_start += sizeof(sljit_sw);
14314
  }
14315
if (common->has_set_som)
14316
  {
14317
  /* Saving the real start pointer is necessary. */
14318
  common->start_ptr = common->ovector_start;
14319
  common->ovector_start += sizeof(sljit_sw);
14320
  }
14321
14322
/* Aligning ovector to even number of sljit words. */
14323
if ((common->ovector_start & sizeof(sljit_sw)) != 0)
14324
  common->ovector_start += sizeof(sljit_sw);
14325
14326
if (common->start_ptr == 0)
14327
  common->start_ptr = OVECTOR(0);
14328
14329
/* Capturing brackets cannot be optimized if callouts are allowed. */
14330
if (common->capture_last_ptr != 0)
14331
  memset(common->optimized_cbracket, 0, re->top_bracket + 1);
14332
14333
SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
14334
common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
14335
14336
total_length = ccend - common->start;
14337
common->private_data_ptrs = (sljit_s32*)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), allocator_data);
14338
if (!common->private_data_ptrs)
14339
  {
14340
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
14341
  return PCRE2_ERROR_NOMEMORY;
14342
  }
14343
memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32));
14344
14345
private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
14346
14347
if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && !common->has_skip_in_assert_back)
14348
  detect_early_fail(common, common->start, &private_data_size, 0, 0);
14349
14350
set_private_data_ptrs(common, &private_data_size, ccend);
14351
14352
SLJIT_ASSERT(common->early_fail_start_ptr <= common->early_fail_end_ptr);
14353
14354
/* Give up when the private data area has grown beyond 65536; both tables
allocated so far are released first. */
if (private_data_size > 65536)
14355
  {
14356
  SLJIT_FREE(common->private_data_ptrs, allocator_data);
14357
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
14358
  return PCRE2_ERROR_NOMEMORY;
14359
  }
14360
14361
/* The then_offsets bytes occupy the tail of the private_data_ptrs allocation:
one extra byte per element was requested above when has_then is set. */
if (common->has_then)
14362
  {
14363
  common->then_offsets = (sljit_u8 *)(common->private_data_ptrs + total_length);
14364
  memset(common->then_offsets, 0, total_length);
14365
  set_then_offsets(common, common->start, NULL);
14366
  }
14367
14368
compiler = sljit_create_compiler(allocator_data);
14369
if (!compiler)
14370
  {
14371
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
14372
  SLJIT_FREE(common->private_data_ptrs, allocator_data);
14373
  return PCRE2_ERROR_NOMEMORY;
14374
  }
14375
common->compiler = compiler;
14376
14377
/* Main pcre2_jit_exec entry. */
14378
SLJIT_ASSERT((private_data_size & (sizeof(sljit_sw) - 1)) == 0);
14379
sljit_emit_enter(compiler, 0, SLJIT_ARGS1(W, W), 5, 5, SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS, 0, private_data_size);
14380
14381
/* Register init. */
14382
reset_ovector(common, (re->top_bracket + 1) * 2);
14383
if (common->req_char_ptr != 0)
14384
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);
14385
14386
OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
14387
OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
14388
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
14389
OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
14390
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
14391
OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
14392
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, end));
14393
OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, start));
14394
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
14395
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);
14396
14397
if (common->early_fail_start_ptr < common->early_fail_end_ptr)
14398
  reset_early_fail(common);
14399
14400
if (mode == PCRE2_JIT_PARTIAL_SOFT)
14401
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
14402
if (common->mark_ptr != 0)
14403
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
14404
if (common->control_head_ptr != 0)
14405
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
14406
14407
/* Main part of the matching */
14408
if ((re->overall_options & PCRE2_ANCHORED) == 0)
14409
  {
14410
  mainloop_label = mainloop_entry(common);
14411
  continue_match_label = LABEL();
14412
  /* Forward search if possible. */
14413
  if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
14414
    {
14415
    if (mode == PCRE2_JIT_COMPLETE && fast_forward_first_n_chars(common))
14416
      ;
14417
    else if ((re->flags & PCRE2_FIRSTSET) != 0)
14418
      fast_forward_first_char(common);
14419
    else if ((re->flags & PCRE2_STARTLINE) != 0)
14420
      fast_forward_newline(common);
14421
    else if ((re->flags & PCRE2_FIRSTMAPSET) != 0)
14422
      fast_forward_start_bits(common);
14423
    }
14424
  }
14425
else
14426
  continue_match_label = LABEL();
14427
14428
if (mode == PCRE2_JIT_COMPLETE && re->minlength > 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
14429
  {
14430
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
14431
  OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(re->minlength));
14432
  minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
14433
  }
14434
if (common->req_char_ptr != 0)
14435
  reqcu_not_found = search_requested_char(common, (PCRE2_UCHAR)(re->last_codeunit), (re->flags & PCRE2_LASTCASELESS) != 0, (re->flags & PCRE2_FIRSTSET) != 0);
14436
14437
/* Store the current STR_PTR in OVECTOR(0). */
14438
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
14439
/* Copy the limit of allowed recursions. */
14440
OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
14441
if (common->capture_last_ptr != 0)
14442
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, 0);
14443
if (common->fast_forward_bc_ptr != NULL)
14444
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3, STR_PTR, 0);
14445
14446
if (common->start_ptr != OVECTOR(0))
14447
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);
14448
14449
/* Copy the beginning of the string. */
14450
if (mode == PCRE2_JIT_PARTIAL_SOFT)
14451
  {
14452
  jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
14453
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
14454
  JUMPHERE(jump);
14455
  }
14456
else if (mode == PCRE2_JIT_PARTIAL_HARD)
14457
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
14458
14459
compile_matchingpath(common, common->start, ccend, &rootbacktrack);
14460
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
14461
  {
14462
  sljit_free_compiler(compiler);
14463
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
14464
  SLJIT_FREE(common->private_data_ptrs, allocator_data);
14465
  PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14466
  return PCRE2_ERROR_NOMEMORY;
14467
  }
14468
14469
if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
14470
  end_anchor_failed = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0);
14471
14472
if (common->might_be_empty)
14473
  {
14474
  empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
14475
  empty_match_found_label = LABEL();
14476
  }
14477
14478
common->accept_label = LABEL();
14479
if (common->accept != NULL)
14480
  set_jumps(common->accept, common->accept_label);
14481
14482
/* This means we have a match. Update the ovector. */
14483
copy_ovector(common, re->top_bracket + 1);
14484
common->quit_label = common->abort_label = LABEL();
14485
if (common->quit != NULL)
14486
  set_jumps(common->quit, common->quit_label);
14487
if (common->abort != NULL)
14488
  set_jumps(common->abort, common->abort_label);
14489
if (minlength_check_failed != NULL)
14490
  SET_LABEL(minlength_check_failed, common->abort_label);
14491
14492
sljit_emit_op0(compiler, SLJIT_SKIP_FRAMES_BEFORE_RETURN);
14493
sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
14494
14495
if (common->failed_match != NULL)
14496
  {
14497
  SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
14498
  set_jumps(common->failed_match, LABEL());
14499
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
14500
  JUMPTO(SLJIT_JUMP, common->abort_label);
14501
  }
14502
14503
if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
14504
  JUMPHERE(end_anchor_failed);
14505
14506
if (mode != PCRE2_JIT_COMPLETE)
14507
  {
14508
  common->partialmatchlabel = LABEL();
14509
  set_jumps(common->partialmatch, common->partialmatchlabel);
14510
  return_with_partial_match(common, common->quit_label);
14511
  }
14512
14513
if (common->might_be_empty)
14514
  empty_match_backtrack_label = LABEL();
14515
compile_backtrackingpath(common, rootbacktrack.top);
14516
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
14517
  {
14518
  sljit_free_compiler(compiler);
14519
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
14520
  SLJIT_FREE(common->private_data_ptrs, allocator_data);
14521
  PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14522
  return PCRE2_ERROR_NOMEMORY;
14523
  }
14524
14525
SLJIT_ASSERT(rootbacktrack.prev == NULL);
14526
reset_match_label = LABEL();
14527
14528
if (mode == PCRE2_JIT_PARTIAL_SOFT)
14529
  {
14530
  /* Update hit_start only in the first time. */
14531
  jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
14532
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
14533
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
14534
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
14535
  JUMPHERE(jump);
14536
  }
14537
14538
/* Check we have remaining characters. */
14539
if ((re->overall_options & PCRE2_ANCHORED) == 0 && common->match_end_ptr != 0)
14540
  {
14541
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
14542
  }
14543
14544
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP),
14545
    (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3) : common->start_ptr);
14546
14547
if ((re->overall_options & PCRE2_ANCHORED) == 0)
14548
  {
14549
  if (common->ff_newline_shortcut != NULL)
14550
    {
14551
    /* There cannot be more newlines if PCRE2_FIRSTLINE is set. */
14552
    if ((re->overall_options & PCRE2_FIRSTLINE) == 0)
14553
      {
14554
      if (common->match_end_ptr != 0)
14555
        {
14556
        OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
14557
        OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
14558
        CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, common->ff_newline_shortcut);
14559
        OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
14560
        }
14561
      else
14562
        CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
14563
      }
14564
    }
14565
  else
14566
    CMPTO(SLJIT_LESS, STR_PTR, 0, (common->match_end_ptr == 0) ? STR_END : TMP1, 0, mainloop_label);
14567
  }
14568
14569
/* No more remaining characters. */
14570
if (reqcu_not_found != NULL)
14571
  set_jumps(reqcu_not_found, LABEL());
14572
14573
if (mode == PCRE2_JIT_PARTIAL_SOFT)
14574
  CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
14575
14576
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
14577
JUMPTO(SLJIT_JUMP, common->quit_label);
14578
14579
flush_stubs(common);
14580
14581
/* Out-of-line handler for the empty_match jump taken above: it tests
PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART in the match options and either
accepts the empty match or resumes backtracking. */
if (common->might_be_empty)
14582
  {
14583
  JUMPHERE(empty_match);
14584
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
14585
  OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
14586
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
14587
  JUMPTO(SLJIT_NOT_ZERO, empty_match_backtrack_label);
14588
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
14589
  JUMPTO(SLJIT_ZERO, empty_match_found_label);
14590
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
14591
  CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
14592
  JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
14593
  }
14594
14595
common->fast_forward_bc_ptr = NULL;
14596
common->early_fail_start_ptr = 0;
14597
common->early_fail_end_ptr = 0;
14598
common->currententry = common->entries;
14599
common->local_quit_available = TRUE;
14600
/* Remember the main quit label; it is written back to common->quit_label once
the recursive entries have been compiled. */
quit_label = common->quit_label;
14601
if (common->currententry != NULL)
14602
  {
14603
  /* A free bit for each private data. */
14604
  common->recurse_bitset_size = ((private_data_size / SSIZE_OF(sw)) + 7) >> 3;
14605
  SLJIT_ASSERT(common->recurse_bitset_size > 0);
14606
  common->recurse_bitset = (sljit_u8*)SLJIT_MALLOC(common->recurse_bitset_size, allocator_data);;
14607
14608
  if (common->recurse_bitset != NULL)
14609
    {
14610
    do
14611
      {
14612
      /* Might add new entries. */
14613
      compile_recurse(common);
14614
      if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
14615
        break;
14616
      flush_stubs(common);
14617
      common->currententry = common->currententry->next;
14618
      }
14619
    while (common->currententry != NULL);
14620
14621
    SLJIT_FREE(common->recurse_bitset, allocator_data);
14622
    }
14623
14624
  /* currententry is still non-NULL only if the bitset allocation failed or the
  loop above stopped on a compiler error. */
  if (common->currententry != NULL)
14625
    {
14626
    /* The common->recurse_bitset has been freed. */
14627
    SLJIT_ASSERT(sljit_get_compiler_error(compiler) || common->recurse_bitset == NULL);
14628
14629
    sljit_free_compiler(compiler);
14630
    SLJIT_FREE(common->optimized_cbracket, allocator_data);
14631
    SLJIT_FREE(common->private_data_ptrs, allocator_data);
14632
    PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14633
    return PCRE2_ERROR_NOMEMORY;
14634
    }
14635
  }
14636
common->local_quit_available = FALSE;
14637
common->quit_label = quit_label;
14638
14639
/* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */
14640
/* This is a (really) rare case. */
14641
set_jumps(common->stackalloc, LABEL());
14642
/* RETURN_ADDR is not a saved register. */
14643
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0);
14644
14645
SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
14646
14647
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
14648
OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
14649
OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_LIMIT, 0, SLJIT_IMM, STACK_GROWTH_RATE);
14650
OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, stack));
14651
OP1(SLJIT_MOV, STACK_LIMIT, 0, TMP2, 0);
14652
14653
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(sljit_stack_resize));
14654
14655
jump = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
14656
OP1(SLJIT_MOV, TMP2, 0, STACK_LIMIT, 0);
14657
OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_RETURN_REG, 0);
14658
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
14659
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
14660
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
14661
14662
/* Allocation failed. */
14663
JUMPHERE(jump);
14664
/* We break the return address cache here, but this is a really rare case. */
14665
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_JIT_STACKLIMIT);
14666
JUMPTO(SLJIT_JUMP, common->quit_label);
14667
14668
/* Call limit reached. */
14669
set_jumps(common->calllimit, LABEL());
14670
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_MATCHLIMIT);
14671
JUMPTO(SLJIT_JUMP, common->quit_label);
14672
14673
if (common->revertframes != NULL)
14674
  {
14675
  set_jumps(common->revertframes, LABEL());
14676
  do_revertframes(common);
14677
  }
14678
if (common->wordboundary != NULL)
14679
  {
14680
  set_jumps(common->wordboundary, LABEL());
14681
  check_wordboundary(common, FALSE);
14682
  }
14683
if (common->ucp_wordboundary != NULL)
14684
  {
14685
  set_jumps(common->ucp_wordboundary, LABEL());
14686
  check_wordboundary(common, TRUE);
14687
  }
14688
if (common->anynewline != NULL)
14689
  {
14690
  set_jumps(common->anynewline, LABEL());
14691
  check_anynewline(common);
14692
  }
14693
if (common->hspace != NULL)
14694
  {
14695
  set_jumps(common->hspace, LABEL());
14696
  check_hspace(common);
14697
  }
14698
if (common->vspace != NULL)
14699
  {
14700
  set_jumps(common->vspace, LABEL());
14701
  check_vspace(common);
14702
  }
14703
if (common->casefulcmp != NULL)
14704
  {
14705
  set_jumps(common->casefulcmp, LABEL());
14706
  do_casefulcmp(common);
14707
  }
14708
if (common->caselesscmp != NULL)
14709
  {
14710
  set_jumps(common->caselesscmp, LABEL());
14711
  do_caselesscmp(common);
14712
  }
14713
if (common->reset_match != NULL || common->restart_match != NULL)
14714
  {
14715
  if (common->restart_match != NULL)
14716
    {
14717
    set_jumps(common->restart_match, LABEL());
14718
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
14719
    }
14720
14721
  set_jumps(common->reset_match, LABEL());
14722
  do_reset_match(common, (re->top_bracket + 1) * 2);
14723
  /* The value of restart_match is in TMP1. */
14724
  CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
14725
  OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
14726
  JUMPTO(SLJIT_JUMP, reset_match_label);
14727
  }
14728
#ifdef SUPPORT_UNICODE
14729
#if PCRE2_CODE_UNIT_WIDTH == 8
14730
if (common->utfreadchar != NULL)
14731
  {
14732
  set_jumps(common->utfreadchar, LABEL());
14733
  do_utfreadchar(common);
14734
  }
14735
if (common->utfreadtype8 != NULL)
14736
  {
14737
  set_jumps(common->utfreadtype8, LABEL());
14738
  do_utfreadtype8(common);
14739
  }
14740
if (common->utfpeakcharback != NULL)
14741
  {
14742
  set_jumps(common->utfpeakcharback, LABEL());
14743
  do_utfpeakcharback(common);
14744
  }
14745
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
14746
#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
14747
if (common->utfreadchar_invalid != NULL)
14748
  {
14749
  set_jumps(common->utfreadchar_invalid, LABEL());
14750
  do_utfreadchar_invalid(common);
14751
  }
14752
if (common->utfreadnewline_invalid != NULL)
14753
  {
14754
  set_jumps(common->utfreadnewline_invalid, LABEL());
14755
  do_utfreadnewline_invalid(common);
14756
  }
14757
if (common->utfmoveback_invalid)
14758
  {
14759
  set_jumps(common->utfmoveback_invalid, LABEL());
14760
  do_utfmoveback_invalid(common);
14761
  }
14762
if (common->utfpeakcharback_invalid)
14763
  {
14764
  set_jumps(common->utfpeakcharback_invalid, LABEL());
14765
  do_utfpeakcharback_invalid(common);
14766
  }
14767
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16 */
14768
if (common->getucd != NULL)
14769
  {
14770
  set_jumps(common->getucd, LABEL());
14771
  do_getucd(common);
14772
  }
14773
if (common->getucdtype != NULL)
14774
  {
14775
  set_jumps(common->getucdtype, LABEL());
14776
  do_getucdtype(common);
14777
  }
14778
#endif /* SUPPORT_UNICODE */
14779
14780
/* Both compile-time tables are released before code generation; only the
compiler object and the read-only data list are still needed below. */
SLJIT_FREE(common->optimized_cbracket, allocator_data);
14781
SLJIT_FREE(common->private_data_ptrs, allocator_data);
14782
14783
executable_func = sljit_generate_code(compiler, 0, NULL);
14784
executable_size = sljit_get_generated_code_size(compiler);
14785
sljit_free_compiler(compiler);
14786
14787
if (executable_func == NULL)
14788
  {
14789
  PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14790
  return PCRE2_ERROR_NOMEMORY;
14791
  }
14792
14793
/* Reuse the function descriptor if possible. */
14794
if (re->executable_jit != NULL)
14795
  functions = (executable_functions *)re->executable_jit;
14796
else
14797
  {
14798
  functions = SLJIT_MALLOC(sizeof(executable_functions), allocator_data);
14799
  if (functions == NULL)
14800
    {
14801
    /* This case is highly unlikely since we just recently
14802
    freed a lot of memory. Not impossible though. */
14803
    sljit_free_code(executable_func, NULL);
14804
    PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14805
    return PCRE2_ERROR_NOMEMORY;
14806
    }
14807
  memset(functions, 0, sizeof(executable_functions));
14808
  functions->top_bracket = re->top_bracket + 1;
14809
  functions->limit_match = re->limit_match;
14810
  re->executable_jit = functions;
14811
  }
14812
14813
/* Turn mode into an index. */
14814
if (mode == PCRE2_JIT_COMPLETE)
14815
  mode = 0;
14816
else
14817
  mode = (mode == PCRE2_JIT_PARTIAL_SOFT) ? 1 : 2;
14818
14819
SLJIT_ASSERT(mode < JIT_NUMBER_OF_COMPILE_MODES);
14820
functions->executable_funcs[mode] = executable_func;
14821
functions->read_only_data_heads[mode] = common->read_only_data_head;
14822
functions->executable_sizes[mode] = executable_size;
14823
return 0;
14824
}
14825
14826
#endif
14827
14828
/*************************************************
14829
*        JIT compile a Regular Expression        *
14830
*************************************************/
14831
14832
/* This function uses JIT to convert a previously-compiled pattern into machine
14833
code.
14834
14835
Arguments:
14836
  code          a compiled pattern
14837
  options       JIT option bits
14838
14839
Returns:        0: success or (*NOJIT) was used
14840
               <0: an error code
14841
*/
14842
14843
/* Every option bit that pcre2_jit_compile() accepts. A call that sets any bit
outside this mask is rejected with PCRE2_ERROR_JIT_BADOPTION. */
#define PUBLIC_JIT_COMPILE_OPTIONS \
14844
0
  (PCRE2_JIT_COMPLETE|PCRE2_JIT_PARTIAL_SOFT|PCRE2_JIT_PARTIAL_HARD|PCRE2_JIT_INVALID_UTF)
14845
14846
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
14847
pcre2_jit_compile(pcre2_code *code, uint32_t options)
14848
0
{
14849
0
pcre2_real_code *re = (pcre2_real_code *)code;
14850
#ifdef SUPPORT_JIT
14851
executable_functions *functions;
14852
static int executable_allocator_is_working = -1;
14853
#endif
14854
14855
0
if (code == NULL)
14856
0
  return PCRE2_ERROR_NULL;
14857
14858
0
if ((options & ~PUBLIC_JIT_COMPILE_OPTIONS) != 0)
14859
0
  return PCRE2_ERROR_JIT_BADOPTION;
14860
14861
/* Support for invalid UTF was first introduced in JIT, with the option
14862
PCRE2_JIT_INVALID_UTF. Later, support was added to the interpreter, and the
14863
compile-time option PCRE2_MATCH_INVALID_UTF was created. This is now the
14864
preferred feature, with the earlier option deprecated. However, for backward
14865
compatibility, if the earlier option is set, it forces the new option so that
14866
if JIT matching falls back to the interpreter, there is still support for
14867
invalid UTF. However, if this function has already been successfully called
14868
without PCRE2_JIT_INVALID_UTF and without PCRE2_MATCH_INVALID_UTF (meaning that
14869
non-invalid-supporting JIT code was compiled), give an error.
14870
14871
If in the future support for PCRE2_JIT_INVALID_UTF is withdrawn, the following
14872
actions are needed:
14873
14874
  1. Remove the definition from pcre2.h.in and from the list in
14875
     PUBLIC_JIT_COMPILE_OPTIONS above.
14876
14877
  2. Replace PCRE2_JIT_INVALID_UTF with a local flag in this module.
14878
14879
  3. Replace PCRE2_JIT_INVALID_UTF in pcre2_jit_test.c.
14880
14881
  4. Delete the following short block of code. The setting of "re" and
14882
     "functions" can be moved into the JIT-only block below, but if that is
14883
     done, (void)re and (void)functions will be needed in the non-JIT case, to
14884
     avoid compiler warnings.
14885
*/
14886
14887
#ifdef SUPPORT_JIT
14888
functions = (executable_functions *)re->executable_jit;
14889
#endif
14890
14891
0
if ((options & PCRE2_JIT_INVALID_UTF) != 0)
14892
0
  {
14893
0
  if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) == 0)
14894
0
    {
14895
#ifdef SUPPORT_JIT
14896
    if (functions != NULL) return PCRE2_ERROR_JIT_BADOPTION;
14897
#endif
14898
0
    re->overall_options |= PCRE2_MATCH_INVALID_UTF;
14899
0
    }
14900
0
  }
14901
14902
/* The above tests are run with and without JIT support. This means that
14903
PCRE2_JIT_INVALID_UTF propagates back into the regex options (ensuring
14904
interpreter support) even in the absence of JIT. But now, if there is no JIT
14905
support, give an error return. */
14906
14907
0
#ifndef SUPPORT_JIT
14908
0
return PCRE2_ERROR_JIT_BADOPTION;
14909
#else  /* SUPPORT_JIT */
14910
14911
/* There is JIT support. Do the necessary. */
14912
14913
if ((re->flags & PCRE2_NOJIT) != 0) return 0;
14914
14915
if (executable_allocator_is_working == -1)
14916
  {
14917
  /* Checks whether the executable allocator is working. This check
14918
     might run multiple times in multi-threaded environments, but the
14919
     result should not be affected by it. */
14920
  void *ptr = SLJIT_MALLOC_EXEC(32, NULL);
14921
  if (ptr != NULL)
14922
    {
14923
    SLJIT_FREE_EXEC(((sljit_u8*)(ptr)) + SLJIT_EXEC_OFFSET(ptr), NULL);
14924
    executable_allocator_is_working = 1;
14925
    }
14926
  else executable_allocator_is_working = 0;
14927
  }
14928
14929
if (!executable_allocator_is_working)
14930
  return PCRE2_ERROR_NOMEMORY;
14931
14932
if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0)
14933
  options |= PCRE2_JIT_INVALID_UTF;
14934
14935
if ((options & PCRE2_JIT_COMPLETE) != 0 && (functions == NULL
14936
    || functions->executable_funcs[0] == NULL)) {
14937
  uint32_t excluded_options = (PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD);
14938
  int result = jit_compile(code, options & ~excluded_options);
14939
  if (result != 0)
14940
    return result;
14941
  }
14942
14943
if ((options & PCRE2_JIT_PARTIAL_SOFT) != 0 && (functions == NULL
14944
    || functions->executable_funcs[1] == NULL)) {
14945
  uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_HARD);
14946
  int result = jit_compile(code, options & ~excluded_options);
14947
  if (result != 0)
14948
    return result;
14949
  }
14950
14951
if ((options & PCRE2_JIT_PARTIAL_HARD) != 0 && (functions == NULL
14952
    || functions->executable_funcs[2] == NULL)) {
14953
  uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT);
14954
  int result = jit_compile(code, options & ~excluded_options);
14955
  if (result != 0)
14956
    return result;
14957
  }
14958
14959
return 0;
14960
14961
#endif  /* SUPPORT_JIT */
14962
0
}
14963
14964
/* JIT compiler uses an all-in-one approach. This improves security,
14965
   since the code generator functions are not exported. */
14966
14967
#define INCLUDED_FROM_PCRE2_JIT_COMPILE
14968
14969
#include "pcre2_jit_match.c"
14970
#include "pcre2_jit_misc.c"
14971
14972
/* End of pcre2_jit_compile.c */