Coverage Report

Created: 2026-06-30 06:26

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/glib/subprojects/pcre2-10.44/src/pcre2_jit_compile.c
Line
Count
Source
1
/*************************************************
2
*      Perl-Compatible Regular Expressions       *
3
*************************************************/
4
5
/* PCRE is a library of functions to support regular expressions whose syntax
6
and semantics are as close as possible to those of the Perl 5 language.
7
8
                       Written by Philip Hazel
9
                    This module by Zoltan Herczeg
10
     Original API code Copyright (c) 1997-2012 University of Cambridge
11
          New API code Copyright (c) 2016-2024 University of Cambridge
12
13
-----------------------------------------------------------------------------
14
Redistribution and use in source and binary forms, with or without
15
modification, are permitted provided that the following conditions are met:
16
17
    * Redistributions of source code must retain the above copyright notice,
18
      this list of conditions and the following disclaimer.
19
20
    * Redistributions in binary form must reproduce the above copyright
21
      notice, this list of conditions and the following disclaimer in the
22
      documentation and/or other materials provided with the distribution.
23
24
    * Neither the name of the University of Cambridge nor the names of its
25
      contributors may be used to endorse or promote products derived from
26
      this software without specific prior written permission.
27
28
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
-----------------------------------------------------------------------------
40
*/
41
42
#ifdef HAVE_CONFIG_H
43
#include "config.h"
44
#endif
45
46
#if defined(__has_feature)
47
#if __has_feature(memory_sanitizer)
48
#include <sanitizer/msan_interface.h>
49
#endif /* __has_feature(memory_sanitizer) */
50
#endif /* defined(__has_feature) */
51
52
#include "pcre2_internal.h"
53
54
#ifdef SUPPORT_JIT
55
56
/* All-in-one: Since we use the JIT compiler only from here,
57
we just include it. This way we don't need to touch the build
58
system files. */
59
60
#define SLJIT_CONFIG_AUTO 1
61
#define SLJIT_CONFIG_STATIC 1
62
#define SLJIT_VERBOSE 0
63
64
#ifdef PCRE2_DEBUG
65
#define SLJIT_DEBUG 1
66
#else
67
#define SLJIT_DEBUG 0
68
#endif
69
70
0
#define SLJIT_MALLOC(size, allocator_data) pcre2_jit_malloc(size, allocator_data)
71
0
#define SLJIT_FREE(ptr, allocator_data) pcre2_jit_free(ptr, allocator_data)
72
73
/* Allocation hook used by the SLJIT_MALLOC macro. allocator_data is
interpreted as a pcre2_memctl; the request is forwarded to its malloc
callback together with the memctl's memory_data, and the callback's
result is returned unchanged. */
static void * pcre2_jit_malloc(size_t size, void *allocator_data)
74
0
{
75
0
pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
76
0
return allocator->malloc(size, allocator->memory_data);
77
0
}
78
79
/* Release hook used by the SLJIT_FREE macro. allocator_data is
interpreted as a pcre2_memctl; ptr is handed to its free callback
together with the memctl's memory_data. */
static void pcre2_jit_free(void *ptr, void *allocator_data)
80
0
{
81
0
pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
82
0
allocator->free(ptr, allocator->memory_data);
83
0
}
84
85
#include "sljit/sljitLir.c"
86
87
#if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
88
#error Unsupported architecture
89
#endif
90
91
/* Defines for debugging purposes. */
92
93
/* 1 - Use unoptimized capturing brackets.
94
   2 - Enable capture_last_ptr (includes option 1). */
95
/* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
96
97
/* 1 - Always have a control head. */
98
/* #define DEBUG_FORCE_CONTROL_HEAD 1 */
99
100
/* Allocate memory for the regex stack on the real machine stack.
101
Fast, but limited size. */
102
0
#define MACHINE_STACK_SIZE 32768
103
104
/* Growth rate for stack allocated by the OS. Should be a multiple
105
of page size. */
106
0
#define STACK_GROWTH_RATE 8192
107
108
/* Enable to check that the allocation could destroy temporaries. */
109
#if defined SLJIT_DEBUG && SLJIT_DEBUG
110
#define DESTROY_REGISTERS 1
111
#endif
112
113
/*
114
Short summary about the backtracking mechanism employed by the jit code generator:
115
116
The code generator follows the recursive nature of the PERL compatible regular
117
expressions. The basic blocks of regular expressions are condition checkers
118
which execute different commands depending on the result of the condition check.
119
The relationship between the operators can be horizontal (concatenation) and
120
vertical (sub-expression) (See struct backtrack_common for more details).
121
122
  'ab' - 'a' and 'b' regexps are concatenated
123
  'a+' - 'a' is the sub-expression of the '+' operator
124
125
The condition checkers are boolean (true/false) checkers. Machine code is generated
126
for the checker itself and for the actions depending on the result of the checker.
127
The 'true' case is called as the matching path (expected path), and the other is called as
128
the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
129
branches on the matching path.
130
131
 Greedy star operator (*) :
132
   Matching path: match happens.
133
   Backtrack path: match failed.
134
 Non-greedy star operator (*?) :
135
   Matching path: no need to perform a match.
136
   Backtrack path: match is required.
137
138
The following example shows how the code is generated for a capturing bracket
139
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
140
we have the following regular expression:
141
142
   A(B|C)D
143
144
The generated code will be the following:
145
146
 A matching path
147
 '(' matching path (pushing arguments to the stack)
148
 B matching path
149
 ')' matching path (pushing arguments to the stack)
150
 D matching path
151
 return with successful match
152
153
 D backtrack path
154
 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
155
 B backtrack path
156
 C expected path
157
 jump to D matching path
158
 C backtrack path
159
 A backtrack path
160
161
 Notice that the order of the backtrack code paths is the opposite of the fast
162
 code paths. In this way the topmost value on the stack always belongs
163
 to the current backtrack code path. The backtrack path must check
164
 whether there is a next alternative. If so, it needs to jump back to
165
 the matching path eventually. Otherwise it needs to clear out its own stack
166
 frame and continue the execution on the backtrack code paths.
167
*/
168
169
/*
170
Saved stack frames:
171
172
Atomic blocks and asserts require reloading the values of private data
173
when the backtrack mechanism is performed. Because of OP_RECURSE, the data
174
are not necessarily known at compile time, thus we need a dynamic restore
175
mechanism.
176
177
The stack frames are stored in a chain list, and have the following format:
178
([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
179
180
Thus we can restore the private data to a particular point in the stack.
181
*/
182
183
typedef struct jit_arguments {
184
  /* Pointers first. */
185
  struct sljit_stack *stack;
186
  PCRE2_SPTR str;
187
  PCRE2_SPTR begin;
188
  PCRE2_SPTR end;
189
  pcre2_match_data *match_data;
190
  PCRE2_SPTR startchar_ptr;
191
  PCRE2_UCHAR *mark_ptr;
192
  int (*callout)(pcre2_callout_block *, void *);
193
  void *callout_data;
194
  /* Everything else after. */
195
  sljit_uw offset_limit;
196
  sljit_u32 limit_match;
197
  sljit_u32 oveccount;
198
  sljit_u32 options;
199
} jit_arguments;
200
201
0
#define JIT_NUMBER_OF_COMPILE_MODES 3
202
203
typedef struct executable_functions {
204
  void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
205
  void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
206
  sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
207
  sljit_u32 top_bracket;
208
  sljit_u32 limit_match;
209
} executable_functions;
210
211
typedef struct jump_list {
212
  struct sljit_jump *jump;
213
  struct jump_list *next;
214
} jump_list;
215
216
typedef struct stub_list {
217
  struct sljit_jump *start;
218
  struct sljit_label *quit;
219
  struct stub_list *next;
220
} stub_list;
221
222
enum frame_types {
223
  no_frame = -1,
224
  no_stack = -2
225
};
226
227
enum control_types {
228
  type_mark = 0,
229
  type_then_trap = 1
230
};
231
232
enum  early_fail_types {
233
  type_skip = 0,
234
  type_fail = 1,
235
  type_fail_range = 2
236
};
237
238
typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args);
239
240
/* The following structure is the key data type for the recursive
241
code generator. It is allocated by compile_matchingpath, and contains
242
the arguments for compile_backtrackingpath. Must be the first member
243
of its descendants. */
244
typedef struct backtrack_common {
245
  /* Backtracking path of an opcode, which falls back
246
     to our opcode, if it cannot resume matching. */
247
  struct backtrack_common *prev;
248
  /* Backtracks for opcodes without backtracking path.
249
     These opcodes are between 'prev' and the current
250
     opcode, and they never resume the match. */
251
  jump_list *simple_backtracks;
252
  /* Internal backtracking list for block constructs
253
     which contains other opcodes, such as brackets,
254
     asserts, conditionals, etc. */
255
  struct backtrack_common *top;
256
  /* Backtracks used internally by the opcode. For component
257
     opcodes, this list is also used by those opcodes without
258
     backtracking path which follows the 'top' backtrack. */
259
  jump_list *own_backtracks;
260
  /* Opcode pointer. */
261
  PCRE2_SPTR cc;
262
} backtrack_common;
263
264
typedef struct assert_backtrack {
265
  backtrack_common common;
266
  jump_list *condfailed;
267
  /* Less than 0 if a frame is not needed. */
268
  int framesize;
269
  /* Points to our private memory word on the stack. */
270
  int private_data_ptr;
271
  /* For iterators. */
272
  struct sljit_label *matchingpath;
273
} assert_backtrack;
274
275
/* Backtrack data for bracket constructs. The backtrack_common member
comes first, as required of every structure derived from it. */
typedef struct bracket_backtrack {
276
  backtrack_common common;
277
  /* Where to continue if an alternative is successfully matched. */
278
  struct sljit_label *alternative_matchingpath;
279
  /* For rmin and rmax iterators. */
280
  struct sljit_label *recursive_matchingpath;
281
  /* For greedy ? operator. */
282
  struct sljit_label *zero_matchingpath;
283
  /* Contains the branches of a failed condition. */
284
  union {
285
    /* Both for OP_COND, OP_SCOND. */
286
    jump_list *condfailed;
287
    assert_backtrack *assert;
288
    /* For OP_ONCE. Less than 0 if not needed. */
289
    int framesize;
290
    /* For brackets with >3 alternatives. */
291
    struct sljit_jump *matching_mov_addr;
292
  } u;
293
  /* Points to our private memory word on the stack. */
294
  int private_data_ptr;
295
} bracket_backtrack;
296
297
typedef struct bracketpos_backtrack {
298
  backtrack_common common;
299
  /* Points to our private memory word on the stack. */
300
  int private_data_ptr;
301
  /* Reverting stack is needed. */
302
  int framesize;
303
  /* Allocated stack size. */
304
  int stacksize;
305
} bracketpos_backtrack;
306
307
typedef struct braminzero_backtrack {
308
  backtrack_common common;
309
  struct sljit_label *matchingpath;
310
} braminzero_backtrack;
311
312
typedef struct char_iterator_backtrack {
313
  backtrack_common common;
314
  /* Next iteration. */
315
  struct sljit_label *matchingpath;
316
  union {
317
    jump_list *backtracks;
318
    struct {
319
      unsigned int othercasebit;
320
      PCRE2_UCHAR chr;
321
      BOOL enabled;
322
    } charpos;
323
  } u;
324
} char_iterator_backtrack;
325
326
typedef struct ref_iterator_backtrack {
327
  backtrack_common common;
328
  /* Next iteration. */
329
  struct sljit_label *matchingpath;
330
} ref_iterator_backtrack;
331
332
/* One element of a singly linked list describing a recursive function
that is (or will be) generated for a given starting opcode. */
typedef struct recurse_entry {
333
  /* Next element of the list. */
  struct recurse_entry *next;
334
  /* Contains the function entry label. */
335
  struct sljit_label *entry_label;
336
  /* Contains the function backtrack label (presumably the target of
     backtrack_calls - TODO confirm). */
337
  struct sljit_label *backtrack_label;
338
  /* Collects the entry calls until the function is created. */
339
  jump_list *entry_calls;
340
  /* Collects the backtrack calls until the function is created. */
341
  jump_list *backtrack_calls;
342
  /* Points to the starting opcode. */
343
  sljit_sw start;
344
} recurse_entry;
345
346
typedef struct recurse_backtrack {
347
  backtrack_common common;
348
  /* Return to the matching path. */
349
  struct sljit_label *matchingpath;
350
  /* Recursive pattern. */
351
  recurse_entry *entry;
352
  /* Pattern is inlined. */
353
  BOOL inlined_pattern;
354
} recurse_backtrack;
355
356
typedef struct vreverse_backtrack {
357
  backtrack_common common;
358
  /* Return to the matching path. */
359
  struct sljit_label *matchingpath;
360
} vreverse_backtrack;
361
362
0
#define OP_THEN_TRAP OP_TABLE_LENGTH
363
364
typedef struct then_trap_backtrack {
365
  backtrack_common common;
366
  /* If then_trap is not NULL, this structure contains the real
367
  then_trap for the backtracking path. */
368
  struct then_trap_backtrack *then_trap;
369
  /* Points to the starting opcode. */
370
  sljit_sw start;
371
  /* Exit point for the then opcodes of this alternative. */
372
  jump_list *quit;
373
  /* Frame size of the current alternative. */
374
  int framesize;
375
} then_trap_backtrack;
376
377
0
#define MAX_N_CHARS 12
378
0
#define MAX_DIFF_CHARS 5
379
380
typedef struct fast_forward_char_data {
381
  /* Number of characters in the chars array, 255 for any character. */
382
  sljit_u8 count;
383
  /* Number of last UTF-8 characters in the chars array. */
384
  sljit_u8 last_count;
385
  /* Available characters in the current position. */
386
  PCRE2_UCHAR chars[MAX_DIFF_CHARS];
387
} fast_forward_char_data;
388
389
0
#define MAX_CLASS_RANGE_SIZE 4
390
0
#define MAX_CLASS_CHARS_SIZE 3
391
392
typedef struct compiler_common {
393
  /* The sljit generic compiler. */
394
  struct sljit_compiler *compiler;
395
  /* Compiled regular expression. */
396
  pcre2_real_code *re;
397
  /* First byte code. */
398
  PCRE2_SPTR start;
399
  /* Maps private data offset to each opcode. */
400
  sljit_s32 *private_data_ptrs;
401
  /* Chain list of read-only data ptrs. */
402
  void *read_only_data_head;
403
  /* Tells whether the capturing bracket is optimized. */
404
  sljit_u8 *optimized_cbracket;
405
  /* Tells whether the starting offset is a target of then. */
406
  sljit_u8 *then_offsets;
407
  /* Current position where a THEN must jump. */
408
  then_trap_backtrack *then_trap;
409
  /* Starting offset of private data for capturing brackets. */
410
  sljit_s32 cbra_ptr;
411
  /* Output vector starting point. Must be divisible by 2. */
412
  sljit_s32 ovector_start;
413
  /* Points to the starting character of the current match. */
414
  sljit_s32 start_ptr;
415
  /* Last known position of the requested byte. */
416
  sljit_s32 req_char_ptr;
417
  /* Head of the last recursion. */
418
  sljit_s32 recursive_head_ptr;
419
  /* First inspected character for partial matching.
420
     (Needed for avoiding zero length partial matches.) */
421
  sljit_s32 start_used_ptr;
422
  /* Starting pointer for partial soft matches. */
423
  sljit_s32 hit_start;
424
  /* Pointer of the match end position. */
425
  sljit_s32 match_end_ptr;
426
  /* Points to the marked string. */
427
  sljit_s32 mark_ptr;
428
  /* Head of the recursive control verb management chain.
429
     Each item must have a previous offset and type
430
     (see control_types) values. See do_search_mark. */
431
  sljit_s32 control_head_ptr;
432
  /* Points to the last matched capture block index. */
433
  sljit_s32 capture_last_ptr;
434
  /* Fast forward skipping byte code pointer. */
435
  PCRE2_SPTR fast_forward_bc_ptr;
436
  /* Locals used by fast fail optimization. */
437
  sljit_s32 early_fail_start_ptr;
438
  sljit_s32 early_fail_end_ptr;
439
  /* Variables used by recursive call generator. */
440
  sljit_s32 recurse_bitset_size;
441
  uint8_t *recurse_bitset;
442
443
  /* Flipped and lower case tables. */
444
  const sljit_u8 *fcc;
445
  sljit_sw lcc;
446
  /* Mode can be PCRE2_JIT_COMPLETE and others. */
447
  int mode;
448
  /* TRUE, when empty match is accepted for partial matching. */
449
  BOOL allow_empty_partial;
450
  /* TRUE, when minlength is greater than 0. */
451
  BOOL might_be_empty;
452
  /* \K is found in the pattern. */
453
  BOOL has_set_som;
454
  /* (*SKIP:arg) is found in the pattern. */
455
  BOOL has_skip_arg;
456
  /* (*THEN) is found in the pattern. */
457
  BOOL has_then;
458
  /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
459
  BOOL has_skip_in_assert_back;
460
  /* Quit is redirected by recurse, negative assertion, or positive assertion in conditional block. */
461
  BOOL local_quit_available;
462
  /* Currently in a positive assertion. */
463
  BOOL in_positive_assertion;
464
  /* Newline control. */
465
  int nltype;
466
  sljit_u32 nlmax;
467
  sljit_u32 nlmin;
468
  int newline;
469
  int bsr_nltype;
470
  sljit_u32 bsr_nlmax;
471
  sljit_u32 bsr_nlmin;
472
  /* Dollar endonly. */
473
  int endonly;
474
  /* Tables. */
475
  sljit_sw ctypes;
476
  /* Named capturing brackets. */
477
  PCRE2_SPTR name_table;
478
  sljit_sw name_count;
479
  sljit_sw name_entry_size;
480
481
  /* Labels and jump lists. */
482
  struct sljit_label *partialmatchlabel;
483
  struct sljit_label *quit_label;
484
  struct sljit_label *abort_label;
485
  struct sljit_label *accept_label;
486
  struct sljit_label *ff_newline_shortcut;
487
  stub_list *stubs;
488
  recurse_entry *entries;
489
  recurse_entry *currententry;
490
  jump_list *partialmatch;
491
  jump_list *quit;
492
  jump_list *positive_assertion_quit;
493
  jump_list *abort;
494
  jump_list *failed_match;
495
  jump_list *accept;
496
  jump_list *calllimit;
497
  jump_list *stackalloc;
498
  jump_list *revertframes;
499
  jump_list *wordboundary;
500
  jump_list *ucp_wordboundary;
501
  jump_list *anynewline;
502
  jump_list *hspace;
503
  jump_list *vspace;
504
  jump_list *casefulcmp;
505
  jump_list *caselesscmp;
506
  jump_list *reset_match;
507
  /* Same as reset_match, but resets the STR_PTR as well. */
508
  jump_list *restart_match;
509
  BOOL unset_backref;
510
  BOOL alt_circumflex;
511
#ifdef SUPPORT_UNICODE
512
  BOOL utf;
513
  BOOL invalid_utf;
514
  BOOL ucp;
515
  /* Points to saving area for iref. */
516
  sljit_s32 iref_ptr;
517
  jump_list *getucd;
518
  jump_list *getucdtype;
519
#if PCRE2_CODE_UNIT_WIDTH == 8
520
  jump_list *utfreadchar;
521
  jump_list *utfreadtype8;
522
  jump_list *utfpeakcharback;
523
#endif
524
#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
525
  jump_list *utfreadchar_invalid;
526
  jump_list *utfreadnewline_invalid;
527
  jump_list *utfmoveback_invalid;
528
  jump_list *utfpeakcharback_invalid;
529
#endif
530
#endif /* SUPPORT_UNICODE */
531
} compiler_common;
532
533
/* For byte_sequence_compare. */
534
535
typedef struct compare_context {
536
  int length;
537
  int sourcereg;
538
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
539
  int ucharptr;
540
  union {
541
    sljit_s32 asint;
542
    sljit_u16 asushort;
543
#if PCRE2_CODE_UNIT_WIDTH == 8
544
    sljit_u8 asbyte;
545
    sljit_u8 asuchars[4];
546
#elif PCRE2_CODE_UNIT_WIDTH == 16
547
    sljit_u16 asuchars[2];
548
#elif PCRE2_CODE_UNIT_WIDTH == 32
549
    sljit_u32 asuchars[1];
550
#endif
551
  } c;
552
  union {
553
    sljit_s32 asint;
554
    sljit_u16 asushort;
555
#if PCRE2_CODE_UNIT_WIDTH == 8
556
    sljit_u8 asbyte;
557
    sljit_u8 asuchars[4];
558
#elif PCRE2_CODE_UNIT_WIDTH == 16
559
    sljit_u16 asuchars[2];
560
#elif PCRE2_CODE_UNIT_WIDTH == 32
561
    sljit_u32 asuchars[1];
562
#endif
563
  } oc;
564
#endif
565
} compare_context;
566
567
/* Undefine sljit macros. */
568
#undef CMP
569
570
/* Used for accessing the elements of the stack. */
571
0
#define STACK(i)      ((i) * SSIZE_OF(sw))
572
573
#ifdef SLJIT_PREF_SHIFT_REG
574
#if SLJIT_PREF_SHIFT_REG == SLJIT_R2
575
/* Nothing. */
576
#elif SLJIT_PREF_SHIFT_REG == SLJIT_R3
577
#define SHIFT_REG_IS_R3
578
#else
579
#error "Unsupported shift register"
580
#endif
581
#endif
582
583
0
#define TMP1          SLJIT_R0
584
#ifdef SHIFT_REG_IS_R3
585
0
#define TMP2          SLJIT_R3
586
0
#define TMP3          SLJIT_R2
587
#else
588
#define TMP2          SLJIT_R2
589
#define TMP3          SLJIT_R3
590
#endif
591
0
#define STR_PTR       SLJIT_R1
592
0
#define STR_END       SLJIT_S0
593
0
#define STACK_TOP     SLJIT_S1
594
0
#define STACK_LIMIT   SLJIT_S2
595
0
#define COUNT_MATCH   SLJIT_S3
596
0
#define ARGUMENTS     SLJIT_S4
597
0
#define RETURN_ADDR   SLJIT_R4
598
599
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
600
#define HAS_VIRTUAL_REGISTERS 1
601
#else
602
0
#define HAS_VIRTUAL_REGISTERS 0
603
#endif
604
605
/* Local space layout. */
606
/* These two locals can be used by the current opcode. */
607
0
#define LOCALS0          (0 * sizeof(sljit_sw))
608
0
#define LOCALS1          (1 * sizeof(sljit_sw))
609
/* Two local variables for possessive quantifiers (char1 cannot use them). */
610
0
#define POSSESSIVE0      (2 * sizeof(sljit_sw))
611
#define POSSESSIVE1      (3 * sizeof(sljit_sw))
612
/* Max limit of recursions. */
613
0
#define LIMIT_MATCH      (4 * sizeof(sljit_sw))
614
/* The output vector is stored on the stack, and contains pointers
615
to characters. The vector data is divided into two groups: the first
616
group contains the start / end character pointers, and the second is
617
the start pointers when the end of the capturing group has not yet been reached. */
618
0
#define OVECTOR_START    (common->ovector_start)
619
0
#define OVECTOR(i)       (OVECTOR_START + (i) * SSIZE_OF(sw))
620
0
#define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * SSIZE_OF(sw))
621
0
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
622
623
#if PCRE2_CODE_UNIT_WIDTH == 8
624
0
#define MOV_UCHAR  SLJIT_MOV_U8
625
0
#define IN_UCHARS(x) (x)
626
#elif PCRE2_CODE_UNIT_WIDTH == 16
627
#define MOV_UCHAR  SLJIT_MOV_U16
628
#define UCHAR_SHIFT (1)
629
#define IN_UCHARS(x) ((x) * 2)
630
#elif PCRE2_CODE_UNIT_WIDTH == 32
631
#define MOV_UCHAR  SLJIT_MOV_U32
632
#define UCHAR_SHIFT (2)
633
#define IN_UCHARS(x) ((x) * 4)
634
#else
635
#error Unsupported compiling mode
636
#endif
637
638
/* Shortcuts. */
639
#define DEFINE_COMPILER \
640
0
  struct sljit_compiler *compiler = common->compiler
641
#define OP1(op, dst, dstw, src, srcw) \
642
0
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
643
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
644
0
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
645
#define OP2U(op, src1, src1w, src2, src2w) \
646
0
  sljit_emit_op2u(compiler, (op), (src1), (src1w), (src2), (src2w))
647
#define OP_SRC(op, src, srcw) \
648
0
  sljit_emit_op_src(compiler, (op), (src), (srcw))
649
#define LABEL() \
650
0
  sljit_emit_label(compiler)
651
#define JUMP(type) \
652
0
  sljit_emit_jump(compiler, (type))
653
#define JUMPTO(type, label) \
654
0
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
655
#define JUMPHERE(jump) \
656
0
  sljit_set_label((jump), sljit_emit_label(compiler))
657
#define SET_LABEL(jump, label) \
658
0
  sljit_set_label((jump), (label))
659
#define CMP(type, src1, src1w, src2, src2w) \
660
0
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
661
#define CMPTO(type, src1, src1w, src2, src2w, label) \
662
0
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
663
#define OP_FLAGS(op, dst, dstw, type) \
664
0
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))
665
#define SELECT(type, dst_reg, src1, src1w, src2_reg) \
666
0
  sljit_emit_select(compiler, (type), (dst_reg), (src1), (src1w), (src2_reg))
667
#define GET_LOCAL_BASE(dst, dstw, offset) \
668
0
  sljit_get_local_base(compiler, (dst), (dstw), (offset))
669
670
0
#define READ_CHAR_MAX 0x7fffffff
671
672
#define INVALID_UTF_CHAR -1
673
#define UNASSIGNED_UTF_CHAR 888
674
675
#if defined SUPPORT_UNICODE
676
#if PCRE2_CODE_UNIT_WIDTH == 8
677
678
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
679
0
  { \
680
0
  if (ptr[0] <= 0x7f) \
681
0
    c = *ptr++; \
682
0
  else if (ptr + 1 < end && ptr[1] >= 0x80 && ptr[1] < 0xc0) \
683
0
    { \
684
0
    c = ptr[1] - 0x80; \
685
0
    \
686
0
    if (ptr[0] >= 0xc2 && ptr[0] <= 0xdf) \
687
0
      { \
688
0
      c |= (ptr[0] - 0xc0) << 6; \
689
0
      ptr += 2; \
690
0
      } \
691
0
    else if (ptr + 2 < end && ptr[2] >= 0x80 && ptr[2] < 0xc0) \
692
0
      { \
693
0
      c = c << 6 | (ptr[2] - 0x80); \
694
0
      \
695
0
      if (ptr[0] >= 0xe0 && ptr[0] <= 0xef) \
696
0
        { \
697
0
        c |= (ptr[0] - 0xe0) << 12; \
698
0
        ptr += 3; \
699
0
        \
700
0
        if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
701
0
          { \
702
0
          invalid_action; \
703
0
          } \
704
0
        } \
705
0
      else if (ptr + 3 < end && ptr[3] >= 0x80 && ptr[3] < 0xc0) \
706
0
        { \
707
0
        c = c << 6 | (ptr[3] - 0x80); \
708
0
        \
709
0
        if (ptr[0] >= 0xf0 && ptr[0] <= 0xf4) \
710
0
          { \
711
0
          c |= (ptr[0] - 0xf0) << 18; \
712
0
          ptr += 4; \
713
0
          \
714
0
          if (c >= 0x110000 || c < 0x10000) \
715
0
            { \
716
0
            invalid_action; \
717
0
            } \
718
0
          } \
719
0
        else \
720
0
          { \
721
0
          invalid_action; \
722
0
          } \
723
0
        } \
724
0
      else \
725
0
        { \
726
0
        invalid_action; \
727
0
        } \
728
0
      } \
729
0
    else \
730
0
      { \
731
0
      invalid_action; \
732
0
      } \
733
0
    } \
734
0
  else \
735
0
    { \
736
0
    invalid_action; \
737
0
    } \
738
0
  }
739
740
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
741
0
  { \
742
0
  c = ptr[-1]; \
743
0
  if (c <= 0x7f) \
744
0
    ptr--; \
745
0
  else if (ptr - 1 > start && ptr[-1] >= 0x80 && ptr[-1] < 0xc0) \
746
0
    { \
747
0
    c -= 0x80; \
748
0
    \
749
0
    if (ptr[-2] >= 0xc2 && ptr[-2] <= 0xdf) \
750
0
      { \
751
0
      c |= (ptr[-2] - 0xc0) << 6; \
752
0
      ptr -= 2; \
753
0
      } \
754
0
    else if (ptr - 2 > start && ptr[-2] >= 0x80 && ptr[-2] < 0xc0) \
755
0
      { \
756
0
      c = c << 6 | (ptr[-2] - 0x80); \
757
0
      \
758
0
      if (ptr[-3] >= 0xe0 && ptr[-3] <= 0xef) \
759
0
        { \
760
0
        c |= (ptr[-3] - 0xe0) << 12; \
761
0
        ptr -= 3; \
762
0
        \
763
0
        if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
764
0
          { \
765
0
          invalid_action; \
766
0
          } \
767
0
        } \
768
0
      else if (ptr - 3 > start && ptr[-3] >= 0x80 && ptr[-3] < 0xc0) \
769
0
        { \
770
0
        c = c << 6 | (ptr[-3] - 0x80); \
771
0
        \
772
0
        if (ptr[-4] >= 0xf0 && ptr[-4] <= 0xf4) \
773
0
          { \
774
0
          c |= (ptr[-4] - 0xf0) << 18; \
775
0
          ptr -= 4; \
776
0
          \
777
0
          if (c >= 0x110000 || c < 0x10000) \
778
0
            { \
779
0
            invalid_action; \
780
0
            } \
781
0
          } \
782
0
        else \
783
0
          { \
784
0
          invalid_action; \
785
0
          } \
786
0
        } \
787
0
      else \
788
0
        { \
789
0
        invalid_action; \
790
0
        } \
791
0
      } \
792
0
    else \
793
0
      { \
794
0
      invalid_action; \
795
0
      } \
796
0
    } \
797
0
  else \
798
0
    { \
799
0
    invalid_action; \
800
0
    } \
801
0
  }
802
803
#elif PCRE2_CODE_UNIT_WIDTH == 16
804
805
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
806
  { \
807
  if (ptr[0] < 0xd800 || ptr[0] >= 0xe000) \
808
    c = *ptr++; \
809
  else if (ptr[0] < 0xdc00 && ptr + 1 < end && ptr[1] >= 0xdc00 && ptr[1] < 0xe000) \
810
    { \
811
    c = (((ptr[0] - 0xd800) << 10) | (ptr[1] - 0xdc00)) + 0x10000; \
812
    ptr += 2; \
813
    } \
814
  else \
815
    { \
816
    invalid_action; \
817
    } \
818
  }
819
820
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
821
  { \
822
  c = ptr[-1]; \
823
  if (c < 0xd800 || c >= 0xe000) \
824
    ptr--; \
825
  else if (c >= 0xdc00 && ptr - 1 > start && ptr[-2] >= 0xd800 && ptr[-2] < 0xdc00) \
826
    { \
827
    c = (((ptr[-2] - 0xd800) << 10) | (c - 0xdc00)) + 0x10000; \
828
    ptr -= 2; \
829
    } \
830
  else \
831
    { \
832
    invalid_action; \
833
    } \
834
  }
835
836
837
#elif PCRE2_CODE_UNIT_WIDTH == 32
838
839
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
840
  { \
841
  if (ptr[0] < 0xd800 || (ptr[0] >= 0xe000 && ptr[0] < 0x110000)) \
842
    c = *ptr++; \
843
  else \
844
    { \
845
    invalid_action; \
846
    } \
847
  }
848
849
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
850
  { \
851
  c = ptr[-1]; \
852
  if (ptr[-1] < 0xd800 || (ptr[-1] >= 0xe000 && ptr[-1] < 0x110000)) \
853
    ptr--; \
854
  else \
855
    { \
856
    invalid_action; \
857
    } \
858
  }
859
860
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
861
#endif /* SUPPORT_UNICODE */
862
863
/* Returns the address of the opcode that follows the whole group
starting at cc. cc must point to an assertion opcode (OP_ASSERT ..
OP_ASSERTBACK_NA) or a bracket opcode (OP_ONCE .. OP_SCOND). The
GET(cc, 1) links are followed for as long as they land on OP_ALT; the
opcode finally reached (asserted to be in OP_KET .. OP_KETRPOS) and its
link field are then stepped over (1 + LINK_SIZE). */
static PCRE2_SPTR bracketend(PCRE2_SPTR cc)
864
0
{
865
0
SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
866
0
do cc += GET(cc, 1); while (*cc == OP_ALT);
867
0
SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
868
0
cc += 1 + LINK_SIZE;
869
0
return cc;
870
0
}
871
872
/* Returns the number of alternatives of the group starting at cc. cc
must point to an assertion opcode (OP_ASSERT .. OP_ASSERTBACK_NA) or a
bracket opcode (OP_ONCE .. OP_SCOND). Each GET(cc, 1) hop is counted;
hopping stops as soon as the opcode reached is not OP_ALT, so the
result is always at least 1. */
static int no_alternatives(PCRE2_SPTR cc)
873
0
{
874
0
int count = 0;
875
0
SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
876
0
do
877
0
  {
878
0
  cc += GET(cc, 1);
879
0
  count++;
880
0
  }
881
0
while (*cc == OP_ALT);
882
0
SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
883
0
return count;
884
0
}
885
886
/* Returns TRUE when at least one alternative of the assertion at cc
(OP_ASSERTBACK, OP_ASSERTBACK_NOT or OP_ASSERTBACK_NA) has OP_VREVERSE
as the opcode directly after its header, i.e. at offset 1 + LINK_SIZE.
Alternatives are visited by following GET(cc, 1) while the opcode
reached is OP_ALT. Returns FALSE when none of them matches. */
static BOOL find_vreverse(PCRE2_SPTR cc)
887
0
{
888
0
  SLJIT_ASSERT(*cc == OP_ASSERTBACK || *cc == OP_ASSERTBACK_NOT ||  *cc == OP_ASSERTBACK_NA);
889
890
0
  do
891
0
    {
892
0
    if (cc[1 + LINK_SIZE] == OP_VREVERSE)
893
0
      return TRUE;
894
0
    cc += GET(cc, 1);
895
0
    }
896
0
  while (*cc == OP_ALT);
897
898
0
  return FALSE;
899
0
}
900
901
/* Functions which might need modification for every newly supported opcode:
902
 next_opcode
903
 check_opcode_types
904
 set_private_data_ptrs
905
 get_framesize
906
 init_frame
907
 get_recurse_data_length
908
 copy_recurse_data
909
 compile_matchingpath
910
 compile_backtrackingpath
911
*/
912
913
/* Returns the address of the item that follows the one at cc.

Returns NULL for OP_ANYBYTE when Unicode support is compiled in and
common->utf is set, and (after SLJIT_UNREACHABLE) for any opcode that is
not listed below. */
static PCRE2_SPTR next_opcode(compiler_common *common, PCRE2_SPTR cc)
{
SLJIT_UNUSED_ARG(common);

switch(*cc)
  {
  /* Items which store their own total length. */
  case OP_CALLOUT_STR:
  return cc + GET(cc, 1 + 2*LINK_SIZE);

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
  case OP_XCLASS:
  return cc + GET(cc, 1);
#endif

  /* Opcode, two more code units, plus the count stored in cc[1]. */
  case OP_MARK: case OP_COMMIT_ARG: case OP_PRUNE_ARG:
  case OP_SKIP_ARG: case OP_THEN_ARG:
  return cc + 1 + 2 + cc[1];

  case OP_ANYBYTE:
#ifdef SUPPORT_UNICODE
  if (common->utf) return NULL;
#endif
  return cc + 1;

  /* Type iterators: one less than the table length, so the result
  points at the last code unit covered by the table entry. */
  case OP_TYPESTAR: case OP_TYPEMINSTAR:
  case OP_TYPEPLUS: case OP_TYPEMINPLUS:
  case OP_TYPEQUERY: case OP_TYPEMINQUERY:
  case OP_TYPEUPTO: case OP_TYPEMINUPTO:
  case OP_TYPEEXACT:
  case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS:
  case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:
  return cc + PRIV(OP_lengths)[*cc] - 1;

  /* Items ending in a literal character: table length, plus the
  extra code units of that character in UTF mode. */
  case OP_CHAR: case OP_CHARI: case OP_NOT: case OP_NOTI:
  case OP_STAR: case OP_MINSTAR: case OP_PLUS: case OP_MINPLUS:
  case OP_QUERY: case OP_MINQUERY: case OP_UPTO: case OP_MINUPTO:
  case OP_EXACT:
  case OP_POSSTAR: case OP_POSPLUS: case OP_POSQUERY: case OP_POSUPTO:
  case OP_STARI: case OP_MINSTARI: case OP_PLUSI: case OP_MINPLUSI:
  case OP_QUERYI: case OP_MINQUERYI: case OP_UPTOI: case OP_MINUPTOI:
  case OP_EXACTI:
  case OP_POSSTARI: case OP_POSPLUSI: case OP_POSQUERYI: case OP_POSUPTOI:
  case OP_NOTSTAR: case OP_NOTMINSTAR: case OP_NOTPLUS: case OP_NOTMINPLUS:
  case OP_NOTQUERY: case OP_NOTMINQUERY: case OP_NOTUPTO: case OP_NOTMINUPTO:
  case OP_NOTEXACT:
  case OP_NOTPOSSTAR: case OP_NOTPOSPLUS:
  case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:
  case OP_NOTSTARI: case OP_NOTMINSTARI:
  case OP_NOTPLUSI: case OP_NOTMINPLUSI:
  case OP_NOTQUERYI: case OP_NOTMINQUERYI:
  case OP_NOTUPTOI: case OP_NOTMINUPTOI:
  case OP_NOTEXACTI:
  case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI:
  case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:
    {
    PCRE2_SPTR next = cc + PRIV(OP_lengths)[*cc];
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(next[-1])) next += GET_EXTRALEN(next[-1]);
#endif
    return next;
    }

  /* Items whose length is taken directly from the length table. */
  case OP_SOD: case OP_SOM: case OP_SET_SOM:
  case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY:
  case OP_NOT_DIGIT: case OP_DIGIT:
  case OP_NOT_WHITESPACE: case OP_WHITESPACE:
  case OP_NOT_WORDCHAR: case OP_WORDCHAR:
  case OP_ANY: case OP_ALLANY:
  case OP_NOTPROP: case OP_PROP:
  case OP_ANYNL:
  case OP_NOT_HSPACE: case OP_HSPACE:
  case OP_NOT_VSPACE: case OP_VSPACE:
  case OP_EXTUNI:
  case OP_EODN: case OP_EOD:
  case OP_CIRC: case OP_CIRCM:
  case OP_DOLL: case OP_DOLLM:
  case OP_CRSTAR: case OP_CRMINSTAR:
  case OP_CRPLUS: case OP_CRMINPLUS:
  case OP_CRQUERY: case OP_CRMINQUERY:
  case OP_CRRANGE: case OP_CRMINRANGE:
  case OP_CRPOSSTAR: case OP_CRPOSPLUS:
  case OP_CRPOSQUERY: case OP_CRPOSRANGE:
  case OP_CLASS: case OP_NCLASS:
  case OP_REF: case OP_REFI:
  case OP_DNREF: case OP_DNREFI:
  case OP_RECURSE:
  case OP_CALLOUT:
  case OP_ALT:
  case OP_KET: case OP_KETRMAX: case OP_KETRMIN: case OP_KETRPOS:
  case OP_REVERSE: case OP_VREVERSE:
  case OP_ASSERT: case OP_ASSERT_NOT:
  case OP_ASSERTBACK: case OP_ASSERTBACK_NOT:
  case OP_ASSERT_NA: case OP_ASSERTBACK_NA:
  case OP_ONCE:
  case OP_SCRIPT_RUN:
  case OP_BRA: case OP_BRAPOS:
  case OP_CBRA: case OP_CBRAPOS:
  case OP_COND:
  case OP_SBRA: case OP_SBRAPOS:
  case OP_SCBRA: case OP_SCBRAPOS:
  case OP_SCOND:
  case OP_CREF: case OP_DNCREF:
  case OP_RREF: case OP_DNRREF:
  case OP_FALSE: case OP_TRUE:
  case OP_BRAZERO: case OP_BRAMINZERO: case OP_BRAPOSZERO:
  case OP_PRUNE: case OP_SKIP: case OP_THEN: case OP_COMMIT:
  case OP_FAIL: case OP_ACCEPT: case OP_ASSERT_ACCEPT:
  case OP_CLOSE:
  case OP_SKIPZERO:
  case OP_NOT_UCP_WORD_BOUNDARY: case OP_UCP_WORD_BOUNDARY:
  return cc + PRIV(OP_lengths)[*cc];

  default:
  SLJIT_UNREACHABLE();
  return NULL;
  }
}
1116
1117
/* Walks the opcodes in [cc, ccend), recording in common which features the
pattern uses and reserving space (by advancing common->ovector_start) for the
ones that need it.

Returns FALSE when an unsupported construct is found: a callout directly
after OP_COND/OP_SCOND, one of the checked control verbs (or
OP_ASSERT_ACCEPT) located before the end of an OP_ASSERT_NA /
OP_ASSERTBACK_NA bracket, or an opcode for which next_opcode() returns NULL.
Returns TRUE otherwise. */
static BOOL check_opcode_types(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend)
{
/* Both limits start below the first opcode, so a "cc < limit" test
cannot succeed before a matching assertion has been seen. */
PCRE2_SPTR assert_back_end = cc - 1;
PCRE2_SPTR assert_na_end = cc - 1;
PCRE2_SPTR bracket_limit;
PCRE2_SPTR name_entry;
int names_left;

while (cc < ccend)
  {
  switch(*cc)
    {
    case OP_SET_SOM:
    common->has_set_som = TRUE;
    common->might_be_empty = TRUE;
    cc += 1;
    break;

    case OP_REFI:
#ifdef SUPPORT_UNICODE
    /* Reserved only once. */
    if (common->iref_ptr == 0)
      {
      common->iref_ptr = common->ovector_start;
      common->ovector_start += 3 * sizeof(sljit_sw);
      }
#endif /* SUPPORT_UNICODE */
    /* Fall through. */
    case OP_REF:
    case OP_CREF:
    /* A referenced group is removed from the optimized set. */
    common->optimized_cbracket[GET2(cc, 1)] = 0;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_DNREF:
    case OP_DNREFI:
    case OP_DNCREF:
    /* Same as above for each name table entry of the reference. */
    names_left = GET2(cc, 1 + IMM2_SIZE);
    name_entry = common->name_table + GET2(cc, 1) * common->name_entry_size;
    for (; names_left > 0; names_left--)
      {
      common->optimized_cbracket[GET2(name_entry, 0)] = 0;
      name_entry += common->name_entry_size;
      }
    cc += 1 + 2 * IMM2_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    /* Keep the furthest end seen so far. */
    bracket_limit = bracketend(cc);
    if (bracket_limit > assert_na_end)
      assert_na_end = bracket_limit;
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERTBACK:
    bracket_limit = bracketend(cc);
    if (bracket_limit > assert_back_end)
      assert_back_end = bracket_limit;
    cc += 1 + LINK_SIZE;
    break;

    case OP_COND:
    case OP_SCOND:
    /* Only AUTO_CALLOUT can insert a callout here. This case
       is intentionally not supported. */
    if (cc[1 + LINK_SIZE] == OP_CALLOUT || cc[1 + LINK_SIZE] == OP_CALLOUT_STR)
      return FALSE;
    cc += 1 + LINK_SIZE;
    break;

    case OP_RECURSE:
    /* Reserved only once. */
    if (common->recursive_head_ptr == 0)
      {
      common->recursive_head_ptr = common->ovector_start;
      common->ovector_start += sizeof(sljit_sw);
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_CALLOUT:
    case OP_CALLOUT_STR:
    if (common->capture_last_ptr == 0)
      {
      common->capture_last_ptr = common->ovector_start;
      common->ovector_start += sizeof(sljit_sw);
      }
    /* A string callout stores its own length. */
    if (*cc == OP_CALLOUT)
      cc += PRIV(OP_lengths)[OP_CALLOUT];
    else
      cc += GET(cc, 1 + 2*LINK_SIZE);
    break;

    case OP_THEN_ARG:
    common->has_then = TRUE;
    common->control_head_ptr = 1;
    /* Fall through. */

    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    if (cc < assert_na_end)
      return FALSE;
    /* Fall through. */

    case OP_MARK:
    if (common->mark_ptr == 0)
      {
      common->mark_ptr = common->ovector_start;
      common->ovector_start += sizeof(sljit_sw);
      }
    cc += 1 + 2 + cc[1];
    break;

    case OP_THEN:
    common->has_then = TRUE;
    common->control_head_ptr = 1;
    cc += 1;
    break;

    case OP_SKIP:
    if (cc < assert_back_end)
      common->has_skip_in_assert_back = TRUE;
    if (cc < assert_na_end)
      return FALSE;
    cc += 1;
    break;

    case OP_SKIP_ARG:
    common->control_head_ptr = 1;
    common->has_skip_arg = TRUE;
    if (cc < assert_back_end)
      common->has_skip_in_assert_back = TRUE;
    if (cc < assert_na_end)
      return FALSE;
    cc += 1 + 2 + cc[1];
    break;

    case OP_PRUNE:
    case OP_COMMIT:
    case OP_ASSERT_ACCEPT:
    if (cc < assert_na_end)
      return FALSE;
    cc++;
    break;

    default:
    /* Nothing to record; just step over the item. */
    cc = next_opcode(common, cc);
    if (cc == NULL)
      return FALSE;
    break;
    }
  }

return TRUE;
}
1277
1278
0
/* Saturation value of the early fail state. detect_early_fail() asserts
that its start argument is below this value, stops scanning an alternative
once its counter reaches it, and returns it when no further enhancement is
possible. */
#define EARLY_FAIL_ENHANCE_MAX (3 + 3)
1279
1280
/*
1281
  Start represents the number of allowed early fail enhancements
1282
1283
  The 0-3 values have a special meaning:
1284
    0 - skip is allowed for all iterators
1285
    1 - fail is allowed for all iterators
1286
    2 - fail is allowed for greedy iterators
1287
    3 - only ranged early fail is allowed
1288
  >3 - (start - 3) number of remaining ranged early fails allowed
1289
1290
return: the updated value of start
1291
*/
1292
/* Arguments:
  common              compiler state; private_data_ptrs and the early fail /
                      fast forward fields are updated
  cc                  an OP_ONCE, OP_BRA or OP_CBRA opening opcode
  private_data_start  next free private data offset, advanced for every
                      slot which is handed out
  depth               nesting level; brackets are not entered at depth >= 4
  start               incoming state, must be < EARLY_FAIL_ENHANCE_MAX

Every alternative of the bracket is scanned from its first opcode. The scan
of an alternative stops at the first opcode which is not handled here. */
static int detect_early_fail(compiler_common *common, PCRE2_SPTR cc,
   int *private_data_start, sljit_s32 depth, int start)
{
PCRE2_SPTR group_start = cc;
PCRE2_SPTR alt_end;
PCRE2_SPTR inner_end;
PCRE2_SPTR iter_start;
int best = 0;
int level, inner_level;
BOOL ranged;

SLJIT_ASSERT(*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA);
SLJIT_ASSERT(*cc != OP_CBRA || common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] != 0);
SLJIT_ASSERT(start < EARLY_FAIL_ENHANCE_MAX);

/* State 0 is not kept when the bracket has more than one alternative. */
alt_end = cc + GET(cc, 1);
if (start < 1 && *alt_end == OP_ALT)
  start = 1;

do
  {
  level = start;

  /* Step over the opening opcode (or OP_ALT) and its link; the first
  alternative of a capturing bracket also has a group number. */
  cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);

  for (;;)
    {
    /* Set only by iterators which can receive an early fail slot. In the
    switch below "continue" moves on to the next opcode, while "break"
    leaves the switch and ends the scan unless iter_start was set. */
    iter_start = NULL;

    switch(*cc)
      {
      /* Zero width assertions: the state is unchanged. */
      case OP_SOD: case OP_SOM: case OP_SET_SOM:
      case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY:
      case OP_EODN: case OP_EOD:
      case OP_CIRC: case OP_CIRCM:
      case OP_DOLL: case OP_DOLLM:
      case OP_NOT_UCP_WORD_BOUNDARY: case OP_UCP_WORD_BOUNDARY:
      cc++;
      continue;

      /* Single code unit type opcodes. */
      case OP_NOT_DIGIT: case OP_DIGIT:
      case OP_NOT_WHITESPACE: case OP_WHITESPACE:
      case OP_NOT_WORDCHAR: case OP_WORDCHAR:
      case OP_ANY: case OP_ALLANY: case OP_ANYBYTE:
      case OP_NOT_HSPACE: case OP_HSPACE:
      case OP_NOT_VSPACE: case OP_VSPACE:
      if (level < 1)
        level = 1;
      cc++;
      continue;

      case OP_ANYNL:
      case OP_EXTUNI:
      if (level < 3)
        level = 3;
      cc++;
      continue;

      case OP_NOTPROP:
      case OP_PROP:
      if (level < 1)
        level = 1;
      cc += 1 + 2;
      continue;

      case OP_CHAR: case OP_CHARI:
      case OP_NOT: case OP_NOTI:
      if (level < 1)
        level = 1;
      cc += 2;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      continue;

      /* Non-greedy type iterators: state 2 is raised to 3. */
      case OP_TYPEMINSTAR:
      case OP_TYPEMINPLUS:
      if (level == 2)
        level = 3;
      /* Fall through */

      case OP_TYPESTAR: case OP_TYPEPLUS:
      case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS:
      /* Only the iterator opcode is skipped here; the type or prop
      opcode which follows is consumed by the next iteration. */
      cc += 1;

      if (cc[0] == OP_ANYNL || cc[0] == OP_EXTUNI)
        {
        if (level < 3)
          level = 3;
        continue;
        }

      iter_start = cc - 1;
      break;

      case OP_TYPEEXACT:
      if (level < 1)
        level = 1;
      cc += 1 + IMM2_SIZE;
      continue;

      case OP_TYPEUPTO: case OP_TYPEMINUPTO: case OP_TYPEPOSUPTO:
      cc += IMM2_SIZE;
      /* Fall through */

      case OP_TYPEQUERY: case OP_TYPEMINQUERY: case OP_TYPEPOSQUERY:
      /* The type or prop opcode is consumed by the next iteration. */
      if (level < 3)
        level = 3;
      cc += 1;
      continue;

      /* Non-greedy character iterators: state 2 is raised to 3. */
      case OP_MINSTAR: case OP_MINPLUS:
      case OP_MINSTARI: case OP_MINPLUSI:
      case OP_NOTMINSTAR: case OP_NOTMINPLUS:
      case OP_NOTMINSTARI: case OP_NOTMINPLUSI:
      if (level == 2)
        level = 3;
      /* Fall through */

      case OP_STAR: case OP_PLUS:
      case OP_POSSTAR: case OP_POSPLUS:
      case OP_STARI: case OP_PLUSI:
      case OP_POSSTARI: case OP_POSPLUSI:
      case OP_NOTSTAR: case OP_NOTPLUS:
      case OP_NOTPOSSTAR: case OP_NOTPOSPLUS:
      case OP_NOTSTARI: case OP_NOTPLUSI:
      case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI:
      iter_start = cc;
      cc += 2;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      break;

      case OP_EXACT:
      if (level < 1)
        level = 1;
      cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      continue;

      /* Character iterators which carry a repeat count. */
      case OP_UPTO: case OP_MINUPTO: case OP_POSUPTO:
      case OP_UPTOI: case OP_MINUPTOI:
      case OP_EXACTI:
      case OP_POSUPTOI:
      case OP_NOTUPTO: case OP_NOTMINUPTO:
      case OP_NOTEXACT:
      case OP_NOTPOSUPTO:
      case OP_NOTUPTOI: case OP_NOTMINUPTOI:
      case OP_NOTEXACTI:
      case OP_NOTPOSUPTOI:
      cc += IMM2_SIZE;
      /* Fall through */

      case OP_QUERY: case OP_MINQUERY: case OP_POSQUERY:
      case OP_QUERYI: case OP_MINQUERYI: case OP_POSQUERYI:
      case OP_NOTQUERY: case OP_NOTMINQUERY: case OP_NOTPOSQUERY:
      case OP_NOTQUERYI: case OP_NOTMINQUERYI: case OP_NOTPOSQUERYI:
      if (level < 3)
        level = 3;
      cc += 2;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      continue;

      case OP_CLASS:
      case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
      case OP_XCLASS:
#endif
      iter_start = cc;

      /* An extended class stores its own length; the other two are
      followed by a 32 byte bitmap. */
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
      if (*cc == OP_XCLASS)
        cc += GET(cc, 1);
      else
#endif
        cc += 1 + (32 / sizeof(PCRE2_UCHAR));

      /* Inspect the repeat opcode (if any) which follows the class. */
      switch (*cc)
        {
        case OP_CRMINSTAR:
        case OP_CRMINPLUS:
        if (level == 2)
          level = 3;
        /* Fall through */

        case OP_CRSTAR: case OP_CRPLUS:
        case OP_CRPOSSTAR: case OP_CRPOSPLUS:
        cc++;
        break;

        case OP_CRRANGE: case OP_CRMINRANGE: case OP_CRPOSRANGE:
        if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
          {
          /* Minimum equals maximum: an exact repeat. */
          cc += 1 + 2 * IMM2_SIZE;
          if (level < 1)
            level = 1;
          continue;
          }

        cc += 2 * IMM2_SIZE;
        /* Fall through */

        case OP_CRQUERY: case OP_CRMINQUERY: case OP_CRPOSQUERY:
        cc++;
        if (level < 3)
          level = 3;
        continue;

        default:
        /* The class is not repeated. */
        if (level < 1)
          level = 1;
        continue;
        }
      break;

      case OP_BRA:
      case OP_CBRA:
      /* The nested scan starts from the state seen before this bracket. */
      inner_level = level;
      if (level < 1)
        level = 1;

      if (depth >= 4)
        break;

      if (level < 3 && cc[GET(cc, 1)] == OP_ALT)
        level = 3;

      /* Only brackets closed by a plain OP_KET are entered, and a
      capturing bracket only if it is still marked as optimized. */
      inner_end = bracketend(cc);
      if (inner_end[-1 - LINK_SIZE] != OP_KET || (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0))
        break;

      inner_level = detect_early_fail(common, cc, private_data_start, depth + 1, inner_level);

      if (inner_level > level)
        level = inner_level;

      /* A mark on the nested bracket is propagated to this one. */
      if (PRIVATE_DATA(cc) != 0)
        common->private_data_ptrs[group_start - common->start] = 1;

      if (level >= EARLY_FAIL_ENHANCE_MAX)
        break;

      cc = inner_end;
      continue;

      case OP_KET:
      SLJIT_ASSERT(PRIVATE_DATA(cc) == 0);
      /* Stop at the ket which closes the bracket being scanned. */
      if (cc >= alt_end)
        break;
      cc += 1 + LINK_SIZE;
      continue;
      }

    if (iter_start == NULL)
      break;

    if (level == 0)
      {
      /* State 0: the iterator gets a skip slot. */
      common->fast_forward_bc_ptr = iter_start;
      common->private_data_ptrs[(iter_start + 1) - common->start] = ((*private_data_start) << 3) | type_skip;
      *private_data_start += sizeof(sljit_sw);
      level = 4;
      }
    else
      {
      /* States 1-2 get a one word fail slot, states >= 3 a two word
      ranged fail slot. */
      ranged = (level >= 3);

      common->private_data_ptrs[(iter_start + 1) - common->start] =
        ((*private_data_start) << 3) | (ranged ? type_fail_range : type_fail);

      if (common->early_fail_start_ptr == 0)
        common->early_fail_start_ptr = *private_data_start;

      *private_data_start += (ranged ? 2 : 1) * sizeof(sljit_sw);
      common->early_fail_end_ptr = *private_data_start;

      if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
        return EARLY_FAIL_ENHANCE_MAX;

      if (ranged)
        level++;
      else
        level = 4;
      }

    /* Cannot be part of a repeat. */
    common->private_data_ptrs[group_start - common->start] = 1;

    if (level >= EARLY_FAIL_ENHANCE_MAX)
      break;
    }

  /* An alternative which was not scanned to its end saturates the
  result; otherwise the highest state of all alternatives is kept. */
  if (*cc != OP_ALT && *cc != OP_KET)
    best = EARLY_FAIL_ENHANCE_MAX;
  else if (best < level)
    best = level;

  cc = alt_end;
  alt_end = cc + GET(cc, 1);
  }
while (*cc == OP_ALT);

return best;
}
1666
1667
/* Maps the repeat opcode at cc (the opcode following a character class)
to a value between 0 and 2. Opcodes which are not listed yield 0. */
static int get_class_iterator_size(PCRE2_SPTR cc)
{
sljit_u32 lower, upper, span;
int result = 0;

switch(*cc)
  {
  case OP_CRSTAR:
  case OP_CRPLUS:
  result = 2;
  break;

  case OP_CRMINSTAR:
  case OP_CRMINPLUS:
  case OP_CRQUERY:
  case OP_CRMINQUERY:
  result = 1;
  break;

  case OP_CRRANGE:
  case OP_CRMINRANGE:
  lower = GET2(cc, 1);
  upper = GET2(cc, 1 + IMM2_SIZE);
  if (upper == 0)
    {
    /* A zero upper limit is treated like the star forms above. */
    result = (*cc == OP_CRRANGE) ? 2 : 1;
    break;
    }
  /* Unsigned difference of the limits, capped at 2. */
  span = upper - lower;
  result = (span > 2) ? 2 : (int)span;
  break;

  default:
  break;
  }

return result;
}
1698
1699
/* Checks whether the bracket at begin is followed by identical copies of
itself, and if so records the repeat in common->private_data_ptrs in three
consecutive entries starting LINK_SIZE before the end of a bracket: the
length to skip, the repeat type (OP_EXACT, OP_UPTO or OP_MINUPTO) and the
repeat count.

Returns TRUE when a repeat was recorded, or when the bracket is the
continuation of a repeat which has been recorded earlier. */
static BOOL detect_repeat(compiler_common *common, PCRE2_SPTR begin)
{
PCRE2_SPTR end = bracketend(begin);
PCRE2_SPTR scan;
PCRE2_SPTR scan_end;
PCRE2_SPTR exact_end;
PCRE2_UCHAR zero_op;
sljit_sw length = end - begin;
sljit_sw slot;
sljit_s32 exact, optional, kets;

/* Only brackets closed by a plain OP_KET which have no private data
are considered. */
if (end[-(1 + LINK_SIZE)] != OP_KET || PRIVATE_DATA(begin) != 0)
  return FALSE;

/* /(?:AB){4,6}/ is currently converted to /(?:AB){3}(?:AB){1,3}/
 * Skip the check of the second part. */
if (PRIVATE_DATA(end - LINK_SIZE) != 0)
  return TRUE;

/* Detect fixed iterations first: count the identical copies which
directly follow the bracket. */
exact = 1;
scan = end;
while (*scan == *begin)
  {
  scan_end = bracketend(scan);
  if (scan_end - scan != length || memcmp(begin, scan, IN_UCHARS(length)) != 0)
    break;
  scan = scan_end;
  exact++;
  }

if (exact == 2)
  return FALSE;

optional = 0;
exact_end = scan;
if (*scan == OP_BRAZERO || *scan == OP_BRAMINZERO)
  {
  zero_op = *scan;

  /* Each optional level is: zero opcode, OP_BRA wrapper, a copy. */
  while (scan[0] == zero_op && scan[1] == OP_BRA && scan[2 + LINK_SIZE] == *begin)
    {
    scan_end = bracketend(scan + 2 + LINK_SIZE);
    if (scan_end - scan != (length + 2 + LINK_SIZE) || memcmp(begin, scan + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
      break;
    scan = scan_end;
    optional++;
    }

  /* The innermost level has no OP_BRA wrapper. */
  if (scan[0] == zero_op && scan[1] == *begin && optional >= 1)
    {
    scan_end = bracketend(scan + 1);
    if (scan_end - scan == (length + 1) && memcmp(begin, scan + 1, IN_UCHARS(length)) == 0)
      {
      /* One closing OP_KET is expected for every wrapper. */
      kets = 0;
      while (kets < optional && *scan_end == OP_KET)
        {
        kets++;
        scan_end += 1 + LINK_SIZE;
        }

      if (kets == optional)
        {
        slot = exact_end - common->start - LINK_SIZE;
        common->private_data_ptrs[slot] = scan_end - exact_end;
        common->private_data_ptrs[slot + 1] = (zero_op == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
        /* +2 the original and the last. */
        common->private_data_ptrs[slot + 2] = optional + 2;
        if (exact == 1)
          return TRUE;

        /* The last fixed copy now belongs to the bounded repeat. */
        exact--;
        exact_end -= (1 + LINK_SIZE) + GET(exact_end, -LINK_SIZE);
        }
      }
    }
  }

if (exact < 3)
  return FALSE;

slot = end - common->start - LINK_SIZE;
common->private_data_ptrs[slot] = exact_end - end;
common->private_data_ptrs[slot + 1] = OP_EXACT;
common->private_data_ptrs[slot + 2] = exact;
return TRUE;
}
1784
1785
/* Case label lists for switch statements over iterator opcodes. Each macro
expands to a run of "case X:" labels, so it must be followed directly by the
statements of that case (see set_private_data_ptrs for an example). */

/* Character iterators: non-greedy star/plus and every query form. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: case OP_MINSTARI: \
    case OP_NOTMINSTAR: case OP_NOTMINSTARI: \
    case OP_MINPLUS: case OP_MINPLUSI: \
    case OP_NOTMINPLUS: case OP_NOTMINPLUSI: \
    case OP_QUERY: case OP_QUERYI: \
    case OP_NOTQUERY: case OP_NOTQUERYI: \
    case OP_MINQUERY: case OP_MINQUERYI: \
    case OP_NOTMINQUERY: case OP_NOTMINQUERYI:

/* Character iterators: greedy star/plus. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: case OP_STARI: \
    case OP_NOTSTAR: case OP_NOTSTARI: \
    case OP_PLUS: case OP_PLUSI: \
    case OP_NOTPLUS: case OP_NOTPLUSI:

/* Character iterators: greedy and non-greedy upto. */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: case OP_UPTOI: \
    case OP_NOTUPTO: case OP_NOTUPTOI: \
    case OP_MINUPTO: case OP_MINUPTOI: \
    case OP_NOTMINUPTO: case OP_NOTMINUPTOI:

/* Type iterators: non-greedy star/plus and both query forms. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: case OP_TYPEMINQUERY:

/* Type iterators: greedy star/plus. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: case OP_TYPEPLUS:

/* Type iterators: greedy and non-greedy upto. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: case OP_TYPEMINUPTO:
1836
1837
/* Walks the opcodes from common->start up to ccend and hands out private
data offsets, storing them in common->private_data_ptrs at the index of the
opcode which owns them. Allocation starts at *private_data_start, and the
next free offset is written back there on return. The walk is abandoned as
soon as the offset exceeds SLJIT_MAX_LOCAL_SIZE. */
static void set_private_data_ptrs(compiler_common *common, int *private_data_start, PCRE2_SPTR ccend)
{
PCRE2_SPTR cc = common->start;
PCRE2_SPTR link_target;
/* End of the enclosing region in which single character iterators
do not receive a private slot; NULL when there is no such region. */
PCRE2_SPTR repeat_end = NULL;
int next_slot = *private_data_start;
/* slots:      number of words to reserve for an iterator
   advance:    distance to the next opcode; a negative value means that a
               character with possible extra UTF code units ends the item
   bracket_hdr: length of a bracket's opening item */
int slots, advance, bracket_hdr;
BOOL check_repeat = TRUE;

while (cc < ccend)
  {
  slots = 0;
  advance = 0;
  bracket_hdr = 0;

  if (next_slot > SLJIT_MAX_LOCAL_SIZE)
    break;

  /* When the bracket is prefixed by a zero iteration, the repeat check is
  skipped (at this point). Brackets which are converted to repeats do not
  allow global based single character repeats inside them. */
  if (check_repeat
      && (*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)
      && detect_repeat(common, cc)
      && cc >= repeat_end)
    repeat_end = bracketend(cc);

  check_repeat = TRUE;

  switch(*cc)
    {
    case OP_KET:
    /* A non-zero entry after the ket is the skip length which was
    recorded by detect_repeat(). */
    if (common->private_data_ptrs[cc + 1 - common->start] != 0)
      {
      common->private_data_ptrs[cc - common->start] = next_slot;
      next_slot += sizeof(sljit_sw);
      cc += common->private_data_ptrs[cc + 1 - common->start];
      }
    cc += 1 + LINK_SIZE;
    break;

    /* Brackets which always get one word. */
    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    common->private_data_ptrs[cc - common->start] = next_slot;
    next_slot += sizeof(sljit_sw);
    bracket_hdr = 1 + LINK_SIZE;
    break;

    case OP_ASSERTBACK_NA:
    common->private_data_ptrs[cc - common->start] = next_slot;
    next_slot += sizeof(sljit_sw);

    /* One more word, flagged in the following entry, when any of the
    alternatives starts with OP_VREVERSE. */
    if (find_vreverse(cc))
      {
      common->private_data_ptrs[cc + 1 - common->start] = 1;
      next_slot += sizeof(sljit_sw);
      }

    bracket_hdr = 1 + LINK_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    common->private_data_ptrs[cc - common->start] = next_slot;
    next_slot += sizeof(sljit_sw);
    bracket_hdr = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    common->private_data_ptrs[cc - common->start] = 0;
    link_target = cc + GET(cc, 1);
    if (*link_target == OP_KETRMAX || *link_target == OP_KETRMIN)
      {
      common->private_data_ptrs[cc - common->start] = next_slot;
      next_slot += sizeof(sljit_sw);
      }
    bracket_hdr = 1 + LINK_SIZE;
    break;

    case OP_BRA:
    bracket_hdr = 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    bracket_hdr = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_BRAZERO:
    case OP_BRAMINZERO:
    case OP_BRAPOSZERO:
    /* The bracket which follows is not checked for repeats. */
    advance = 1;
    check_repeat = FALSE;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    slots = 1;
    advance = -2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    slots = 2;
    advance = -2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    slots = 2;
    advance = -(2 + IMM2_SIZE);
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    slots = 1;
    advance = 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    /* No slot when the repeated type is OP_ANYNL or OP_EXTUNI. */
    if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
      slots = 2;
    advance = 1;
    break;

    case OP_TYPEUPTO:
    if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
      slots = 2;
    advance = 1 + IMM2_SIZE;
    break;

    case OP_TYPEMINUPTO:
    slots = 2;
    advance = 1 + IMM2_SIZE;
    break;

    case OP_CLASS:
    case OP_NCLASS:
    /* The repeat opcode (if any) follows the 32 byte bitmap. */
    advance = 1 + 32 / sizeof(PCRE2_UCHAR);
    slots = get_class_iterator_size(cc + advance);
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    advance = GET(cc, 1);
    slots = get_class_iterator_size(cc + advance);
    break;
#endif

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

  /* Character iterators, which are not inside a repeated bracket,
     get a private slot instead of allocating it on the stack. */
  if (slots > 0 && cc >= repeat_end)
    {
    common->private_data_ptrs[cc - common->start] = next_slot;
    next_slot += sizeof(sljit_sw) * slots;
    }

  if (advance < 0)
    {
    cc += -advance;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    }
  else if (advance > 0)
    cc += advance;

  if (bracket_hdr > 0)
    {
    /* Outside of any tracked region: start tracking this bracket unless
    it is closed by a plain OP_KET. */
    if (cc >= repeat_end)
      {
      repeat_end = bracketend(cc);
      if (repeat_end[-1 - LINK_SIZE] == OP_KET)
        repeat_end = NULL;
      }
    cc += bracket_hdr;
    }
  }

*private_data_start = next_slot;
}
2033
2034
/* Returns with a frame_types (always < 0) if no need for frame. */
2035
static int get_framesize(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, BOOL recursive, BOOL *needs_control_head)
2036
0
{
2037
0
int length = 0;
2038
0
int possessive = 0;
2039
0
BOOL stack_restore = FALSE;
2040
0
BOOL setsom_found = recursive;
2041
0
BOOL setmark_found = recursive;
2042
/* The last capture is a local variable even for recursions. */
2043
0
BOOL capture_last_found = FALSE;
2044
2045
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2046
SLJIT_ASSERT(common->control_head_ptr != 0);
2047
*needs_control_head = TRUE;
2048
#else
2049
0
*needs_control_head = FALSE;
2050
0
#endif
2051
2052
0
if (ccend == NULL)
2053
0
  {
2054
0
  ccend = bracketend(cc) - (1 + LINK_SIZE);
2055
0
  if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
2056
0
    {
2057
0
    possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
2058
    /* This is correct regardless of common->capture_last_ptr. */
2059
0
    capture_last_found = TRUE;
2060
0
    }
2061
0
  cc = next_opcode(common, cc);
2062
0
  }
2063
2064
0
SLJIT_ASSERT(cc != NULL);
2065
0
while (cc < ccend)
2066
0
  switch(*cc)
2067
0
    {
2068
0
    case OP_SET_SOM:
2069
0
    SLJIT_ASSERT(common->has_set_som);
2070
0
    stack_restore = TRUE;
2071
0
    if (!setsom_found)
2072
0
      {
2073
0
      length += 2;
2074
0
      setsom_found = TRUE;
2075
0
      }
2076
0
    cc += 1;
2077
0
    break;
2078
2079
0
    case OP_MARK:
2080
0
    case OP_COMMIT_ARG:
2081
0
    case OP_PRUNE_ARG:
2082
0
    case OP_THEN_ARG:
2083
0
    SLJIT_ASSERT(common->mark_ptr != 0);
2084
0
    stack_restore = TRUE;
2085
0
    if (!setmark_found)
2086
0
      {
2087
0
      length += 2;
2088
0
      setmark_found = TRUE;
2089
0
      }
2090
0
    if (common->control_head_ptr != 0)
2091
0
      *needs_control_head = TRUE;
2092
0
    cc += 1 + 2 + cc[1];
2093
0
    break;
2094
2095
0
    case OP_RECURSE:
2096
0
    stack_restore = TRUE;
2097
0
    if (common->has_set_som && !setsom_found)
2098
0
      {
2099
0
      length += 2;
2100
0
      setsom_found = TRUE;
2101
0
      }
2102
0
    if (common->mark_ptr != 0 && !setmark_found)
2103
0
      {
2104
0
      length += 2;
2105
0
      setmark_found = TRUE;
2106
0
      }
2107
0
    if (common->capture_last_ptr != 0 && !capture_last_found)
2108
0
      {
2109
0
      length += 2;
2110
0
      capture_last_found = TRUE;
2111
0
      }
2112
0
    cc += 1 + LINK_SIZE;
2113
0
    break;
2114
2115
0
    case OP_CBRA:
2116
0
    case OP_CBRAPOS:
2117
0
    case OP_SCBRA:
2118
0
    case OP_SCBRAPOS:
2119
0
    stack_restore = TRUE;
2120
0
    if (common->capture_last_ptr != 0 && !capture_last_found)
2121
0
      {
2122
0
      length += 2;
2123
0
      capture_last_found = TRUE;
2124
0
      }
2125
0
    length += 3;
2126
0
    cc += 1 + LINK_SIZE + IMM2_SIZE;
2127
0
    break;
2128
2129
0
    case OP_THEN:
2130
0
    stack_restore = TRUE;
2131
0
    if (common->control_head_ptr != 0)
2132
0
      *needs_control_head = TRUE;
2133
0
    cc ++;
2134
0
    break;
2135
2136
0
    default:
2137
0
    stack_restore = TRUE;
2138
    /* Fall through. */
2139
2140
0
    case OP_NOT_WORD_BOUNDARY:
2141
0
    case OP_WORD_BOUNDARY:
2142
0
    case OP_NOT_DIGIT:
2143
0
    case OP_DIGIT:
2144
0
    case OP_NOT_WHITESPACE:
2145
0
    case OP_WHITESPACE:
2146
0
    case OP_NOT_WORDCHAR:
2147
0
    case OP_WORDCHAR:
2148
0
    case OP_ANY:
2149
0
    case OP_ALLANY:
2150
0
    case OP_ANYBYTE:
2151
0
    case OP_NOTPROP:
2152
0
    case OP_PROP:
2153
0
    case OP_ANYNL:
2154
0
    case OP_NOT_HSPACE:
2155
0
    case OP_HSPACE:
2156
0
    case OP_NOT_VSPACE:
2157
0
    case OP_VSPACE:
2158
0
    case OP_EXTUNI:
2159
0
    case OP_EODN:
2160
0
    case OP_EOD:
2161
0
    case OP_CIRC:
2162
0
    case OP_CIRCM:
2163
0
    case OP_DOLL:
2164
0
    case OP_DOLLM:
2165
0
    case OP_CHAR:
2166
0
    case OP_CHARI:
2167
0
    case OP_NOT:
2168
0
    case OP_NOTI:
2169
2170
0
    case OP_EXACT:
2171
0
    case OP_POSSTAR:
2172
0
    case OP_POSPLUS:
2173
0
    case OP_POSQUERY:
2174
0
    case OP_POSUPTO:
2175
2176
0
    case OP_EXACTI:
2177
0
    case OP_POSSTARI:
2178
0
    case OP_POSPLUSI:
2179
0
    case OP_POSQUERYI:
2180
0
    case OP_POSUPTOI:
2181
2182
0
    case OP_NOTEXACT:
2183
0
    case OP_NOTPOSSTAR:
2184
0
    case OP_NOTPOSPLUS:
2185
0
    case OP_NOTPOSQUERY:
2186
0
    case OP_NOTPOSUPTO:
2187
2188
0
    case OP_NOTEXACTI:
2189
0
    case OP_NOTPOSSTARI:
2190
0
    case OP_NOTPOSPLUSI:
2191
0
    case OP_NOTPOSQUERYI:
2192
0
    case OP_NOTPOSUPTOI:
2193
2194
0
    case OP_TYPEEXACT:
2195
0
    case OP_TYPEPOSSTAR:
2196
0
    case OP_TYPEPOSPLUS:
2197
0
    case OP_TYPEPOSQUERY:
2198
0
    case OP_TYPEPOSUPTO:
2199
2200
0
    case OP_CLASS:
2201
0
    case OP_NCLASS:
2202
0
    case OP_XCLASS:
2203
2204
0
    case OP_CALLOUT:
2205
0
    case OP_CALLOUT_STR:
2206
2207
0
    case OP_NOT_UCP_WORD_BOUNDARY:
2208
0
    case OP_UCP_WORD_BOUNDARY:
2209
2210
0
    cc = next_opcode(common, cc);
2211
0
    SLJIT_ASSERT(cc != NULL);
2212
0
    break;
2213
0
    }
2214
2215
/* Possessive quantifiers can use a special case. */
2216
0
if (SLJIT_UNLIKELY(possessive == length))
2217
0
  return stack_restore ? no_frame : no_stack;
2218
2219
0
if (length > 0)
2220
0
  return length + 1;
2221
0
return stack_restore ? no_frame : no_stack;
2222
0
}
2223
2224
static void init_frame(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, int stackpos, int stacktop)
2225
0
{
2226
0
DEFINE_COMPILER;
2227
0
BOOL setsom_found = FALSE;
2228
0
BOOL setmark_found = FALSE;
2229
/* The last capture is a local variable even for recursions. */
2230
0
BOOL capture_last_found = FALSE;
2231
0
int offset;
2232
2233
/* >= 1 + shortest item size (2) */
2234
0
SLJIT_UNUSED_ARG(stacktop);
2235
0
SLJIT_ASSERT(stackpos >= stacktop + 2);
2236
2237
0
stackpos = STACK(stackpos);
2238
0
if (ccend == NULL)
2239
0
  {
2240
0
  ccend = bracketend(cc) - (1 + LINK_SIZE);
2241
0
  if (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS)
2242
0
    cc = next_opcode(common, cc);
2243
0
  }
2244
2245
0
SLJIT_ASSERT(cc != NULL);
2246
0
while (cc < ccend)
2247
0
  switch(*cc)
2248
0
    {
2249
0
    case OP_SET_SOM:
2250
0
    SLJIT_ASSERT(common->has_set_som);
2251
0
    if (!setsom_found)
2252
0
      {
2253
0
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
2254
0
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
2255
0
      stackpos -= SSIZE_OF(sw);
2256
0
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2257
0
      stackpos -= SSIZE_OF(sw);
2258
0
      setsom_found = TRUE;
2259
0
      }
2260
0
    cc += 1;
2261
0
    break;
2262
2263
0
    case OP_MARK:
2264
0
    case OP_COMMIT_ARG:
2265
0
    case OP_PRUNE_ARG:
2266
0
    case OP_THEN_ARG:
2267
0
    SLJIT_ASSERT(common->mark_ptr != 0);
2268
0
    if (!setmark_found)
2269
0
      {
2270
0
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2271
0
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
2272
0
      stackpos -= SSIZE_OF(sw);
2273
0
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2274
0
      stackpos -= SSIZE_OF(sw);
2275
0
      setmark_found = TRUE;
2276
0
      }
2277
0
    cc += 1 + 2 + cc[1];
2278
0
    break;
2279
2280
0
    case OP_RECURSE:
2281
0
    if (common->has_set_som && !setsom_found)
2282
0
      {
2283
0
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
2284
0
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
2285
0
      stackpos -= SSIZE_OF(sw);
2286
0
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2287
0
      stackpos -= SSIZE_OF(sw);
2288
0
      setsom_found = TRUE;
2289
0
      }
2290
0
    if (common->mark_ptr != 0 && !setmark_found)
2291
0
      {
2292
0
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2293
0
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
2294
0
      stackpos -= SSIZE_OF(sw);
2295
0
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2296
0
      stackpos -= SSIZE_OF(sw);
2297
0
      setmark_found = TRUE;
2298
0
      }
2299
0
    if (common->capture_last_ptr != 0 && !capture_last_found)
2300
0
      {
2301
0
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
2302
0
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
2303
0
      stackpos -= SSIZE_OF(sw);
2304
0
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2305
0
      stackpos -= SSIZE_OF(sw);
2306
0
      capture_last_found = TRUE;
2307
0
      }
2308
0
    cc += 1 + LINK_SIZE;
2309
0
    break;
2310
2311
0
    case OP_CBRA:
2312
0
    case OP_CBRAPOS:
2313
0
    case OP_SCBRA:
2314
0
    case OP_SCBRAPOS:
2315
0
    if (common->capture_last_ptr != 0 && !capture_last_found)
2316
0
      {
2317
0
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
2318
0
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
2319
0
      stackpos -= SSIZE_OF(sw);
2320
0
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2321
0
      stackpos -= SSIZE_OF(sw);
2322
0
      capture_last_found = TRUE;
2323
0
      }
2324
0
    offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
2325
0
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
2326
0
    stackpos -= SSIZE_OF(sw);
2327
0
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
2328
0
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
2329
0
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2330
0
    stackpos -= SSIZE_OF(sw);
2331
0
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
2332
0
    stackpos -= SSIZE_OF(sw);
2333
2334
0
    cc += 1 + LINK_SIZE + IMM2_SIZE;
2335
0
    break;
2336
2337
0
    default:
2338
0
    cc = next_opcode(common, cc);
2339
0
    SLJIT_ASSERT(cc != NULL);
2340
0
    break;
2341
0
    }
2342
2343
0
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
2344
0
SLJIT_ASSERT(stackpos == STACK(stacktop));
2345
0
}
2346
2347
0
/* Number of temporary register slots cycled through by the delayed memory
   copy helpers below. */
#define RECURSE_TMP_REG_COUNT 3
2348
2349
/* State of a sequence of delayed memory-to-memory copies. Each slot holds a
   value that has been loaded into a temporary register but not stored yet. */
typedef struct delayed_mem_copy_status {
2350
  /* Compiler used for emitting the moves. */
  struct sljit_compiler *compiler;
2351
  /* Base register of the pending store of each slot, -1 if none is pending. */
  int store_bases[RECURSE_TMP_REG_COUNT];
2352
  /* Offset of the pending store of each slot. */
  int store_offsets[RECURSE_TMP_REG_COUNT];
2353
  /* Register that carries the copied value of each slot. */
  int tmp_regs[RECURSE_TMP_REG_COUNT];
2354
  /* Register that keeps the original content of tmp_regs[i] when
     sljit_get_register_index() reports a negative index for it
     (a virtual register); otherwise it equals tmp_regs[i]. */
  int saved_tmp_regs[RECURSE_TMP_REG_COUNT];
2355
  /* Slot used by the next delayed_mem_copy_move() call. */
  int next_tmp_reg;
2356
} delayed_mem_copy_status;
2357
2358
/* Prepares status for a new sequence of delayed copies. The caller must have
filled in tmp_regs[] and saved_tmp_regs[] beforehand; this function marks
every slot as having no pending store, restarts the slot rotation and
records the compiler of common. */
static void delayed_mem_copy_init(delayed_mem_copy_status *status, compiler_common *common)
{
int slot = 0;

status->compiler = common->compiler;
status->next_tmp_reg = 0;

while (slot < RECURSE_TMP_REG_COUNT)
  {
  SLJIT_ASSERT(status->tmp_regs[slot] >= 0);
  /* Only a virtual saved register may differ from its temporary register. */
  SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, status->saved_tmp_regs[slot]) < 0 || status->tmp_regs[slot] == status->saved_tmp_regs[slot]);

  status->store_bases[slot] = -1;
  slot++;
  }
}
2372
2373
/* Queues a copy of the word at [load_base + load_offset] to
[store_base + store_offset]. The load is emitted immediately into the
temporary register of the current slot; the store is emitted later, either
when the slot is reused or by delayed_mem_copy_finish(). */
static void delayed_mem_copy_move(delayed_mem_copy_status *status, int load_base, sljit_sw load_offset,
  int store_base, sljit_sw store_offset)
{
struct sljit_compiler *compiler = status->compiler;
int slot = status->next_tmp_reg;
int reg = status->tmp_regs[slot];
int pending_base = status->store_bases[slot];

SLJIT_ASSERT(load_base > 0 && store_base > 0);

if (pending_base != -1)
  {
  /* The slot still holds an earlier value: store it first. */
  OP1(SLJIT_MOV, SLJIT_MEM1(pending_base), status->store_offsets[slot], reg, 0);
  }
else if (sljit_get_register_index(SLJIT_GP_REGISTER, status->saved_tmp_regs[slot]) < 0)
  {
  /* First use of the slot: preserve virtual registers. */
  OP1(SLJIT_MOV, status->saved_tmp_regs[slot], 0, reg, 0);
  }

OP1(SLJIT_MOV, reg, 0, SLJIT_MEM1(load_base), load_offset);
status->store_offsets[slot] = store_offset;
status->store_bases[slot] = store_base;

/* Advance to the next slot, wrapping around after the last one. */
status->next_tmp_reg = (slot + 1 == RECURSE_TMP_REG_COUNT) ? 0 : slot + 1;
}
2397
2398
/* Emits the stores that are still pending in status. The slots are visited
in rotation order starting from the next slot to be used, and the original
content of a temporary register is restored after its store when it was
preserved in a virtual register. */
static void delayed_mem_copy_finish(delayed_mem_copy_status *status)
{
struct sljit_compiler *compiler = status->compiler;
int slot = status->next_tmp_reg;
int remaining;

for (remaining = RECURSE_TMP_REG_COUNT; remaining > 0; remaining--)
  {
  if (status->store_bases[slot] != -1)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[slot]), status->store_offsets[slot], status->tmp_regs[slot], 0);

    /* Restore virtual registers. */
    if (sljit_get_register_index(SLJIT_GP_REGISTER, status->saved_tmp_regs[slot]) < 0)
      OP1(SLJIT_MOV, status->tmp_regs[slot], 0, status->saved_tmp_regs[slot], 0);
    }

  slot = (slot + 1) % RECURSE_TMP_REG_COUNT;
  }
}
2421
2422
#undef RECURSE_TMP_REG_COUNT
2423
2424
/* Tests and sets the bit of common->recurse_bitset that belongs to the
word-aligned offset bit_index. Returns TRUE when the bit was clear (it is set
by this call), FALSE when it had already been set. */
static BOOL recurse_check_bit(compiler_common *common, sljit_sw bit_index)
{
sljit_sw word_index;
uint8_t *slot;
uint8_t bit;

/* The offset must be a multiple of the machine word size. */
SLJIT_ASSERT((bit_index & (sizeof(sljit_sw) - 1)) == 0);

word_index = bit_index >> SLJIT_WORD_SHIFT;

SLJIT_ASSERT((word_index >> 3) < common->recurse_bitset_size);

/* One bit per machine word, eight words per bitset byte. */
slot = common->recurse_bitset + (word_index >> 3);
bit = 1 << (word_index & 0x7);

if ((*slot & bit) == 0)
  {
  *slot |= bit;
  return TRUE;
  }

return FALSE;
}
2444
2445
/* Bit flags reported by get_recurse_data_length() through its result_flags
   argument. */
enum get_recurse_flags {
  /* A backtracking verb other than (*MARK) was found. */
  recurse_flag_quit_found = 0x01,
  /* OP_ACCEPT or OP_ASSERT_ACCEPT was found. */
  recurse_flag_accept_found = 0x02,
  /* The start of match may be changed. */
  recurse_flag_setsom_found = 0x04,
  /* The mark may be changed. */
  recurse_flag_setmark_found = 0x08,
  /* The control head is used. */
  recurse_flag_control_head_found = 0x10
};
2452
2453
/* Counts the machine words that have to be saved for a recursion over the
opcodes in [cc, ccend), and collects the recurse_flag_* bits describing the
range in *result_flags.

common->recurse_bitset is cleared first and then used through
recurse_check_bit() so that every private or shared word is counted once,
even when several opcodes refer to it.

NOTE(review): several second-word checks are wrapped in SLJIT_ASSERT. If that
macro expands to nothing in non-debug builds, the second word of a pair is
only marked in the bitset when assertions are enabled. This looks harmless
because the first word guards the whole pair, but confirm before relying on
the bitset content. */
static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, uint32_t *result_flags)
{
/* The count starts at one word. */
int word_count = 1;
int class_len, priv, capture;
PCRE2_SPTR alt;
uint32_t flags = 0;

memset(common->recurse_bitset, 0, common->recurse_bitset_size);

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
SLJIT_ASSERT(common->control_head_ptr != 0);
flags |= recurse_flag_control_head_found;
#endif

/* Calculate the sum of the private machine words. */
while (cc < ccend)
  {
  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    flags |= recurse_flag_setsom_found;
    cc += 1;
    break;

    case OP_RECURSE:
    if (common->has_set_som)
      flags |= recurse_flag_setsom_found;
    if (common->mark_ptr != 0)
      flags |= recurse_flag_setmark_found;
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      word_count += 1;
    cc += 1 + LINK_SIZE;
    break;

    case OP_KET:
    priv = PRIVATE_DATA(cc);
    if (priv != 0)
      {
      if (recurse_check_bit(common, priv))
        word_count += 1;
      /* The entry of the following code unit holds an extra length to skip. */
      SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
      cc += PRIVATE_DATA(cc + 1);
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    /* These brackets always own one private word. */
    SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
    if (recurse_check_bit(common, PRIVATE_DATA(cc)))
      word_count += 1;
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    capture = GET2(cc, 1 + LINK_SIZE);
    /* The two ovector words of the capture. */
    if (recurse_check_bit(common, OVECTOR(capture << 1)))
      {
      SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((capture << 1) + 1)));
      word_count += 2;
      }
    /* The private ovector word, unless the bracket is optimized. */
    if (common->optimized_cbracket[capture] == 0 && recurse_check_bit(common, OVECTOR_PRIV(capture)))
      word_count += 1;
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      word_count += 1;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    capture = GET2(cc, 1 + LINK_SIZE);
    if (recurse_check_bit(common, OVECTOR(capture << 1)))
      {
      SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((capture << 1) + 1)));
      word_count += 2;
      }
    if (recurse_check_bit(common, OVECTOR_PRIV(capture)))
      word_count += 1;
    if (recurse_check_bit(common, PRIVATE_DATA(cc)))
      word_count += 1;
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      word_count += 1;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    alt = cc + GET(cc, 1);
    if ((*alt == OP_KETRMAX || *alt == OP_KETRMIN) && recurse_check_bit(common, PRIVATE_DATA(cc)))
      word_count += 1;
    cc += 1 + LINK_SIZE;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    priv = PRIVATE_DATA(cc);
    if (priv != 0 && recurse_check_bit(common, priv))
      word_count += 1;
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    priv = PRIVATE_DATA(cc);
    if (priv != 0 && recurse_check_bit(common, priv))
      {
      SLJIT_ASSERT(recurse_check_bit(common, priv + sizeof(sljit_sw)));
      word_count += 2;
      }
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    priv = PRIVATE_DATA(cc);
    if (priv != 0 && recurse_check_bit(common, priv))
      {
      SLJIT_ASSERT(recurse_check_bit(common, priv + sizeof(sljit_sw)));
      word_count += 2;
      }
    cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    priv = PRIVATE_DATA(cc);
    if (priv != 0 && recurse_check_bit(common, priv))
      word_count += 1;
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    priv = PRIVATE_DATA(cc);
    if (priv != 0 && recurse_check_bit(common, priv))
      {
      SLJIT_ASSERT(recurse_check_bit(common, priv + sizeof(sljit_sw)));
      word_count += 2;
      }
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
    priv = PRIVATE_DATA(cc);
    if (priv != 0 && recurse_check_bit(common, priv))
      {
      SLJIT_ASSERT(recurse_check_bit(common, priv + sizeof(sljit_sw)));
      word_count += 2;
      }
    cc += 1 + IMM2_SIZE;
    break;

    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
#endif
    /* Opcode plus the 32 byte bitmap; an extended class stores its own
       length instead. */
    class_len = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    if (*cc == OP_XCLASS)
      class_len = GET(cc, 1);
#endif

    priv = PRIVATE_DATA(cc);
    if (priv != 0 && recurse_check_bit(common, priv))
      word_count += get_class_iterator_size(cc + class_len);
    cc += class_len;
    break;

    case OP_MARK:
    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    flags |= recurse_flag_setmark_found;
    if (common->control_head_ptr != 0)
      flags |= recurse_flag_control_head_found;
    /* Every verb here except (*MARK) is recorded as a quit. */
    if (*cc != OP_MARK)
      flags |= recurse_flag_quit_found;

    cc += 1 + 2 + cc[1];
    break;

    case OP_PRUNE:
    case OP_SKIP:
    case OP_COMMIT:
    flags |= recurse_flag_quit_found;
    cc += 1;
    break;

    case OP_SKIP_ARG:
    flags |= recurse_flag_quit_found;
    cc += 1 + 2 + cc[1];
    break;

    case OP_THEN:
    SLJIT_ASSERT(common->control_head_ptr != 0);
    flags |= recurse_flag_quit_found | recurse_flag_control_head_found;
    cc += 1;
    break;

    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    flags |= recurse_flag_accept_found;
    cc += 1;
    break;

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }
  }
SLJIT_ASSERT(cc == ccend);

/* One word for the control head when it is used. */
if (flags & recurse_flag_control_head_found)
  word_count += 1;

/* When a quit is possible, the start of match and the mark each need one
   more word if they can change. */
if (flags & recurse_flag_quit_found)
  {
  if (flags & recurse_flag_setsom_found)
    word_count += 1;
  if (flags & recurse_flag_setmark_found)
    word_count += 1;
  }

*result_flags = flags;
return word_count;
}
2696
2697
/* Values accepted by the type argument of copy_recurse_data(). */
enum copy_recurse_data_types {
  recurse_copy_from_global = 0,
  recurse_copy_private_to_global = 1,
  recurse_copy_shared_to_global = 2,
  recurse_copy_kept_shared_to_global = 3,
  recurse_swap_global = 4
};
2704
2705
static void copy_recurse_data(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
2706
  int type, int stackptr, int stacktop, uint32_t recurse_flags)
2707
0
{
2708
0
delayed_mem_copy_status status;
2709
0
PCRE2_SPTR alternative;
2710
0
sljit_sw private_srcw[2];
2711
0
sljit_sw shared_srcw[3];
2712
0
sljit_sw kept_shared_srcw[2];
2713
0
int private_count, shared_count, kept_shared_count;
2714
0
int from_sp, base_reg, offset, i;
2715
2716
0
memset(common->recurse_bitset, 0, common->recurse_bitset_size);
2717
2718
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2719
SLJIT_ASSERT(common->control_head_ptr != 0);
2720
recurse_check_bit(common, common->control_head_ptr);
2721
#endif
2722
2723
0
switch (type)
2724
0
  {
2725
0
  case recurse_copy_from_global:
2726
0
  from_sp = TRUE;
2727
0
  base_reg = STACK_TOP;
2728
0
  break;
2729
2730
0
  case recurse_copy_private_to_global:
2731
0
  case recurse_copy_shared_to_global:
2732
0
  case recurse_copy_kept_shared_to_global:
2733
0
  from_sp = FALSE;
2734
0
  base_reg = STACK_TOP;
2735
0
  break;
2736
2737
0
  default:
2738
0
  SLJIT_ASSERT(type == recurse_swap_global);
2739
0
  from_sp = FALSE;
2740
0
  base_reg = TMP2;
2741
0
  break;
2742
0
  }
2743
2744
0
stackptr = STACK(stackptr);
2745
0
stacktop = STACK(stacktop);
2746
2747
0
status.tmp_regs[0] = TMP1;
2748
0
status.saved_tmp_regs[0] = TMP1;
2749
2750
0
if (base_reg != TMP2)
2751
0
  {
2752
0
  status.tmp_regs[1] = TMP2;
2753
0
  status.saved_tmp_regs[1] = TMP2;
2754
0
  }
2755
0
else
2756
0
  {
2757
0
  status.saved_tmp_regs[1] = RETURN_ADDR;
2758
0
  if (HAS_VIRTUAL_REGISTERS)
2759
0
    status.tmp_regs[1] = STR_PTR;
2760
0
  else
2761
0
    status.tmp_regs[1] = RETURN_ADDR;
2762
0
  }
2763
2764
0
status.saved_tmp_regs[2] = TMP3;
2765
0
if (HAS_VIRTUAL_REGISTERS)
2766
0
  status.tmp_regs[2] = STR_END;
2767
0
else
2768
0
  status.tmp_regs[2] = TMP3;
2769
2770
0
delayed_mem_copy_init(&status, common);
2771
2772
0
if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
2773
0
  {
2774
0
  SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
2775
2776
0
  if (!from_sp)
2777
0
    delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->recursive_head_ptr);
2778
2779
0
  if (from_sp || type == recurse_swap_global)
2780
0
    delayed_mem_copy_move(&status, SLJIT_SP, common->recursive_head_ptr, base_reg, stackptr);
2781
0
  }
2782
2783
0
stackptr += sizeof(sljit_sw);
2784
2785
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2786
if (type != recurse_copy_shared_to_global)
2787
  {
2788
  if (!from_sp)
2789
    delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->control_head_ptr);
2790
2791
  if (from_sp || type == recurse_swap_global)
2792
    delayed_mem_copy_move(&status, SLJIT_SP, common->control_head_ptr, base_reg, stackptr);
2793
  }
2794
2795
stackptr += sizeof(sljit_sw);
2796
#endif
2797
2798
0
while (cc < ccend)
2799
0
  {
2800
0
  private_count = 0;
2801
0
  shared_count = 0;
2802
0
  kept_shared_count = 0;
2803
2804
0
  switch(*cc)
2805
0
    {
2806
0
    case OP_SET_SOM:
2807
0
    SLJIT_ASSERT(common->has_set_som);
2808
0
    if ((recurse_flags & recurse_flag_quit_found) && recurse_check_bit(common, OVECTOR(0)))
2809
0
      {
2810
0
      kept_shared_srcw[0] = OVECTOR(0);
2811
0
      kept_shared_count = 1;
2812
0
      }
2813
0
    cc += 1;
2814
0
    break;
2815
2816
0
    case OP_RECURSE:
2817
0
    if (recurse_flags & recurse_flag_quit_found)
2818
0
      {
2819
0
      if (common->has_set_som && recurse_check_bit(common, OVECTOR(0)))
2820
0
        {
2821
0
        kept_shared_srcw[0] = OVECTOR(0);
2822
0
        kept_shared_count = 1;
2823
0
        }
2824
0
      if (common->mark_ptr != 0 && recurse_check_bit(common, common->mark_ptr))
2825
0
        {
2826
0
        kept_shared_srcw[kept_shared_count] = common->mark_ptr;
2827
0
        kept_shared_count++;
2828
0
        }
2829
0
      }
2830
0
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2831
0
      {
2832
0
      shared_srcw[0] = common->capture_last_ptr;
2833
0
      shared_count = 1;
2834
0
      }
2835
0
    cc += 1 + LINK_SIZE;
2836
0
    break;
2837
2838
0
    case OP_KET:
2839
0
    private_srcw[0] = PRIVATE_DATA(cc);
2840
0
    if (private_srcw[0] != 0)
2841
0
      {
2842
0
      if (recurse_check_bit(common, private_srcw[0]))
2843
0
        private_count = 1;
2844
0
      SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
2845
0
      cc += PRIVATE_DATA(cc + 1);
2846
0
      }
2847
0
    cc += 1 + LINK_SIZE;
2848
0
    break;
2849
2850
0
    case OP_ASSERT:
2851
0
    case OP_ASSERT_NOT:
2852
0
    case OP_ASSERTBACK:
2853
0
    case OP_ASSERTBACK_NOT:
2854
0
    case OP_ASSERT_NA:
2855
0
    case OP_ASSERTBACK_NA:
2856
0
    case OP_ONCE:
2857
0
    case OP_SCRIPT_RUN:
2858
0
    case OP_BRAPOS:
2859
0
    case OP_SBRA:
2860
0
    case OP_SBRAPOS:
2861
0
    case OP_SCOND:
2862
0
    private_srcw[0] = PRIVATE_DATA(cc);
2863
0
    if (recurse_check_bit(common, private_srcw[0]))
2864
0
      private_count = 1;
2865
0
    cc += 1 + LINK_SIZE;
2866
0
    break;
2867
2868
0
    case OP_CBRA:
2869
0
    case OP_SCBRA:
2870
0
    offset = GET2(cc, 1 + LINK_SIZE);
2871
0
    shared_srcw[0] = OVECTOR(offset << 1);
2872
0
    if (recurse_check_bit(common, shared_srcw[0]))
2873
0
      {
2874
0
      shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw);
2875
0
      SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1]));
2876
0
      shared_count = 2;
2877
0
      }
2878
2879
0
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2880
0
      {
2881
0
      shared_srcw[shared_count] = common->capture_last_ptr;
2882
0
      shared_count++;
2883
0
      }
2884
2885
0
    if (common->optimized_cbracket[offset] == 0)
2886
0
      {
2887
0
      private_srcw[0] = OVECTOR_PRIV(offset);
2888
0
      if (recurse_check_bit(common, private_srcw[0]))
2889
0
        private_count = 1;
2890
0
      }
2891
2892
0
    cc += 1 + LINK_SIZE + IMM2_SIZE;
2893
0
    break;
2894
2895
0
    case OP_CBRAPOS:
2896
0
    case OP_SCBRAPOS:
2897
0
    offset = GET2(cc, 1 + LINK_SIZE);
2898
0
    shared_srcw[0] = OVECTOR(offset << 1);
2899
0
    if (recurse_check_bit(common, shared_srcw[0]))
2900
0
      {
2901
0
      shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw);
2902
0
      SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1]));
2903
0
      shared_count = 2;
2904
0
      }
2905
2906
0
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2907
0
      {
2908
0
      shared_srcw[shared_count] = common->capture_last_ptr;
2909
0
      shared_count++;
2910
0
      }
2911
2912
0
    private_srcw[0] = PRIVATE_DATA(cc);
2913
0
    if (recurse_check_bit(common, private_srcw[0]))
2914
0
      private_count = 1;
2915
2916
0
    offset = OVECTOR_PRIV(offset);
2917
0
    if (recurse_check_bit(common, offset))
2918
0
      {
2919
0
      private_srcw[private_count] = offset;
2920
0
      private_count++;
2921
0
      }
2922
0
    cc += 1 + LINK_SIZE + IMM2_SIZE;
2923
0
    break;
2924
2925
0
    case OP_COND:
2926
    /* Might be a hidden SCOND. */
2927
0
    alternative = cc + GET(cc, 1);
2928
0
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
2929
0
      {
2930
0
      private_srcw[0] = PRIVATE_DATA(cc);
2931
0
      if (recurse_check_bit(common, private_srcw[0]))
2932
0
        private_count = 1;
2933
0
      }
2934
0
    cc += 1 + LINK_SIZE;
2935
0
    break;
2936
2937
0
    CASE_ITERATOR_PRIVATE_DATA_1
2938
0
    private_srcw[0] = PRIVATE_DATA(cc);
2939
0
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
2940
0
      private_count = 1;
2941
0
    cc += 2;
2942
0
#ifdef SUPPORT_UNICODE
2943
0
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2944
0
#endif
2945
0
    break;
2946
2947
0
    CASE_ITERATOR_PRIVATE_DATA_2A
2948
0
    private_srcw[0] = PRIVATE_DATA(cc);
2949
0
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
2950
0
      {
2951
0
      private_count = 2;
2952
0
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2953
0
      SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
2954
0
      }
2955
0
    cc += 2;
2956
0
#ifdef SUPPORT_UNICODE
2957
0
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2958
0
#endif
2959
0
    break;
2960
2961
0
    CASE_ITERATOR_PRIVATE_DATA_2B
2962
0
    private_srcw[0] = PRIVATE_DATA(cc);
2963
0
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
2964
0
      {
2965
0
      private_count = 2;
2966
0
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2967
0
      SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
2968
0
      }
2969
0
    cc += 2 + IMM2_SIZE;
2970
0
#ifdef SUPPORT_UNICODE
2971
0
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2972
0
#endif
2973
0
    break;
2974
2975
0
    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2976
0
    private_srcw[0] = PRIVATE_DATA(cc);
2977
0
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
2978
0
      private_count = 1;
2979
0
    cc += 1;
2980
0
    break;
2981
2982
0
    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2983
0
    private_srcw[0] = PRIVATE_DATA(cc);
2984
0
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
2985
0
      {
2986
0
      private_count = 2;
2987
0
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2988
0
      SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
2989
0
      }
2990
0
    cc += 1;
2991
0
    break;
2992
2993
0
    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2994
0
    private_srcw[0] = PRIVATE_DATA(cc);
2995
0
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
2996
0
      {
2997
0
      private_count = 2;
2998
0
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2999
0
      SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
3000
0
      }
3001
0
    cc += 1 + IMM2_SIZE;
3002
0
    break;
3003
3004
0
    case OP_CLASS:
3005
0
    case OP_NCLASS:
3006
0
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
3007
0
    case OP_XCLASS:
3008
0
    i = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
3009
#else
3010
    i = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
3011
#endif
3012
0
    if (PRIVATE_DATA(cc) != 0)
3013
0
      {
3014
0
      private_count = 1;
3015
0
      private_srcw[0] = PRIVATE_DATA(cc);
3016
0
      switch(get_class_iterator_size(cc + i))
3017
0
        {
3018
0
        case 1:
3019
0
        break;
3020
3021
0
        case 2:
3022
0
        if (recurse_check_bit(common, private_srcw[0]))
3023
0
          {
3024
0
          private_count = 2;
3025
0
          private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
3026
0
          SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
3027
0
          }
3028
0
        break;
3029
3030
0
        default:
3031
0
        SLJIT_UNREACHABLE();
3032
0
        break;
3033
0
        }
3034
0
      }
3035
0
    cc += i;
3036
0
    break;
3037
3038
0
    case OP_MARK:
3039
0
    case OP_COMMIT_ARG:
3040
0
    case OP_PRUNE_ARG:
3041
0
    case OP_THEN_ARG:
3042
0
    SLJIT_ASSERT(common->mark_ptr != 0);
3043
0
    if ((recurse_flags & recurse_flag_quit_found) && recurse_check_bit(common, common->mark_ptr))
3044
0
      {
3045
0
      kept_shared_srcw[0] = common->mark_ptr;
3046
0
      kept_shared_count = 1;
3047
0
      }
3048
0
    if (common->control_head_ptr != 0 && recurse_check_bit(common, common->control_head_ptr))
3049
0
      {
3050
0
      private_srcw[0] = common->control_head_ptr;
3051
0
      private_count = 1;
3052
0
      }
3053
0
    cc += 1 + 2 + cc[1];
3054
0
    break;
3055
3056
0
    case OP_THEN:
3057
0
    SLJIT_ASSERT(common->control_head_ptr != 0);
3058
0
    if (recurse_check_bit(common, common->control_head_ptr))
3059
0
      {
3060
0
      private_srcw[0] = common->control_head_ptr;
3061
0
      private_count = 1;
3062
0
      }
3063
0
    cc++;
3064
0
    break;
3065
3066
0
    default:
3067
0
    cc = next_opcode(common, cc);
3068
0
    SLJIT_ASSERT(cc != NULL);
3069
0
    continue;
3070
0
    }
3071
3072
0
  if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
3073
0
    {
3074
0
    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
3075
3076
0
    for (i = 0; i < private_count; i++)
3077
0
      {
3078
0
      SLJIT_ASSERT(private_srcw[i] != 0);
3079
3080
0
      if (!from_sp)
3081
0
        delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, private_srcw[i]);
3082
3083
0
      if (from_sp || type == recurse_swap_global)
3084
0
        delayed_mem_copy_move(&status, SLJIT_SP, private_srcw[i], base_reg, stackptr);
3085
3086
0
      stackptr += sizeof(sljit_sw);
3087
0
      }
3088
0
    }
3089
0
  else
3090
0
    stackptr += sizeof(sljit_sw) * private_count;
3091
3092
0
  if (type != recurse_copy_private_to_global && type != recurse_copy_kept_shared_to_global)
3093
0
    {
3094
0
    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_swap_global);
3095
3096
0
    for (i = 0; i < shared_count; i++)
3097
0
      {
3098
0
      SLJIT_ASSERT(shared_srcw[i] != 0);
3099
3100
0
      if (!from_sp)
3101
0
        delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, shared_srcw[i]);
3102
3103
0
      if (from_sp || type == recurse_swap_global)
3104
0
        delayed_mem_copy_move(&status, SLJIT_SP, shared_srcw[i], base_reg, stackptr);
3105
3106
0
      stackptr += sizeof(sljit_sw);
3107
0
      }
3108
0
    }
3109
0
  else
3110
0
    stackptr += sizeof(sljit_sw) * shared_count;
3111
3112
0
  if (type != recurse_copy_private_to_global && type != recurse_swap_global)
3113
0
    {
3114
0
    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_copy_kept_shared_to_global);
3115
3116
0
    for (i = 0; i < kept_shared_count; i++)
3117
0
      {
3118
0
      SLJIT_ASSERT(kept_shared_srcw[i] != 0);
3119
3120
0
      if (!from_sp)
3121
0
        delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, kept_shared_srcw[i]);
3122
3123
0
      if (from_sp || type == recurse_swap_global)
3124
0
        delayed_mem_copy_move(&status, SLJIT_SP, kept_shared_srcw[i], base_reg, stackptr);
3125
3126
0
      stackptr += sizeof(sljit_sw);
3127
0
      }
3128
0
    }
3129
0
  else
3130
0
    stackptr += sizeof(sljit_sw) * kept_shared_count;
3131
0
  }
3132
3133
0
SLJIT_ASSERT(cc == ccend && stackptr == stacktop);
3134
3135
0
delayed_mem_copy_finish(&status);
3136
0
}
3137
3138
static SLJIT_INLINE PCRE2_SPTR set_then_offsets(compiler_common *common, PCRE2_SPTR cc, sljit_u8 *current_offset)
{
/* Scans the bracket that starts at cc and stores 1 into the
common->then_offsets entry of every alternative in which a THEN verb
(OP_THEN .. OP_THEN_ARG) is found. The entry is indexed by the code offset
of the first opcode of the alternative (after an optional OP_REVERSE or
OP_VREVERSE). current_offset is the entry of the alternative being scanned,
or NULL when THEN verbs are not recorded. Nested groups are handled by
recursion. Returns the value of bracketend() for the incoming cc. */
PCRE2_SPTR group_end = bracketend(cc);
BOOL track_alternatives = (cc[GET(cc, 1)] == OP_ALT);

/* An assertion captures THEN, so nothing is recorded for the caller's entry. */
if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA)
  current_offset = NULL;

/* The branches of a conditional block are not tracked as alternatives. */
if (*cc == OP_COND || *cc == OP_SCOND)
  track_alternatives = FALSE;

cc = next_opcode(common, cc);

if (track_alternatives)
  {
  /* Step over the lookbehind length opcode of the first alternative. */
  if (*cc == OP_REVERSE)
    cc += 1 + IMM2_SIZE;
  else if (*cc == OP_VREVERSE)
    cc += 1 + 2 * IMM2_SIZE;

  current_offset = common->then_offsets + (cc - common->start);
  }

while (cc < group_end)
  {
  /* Nested assertion or bracket: the recursive call returns its end. */
  if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
    {
    cc = set_then_offsets(common, cc, current_offset);
    continue;
    }

  /* A new alternative of this bracket: switch to its own entry. */
  if (track_alternatives && *cc == OP_ALT)
    {
    cc += 1 + LINK_SIZE;

    if (*cc == OP_REVERSE)
      cc += 1 + IMM2_SIZE;
    else if (*cc == OP_VREVERSE)
      cc += 1 + 2 * IMM2_SIZE;

    current_offset = common->then_offsets + (cc - common->start);
    continue;
    }

  if (current_offset != NULL && *cc >= OP_THEN && *cc <= OP_THEN_ARG)
    *current_offset = 1;

  cc = next_opcode(common, cc);
  }

return group_end;
}
3189
3190
#undef CASE_ITERATOR_PRIVATE_DATA_1
3191
#undef CASE_ITERATOR_PRIVATE_DATA_2A
3192
#undef CASE_ITERATOR_PRIVATE_DATA_2B
3193
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
3194
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
3195
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
3196
3197
static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
{
/* Clearing the lowest set bit leaves zero exactly when at most one bit is
set. Note that value == 0 also passes this test. */
unsigned int without_lowest_bit = value & (value - 1);

return without_lowest_bit == 0;
}
3201
3202
static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
{
/* Binds every jump stored in the list to the given label. */
jump_list *item;

for (item = list; item != NULL; item = item->next)
  {
  /* sljit_set_label is clever enough to do nothing
  if either the jump or the label is NULL. */
  SET_LABEL(item->jump, label);
  }
}
3212
3213
static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
{
/* Pushes the jump onto the front of *list. The list node is obtained from
sljit_alloc_memory(); when that returns NULL the list is left untouched. */
jump_list *node = sljit_alloc_memory(compiler, sizeof(jump_list));

if (node == NULL)
  return;

node->jump = jump;
node->next = *list;
*list = node;
}
3223
3224
static void add_stub(compiler_common *common, struct sljit_jump *start)
{
/* Records a stub on common->stubs: 'start' is the jump that enters the stub
and the label created here is the place where the stub continues afterwards
(see flush_stubs). Nothing is recorded (and no label is created) when the
node cannot be allocated. */
DEFINE_COMPILER;
stub_list *stub = sljit_alloc_memory(compiler, sizeof(stub_list));

if (stub == NULL)
  return;

stub->start = start;
stub->quit = LABEL();
stub->next = common->stubs;
common->stubs = stub;
}
3237
3238
static void flush_stubs(compiler_common *common)
{
/* Emits the body of every pending stub: the entry jump lands here, a fast
call is added to the common->stackalloc jump list, and control then jumps
back to the label saved by add_stub(). The pending list is emptied. */
DEFINE_COMPILER;
stub_list *stub;

for (stub = common->stubs; stub != NULL; stub = stub->next)
  {
  JUMPHERE(stub->start);
  add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
  JUMPTO(SLJIT_JUMP, stub->quit);
  }

common->stubs = NULL;
}
3252
3253
static SLJIT_INLINE void count_match(compiler_common *common)
{
/* Emits code that decrements COUNT_MATCH by one and, when the result is
zero, takes a jump that is collected in common->calllimit. */
DEFINE_COMPILER;
struct sljit_jump *limit_reached;

OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
limit_reached = JUMP(SLJIT_ZERO);
add_jump(compiler, &common->calllimit, limit_reached);
}
3260
3261
static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
{
/* Emits code that reserves 'size' machine words by lowering STACK_TOP, and
registers a stub that is entered when STACK_TOP drops below STACK_LIMIT.
May destroy all locals and registers except TMP2. */
DEFINE_COMPILER;
struct sljit_jump *limit_crossed;

SLJIT_ASSERT(size > 0);
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * SSIZE_OF(sw));
#ifdef DESTROY_REGISTERS
/* Debug aid: overwrite everything this function is allowed to destroy. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
#endif
limit_crossed = CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0);
add_stub(common, limit_crossed);
}
3277
3278
static SLJIT_INLINE void free_stack(compiler_common *common, int size)
{
/* Emits code that releases 'size' machine words by raising STACK_TOP;
the counterpart of allocate_stack(). */
DEFINE_COMPILER;

SLJIT_ASSERT(size > 0);
OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * SSIZE_OF(sw));
}
3285
3286
static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
{
/* Allocates 'size' bytes plus one leading sljit_uw with SLJIT_MALLOC. The
leading word stores the previous common->read_only_data_head, so all chunks
form a singly linked list headed by common->read_only_data_head. Returns a
pointer just past the leading word, or NULL when the compiler is already in
an error state or the allocation fails (a memory error is then set on the
compiler). */
DEFINE_COMPILER;
sljit_uw *chunk;

if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  return NULL;

chunk = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);

if (!SLJIT_UNLIKELY(chunk == NULL))
  {
  /* Link the new chunk in front of the existing ones. */
  *(void**)chunk = common->read_only_data_head;
  common->read_only_data_head = (void *)chunk;
  return chunk + 1;
  }

sljit_set_compiler_memory_error(compiler);
return NULL;
}
3305
3306
static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
{
/* Emits code that stores "begin - IN_UCHARS(1)" into OVECTOR(1) ..
OVECTOR(length - 1). Short vectors are written with unrolled stores, longer
ones with a loop. SLJIT_R0 holds the stored value afterwards. */
DEFINE_COMPILER;
struct sljit_label *store_loop;
sljit_s32 slot;

/* At this point we can freely use all temporary registers. */
SLJIT_ASSERT(length > 1);
/* TMP1 returns with begin - 1. */
OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));

if (length < 8)
  {
  for (slot = 1; slot < length; slot++)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(slot), SLJIT_R0, 0);
  return;
  }

/* The SLJIT_MEM_SUPP call is used as a capability check for the
pre-update store form (presumably emits nothing -- see the sljit docs). */
if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS)
  {
  /* The base starts one word early because the update form is SLJIT_MEM_PRE. */
  GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
  store_loop = LABEL();
  sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
  JUMPTO(SLJIT_NOT_ZERO, store_loop);
  return;
  }

/* Fallback: plain store followed by an explicit pointer increment. */
GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START + sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
store_loop = LABEL();
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0);
OP2(SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw));
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, store_loop);
}
3344
3345
static SLJIT_INLINE void reset_early_fail(compiler_common *common)
{
/* Emits code that zeroes the local area between common->early_fail_start_ptr
and common->early_fail_end_ptr. One word is cleared with a single store, up
to six words with unrolled stores, and larger areas with a loop that clears
three words per iteration followed by up to two trailing stores. */
DEFINE_COMPILER;
sljit_u32 area_size = (sljit_u32)(common->early_fail_end_ptr - common->early_fail_start_ptr);
sljit_u32 tail_size;
sljit_s32 zero_src = SLJIT_IMM;
sljit_s32 offset;
struct sljit_label *clear_loop;

SLJIT_ASSERT(common->early_fail_start_ptr < common->early_fail_end_ptr);

if (area_size == sizeof(sljit_sw))
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->early_fail_start_ptr, SLJIT_IMM, 0);
  return;
  }

/* Keep zero in TMP3 when TMP3 is a real register and the CPU does not
report a zero register; otherwise an immediate zero is stored. */
if (sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) >= 0 && !sljit_has_cpu_feature(SLJIT_HAS_ZERO_REGISTER))
  {
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
  zero_src = TMP3;
  }

if (area_size <= 6 * sizeof(sljit_sw))
  {
  for (offset = common->early_fail_start_ptr; offset < common->early_fail_end_ptr; offset += sizeof(sljit_sw))
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), offset, zero_src, 0);
  return;
  }

GET_LOCAL_BASE(TMP1, 0, common->early_fail_start_ptr);

/* Bytes left over after the last full group of three words. */
tail_size = ((area_size / sizeof(sljit_sw)) % 3) * sizeof(sljit_sw);

/* TMP2 marks the end of the part cleared by the loop. */
OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, area_size - tail_size);

clear_loop = LABEL();
OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, zero_src, 0);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -2 * SSIZE_OF(sw), zero_src, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -1 * SSIZE_OF(sw), zero_src, 0);
CMPTO(SLJIT_LESS, TMP1, 0, TMP2, 0, clear_loop);

if (tail_size >= sizeof(sljit_sw))
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, zero_src, 0);

if (tail_size >= 2 * sizeof(sljit_sw))
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), sizeof(sljit_sw), zero_src, 0);
}
3394
3395
static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
{
/* Emits code that copies OVECTOR(1) into OVECTOR(2) .. OVECTOR(length - 1),
clears the mark and control head locals when they are in use, and reloads
STACK_TOP from the 'end' field of the sljit_stack referenced by the
arguments. TMP1 is loaded from common->start_ptr at the end. */
DEFINE_COMPILER;
struct sljit_label *store_loop;
int slot;

SLJIT_ASSERT(length > 1);
/* OVECTOR(1) contains the "string begin - 1" constant. */
if (length > 2)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));

if (length >= 8)
  {
  /* STACK_TOP is used as the loop counter; it is reloaded further below. */
  if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS)
    {
    GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
    store_loop = LABEL();
    sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, store_loop);
    }
  else
    {
    GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + 2 * sizeof(sljit_sw));
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
    store_loop = LABEL();
    OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(sljit_sw));
    OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, store_loop);
    }
  }
else
  {
  for (slot = 2; slot < length; slot++)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(slot), TMP1, 0);
  }

/* With virtual registers the stack pointer is fetched in two steps, the
second one after the immediate stores below. */
if (HAS_VIRTUAL_REGISTERS)
  OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
else
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, stack));

if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
if (common->control_head_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
if (HAS_VIRTUAL_REGISTERS)
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));

OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, end));
}
3448
3449
static sljit_sw SLJIT_FUNC do_search_mark(sljit_sw *current, PCRE2_SPTR skip_arg)
{
/* Follows the chain of frames starting at 'current'. In each frame word 0
is the link to the next frame (0 ends the chain) and word 1 is the frame
type. For a type_mark frame, word 2 is a name that is compared with
skip_arg; on a match word 3 of that frame is returned. Returns 0 when no
frame matches. */
sljit_sw *frame = current;

while (frame != NULL)
  {
  if (frame[1] == type_mark)
    {
    if (PRIV(strcmp)(skip_arg, (PCRE2_SPTR)frame[2]) == 0)
      return frame[3];
    }
  else if (frame[1] != type_then_trap)
    {
    /* Only mark and then-trap frames are expected here. */
    SLJIT_UNREACHABLE();
    }

  /* Links must point to higher addresses. */
  SLJIT_ASSERT(frame[0] == 0 || frame < (sljit_sw*)frame[0]);
  frame = (sljit_sw*)frame[0];
  }

return 0;
}
3472
3473
static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
{
/* Emits the code that publishes a successful match: startchar_ptr and (when
used) mark_ptr are written to the arguments, 'oveccount' local ovector
entries are converted from pointers to offsets relative to 'begin' and
stored into the ovector of match_data, and the return register is computed
by scanning the local ovector backwards from topbracket. */
DEFINE_COMPILER;
struct sljit_label *loop_start;
BOOL has_pre;
BOOL has_mark = (common->mark_ptr != 0);
sljit_s32 store_opcode;

/* At this point we can freely use all registers. */
/* SLJIT_S2 keeps the old OVECTOR(1) value; the slot receives STR_PTR. */
OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);

if (!HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, match_data));
  if (has_mark)
    OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
  OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, oveccount));
  OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
  if (has_mark)
    OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R0, 0);
  OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
  }
else
  {
  OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
  if (has_mark)
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
  OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, oveccount));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
  if (has_mark)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
  OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, match_data),
    SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
  }

/* Capability check for the pre-update load form. */
has_pre = sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;

GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0));
OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? SLJIT_R0 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));

/* Copy loop: SLJIT_S0 walks the local ovector, SLJIT_R2 the output
ovector, SLJIT_R1 counts the remaining entries. */
loop_start = LABEL();

if (!has_pre)
  {
  OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0);
  OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
  }
else
  sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));

OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, sizeof(PCRE2_SIZE));
OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_R0, 0);
/* Copy the integer value to the output buffer */
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif

SLJIT_ASSERT(sizeof(PCRE2_SIZE) == 4 || sizeof(PCRE2_SIZE) == 8);
store_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
OP1(store_opcode, SLJIT_MEM1(SLJIT_R2), 0, SLJIT_S1, 0);

OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, loop_start);

/* Calculate the return value, which is the maximum ovector value. */
if (topbracket <= 1)
  {
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
  return;
  }

/* Scan backwards, two words at a time, while the entry still equals the
value saved in SLJIT_S2; SLJIT_R1 is decremented on every step. */
if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * SSIZE_OF(sw))) == SLJIT_SUCCESS)
  {
  GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);

  /* OVECTOR(0) is never equal to SLJIT_S2. */
  loop_start = LABEL();
  sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * SSIZE_OF(sw)));
  OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
  CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop_start);
  }
else
  {
  GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + (topbracket - 1) * 2 * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);

  /* OVECTOR(0) is never equal to SLJIT_S2. */
  loop_start = LABEL();
  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0);
  OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
  OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
  CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop_start);
  }

OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
}
3569
3570
static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
{
/* Emits the code that reports a partial match: the return register is set
to PCRE2_ERROR_PARTIAL, startchar_ptr is stored, the first two ovector
entries of match_data receive the start offset and the offset of STR_END
(both relative to 'begin'), and control jumps to 'quit'. The start is read
from hit_start in soft partial mode and from start_ptr otherwise. */
DEFINE_COMPILER;
sljit_s32 mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
sljit_s32 arguments_reg = HAS_VIRTUAL_REGISTERS ? SLJIT_R1 : ARGUMENTS;

SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S0, str_end_must_be_saved_reg0);
SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
  && (common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start != 0 : common->hit_start == 0));

if (arguments_reg != ARGUMENTS)
  OP1(SLJIT_MOV, arguments_reg, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP),
  common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start : common->start_ptr);
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_PARTIAL);

/* Store match begin and end. */
OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_R2, 0);
OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, match_data));

/* ovector[0]: start pointer converted to an offset from 'begin'. */
OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S1, 0);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector), SLJIT_R2, 0);

/* ovector[1]: STR_END converted the same way. */
OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_S1, 0);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
OP2(SLJIT_ASHR, STR_END, 0, STR_END, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector) + sizeof(PCRE2_SIZE), STR_END, 0);

JUMPTO(SLJIT_JUMP, quit);
}
3607
3608
static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
{
/* In the partial matching modes, emits code that overwrites the
start_used_ptr local with STR_PTR unless the emitted comparison skips the
store. Emits nothing in other modes. May destroy TMP1. */
DEFINE_COMPILER;
struct sljit_jump *skip_store;

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  /* The value of -1 must be kept for start_used_ptr! */
  OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
  /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
  is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
  skip_store = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
  }
else if (common->mode == PCRE2_JIT_PARTIAL_HARD)
  skip_store = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
else
  return;

OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
JUMPHERE(skip_store);
}
3631
3632
static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, PCRE2_SPTR cc)
{
/* Detects if the character at cc has an othercase. In UTF or UCP mode,
characters above 127 are checked with UCD_OTHERCASE and the rest with the
common->fcc table; otherwise only characters accepted by MAX_255 are looked
up in common->fcc and all others are reported as having no othercase. */
unsigned int c;

#ifdef SUPPORT_UNICODE
if (common->utf || common->ucp)
  {
  if (common->utf)
    {
    GETCHAR(c, cc);
    }
  else
    c = *cc;

  return (c > 127) ? (c != UCD_OTHERCASE(c)) : (common->fcc[c] != c);
  }
#endif

c = *cc;
return MAX_255(c) ? common->fcc[c] != c : FALSE;
}
3657
3658
static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
{
/* Returns with the othercase of c: UCD_OTHERCASE for characters above 127
in UTF or UCP mode, otherwise a TABLE_GET lookup in common->fcc with c
itself passed as the third argument (presumably the out-of-table default). */
#ifdef SUPPORT_UNICODE
if (c > 127 && (common->utf || common->ucp))
  return UCD_OTHERCASE(c);
#endif
return TABLE_GET(c, common->fcc, c);
}
3667
3668
static unsigned int char_get_othercase_bit(compiler_common *common, PCRE2_SPTR cc)
{
/* Detects if the character and its othercase has only 1 bit difference.
Returns 0 when they differ in more than one bit. Otherwise the low 8 bits
of the result hold the differing bit as a mask and the bits above hold an
index; from the code below the index selects the byte (within the encoded
character) that contains the bit -- NOTE(review): confirm against the
callers. The character must have an othercase (asserted). */
unsigned int c, oc, diff;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
int byte_index;
#endif

#ifdef SUPPORT_UNICODE
if (common->utf || common->ucp)
  {
  if (common->utf)
    {
    GETCHAR(c, cc);
    }
  else
    c = *cc;

  oc = (c <= 127) ? common->fcc[c] : UCD_OTHERCASE(c);
  }
else
  {
  c = *cc;
  oc = TABLE_GET(c, common->fcc, c);
  }
#else
c = *cc;
oc = TABLE_GET(c, common->fcc, c);
#endif

SLJIT_ASSERT(c != oc);

diff = c ^ oc;
/* Optimized for English alphabet. */
if (c <= 127 && diff == 0x20)
  return (0 << 8) | 0x20;

/* Since c != oc, they must have at least 1 bit difference. */
if (!is_powerof2(diff))
  return 0;

#if PCRE2_CODE_UNIT_WIDTH == 8

#ifdef SUPPORT_UNICODE
if (common->utf && c > 127)
  {
  /* Start at the last byte of the sequence and move one byte towards the
  front for every 6 bits that do not contain the difference. */
  for (byte_index = GET_EXTRALEN(*cc); (diff & 0x3f) == 0; byte_index--)
    diff >>= 6;
  return (byte_index << 8) | diff;
  }
#endif /* SUPPORT_UNICODE */
return (0 << 8) | diff;

#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32

#ifdef SUPPORT_UNICODE
if (common->utf && c > 65535)
  {
  /* Difference in the low 10 bits: reported with index 2 or 3. */
  if (diff < (1u << 10))
    return (diff < 256) ? ((2 << 8) | diff) : ((3 << 8) | (diff >> 8));
  diff >>= 10;
  }
#endif /* SUPPORT_UNICODE */
return (diff < 256) ? ((0u << 8) | diff) : ((1u << 8) | (diff >> 8));

#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
}
3743
3744
static void check_partial(compiler_common *common, BOOL force)
{
/* Checks whether a partial matching is occurred. Does not modify registers.
Emits nothing in PCRE2_JIT_COMPLETE mode. In soft partial mode the hit_start
local is set to 0; in the other partial mode control is transferred to the
partial match handler. A guard emitted first may skip that action. */
DEFINE_COMPILER;
struct sljit_jump *skip = NULL;
BOOL soft_mode;

SLJIT_ASSERT(!force || common->mode != PCRE2_JIT_COMPLETE);

if (common->mode == PCRE2_JIT_COMPLETE)
  return;

soft_mode = (common->mode == PCRE2_JIT_PARTIAL_SOFT);

if (!(force || common->allow_empty_partial))
  skip = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
else if (soft_mode)
  skip = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);

if (soft_mode)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));

if (skip != NULL)
  JUMPHERE(skip);
}
3773
3774
static void check_str_end(compiler_common *common, jump_list **end_reached)
{
/* Emits the end-of-subject test: jumps added to end_reached are taken when
STR_PTR has reached STR_END. In the partial modes the partial match
bookkeeping is emitted on that path as well. Does not affect registers.
Usually used in a tight spot. */
DEFINE_COMPILER;
struct sljit_jump *not_at_end;

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

/* Both partial modes start with the same start_used_ptr guard. */
add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));

JUMPHERE(not_at_end);
}
3803
3804
static void detect_partial_match(compiler_common *common, jump_list **backtracks)
{
/* Emits the end-of-subject test used before reading a character: jumps
added to backtracks are taken when STR_PTR has reached STR_END. In the
partial modes, a partial match is recorded (soft) or reported (otherwise)
on that path unless one of the emitted guards backtracks first. */
DEFINE_COMPILER;
struct sljit_jump *not_at_end;
BOOL soft_mode;

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

/* Partial matching mode. */
soft_mode = (common->mode == PCRE2_JIT_PARTIAL_SOFT);
not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

if (common->allow_empty_partial)
  {
  if (soft_mode)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1));
  }
else
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

if (soft_mode)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));

JUMPHERE(not_at_end);
}
3836
3837
static void process_partial_match(compiler_common *common)
{
/* Emits the partial match bookkeeping for a position where the end of the
subject has already been detected by the caller.

PCRE2_JIT_PARTIAL_SOFT: stores 0 into the hit_start stack slot unless the
start_used_ptr stack slot is >= STR_PTR.

PCRE2_JIT_PARTIAL_HARD: jumps to the partial match exit when the
start_used_ptr stack slot is < STR_PTR (directly to partialmatchlabel if it
is set, otherwise through the partialmatch jump list).

Any other mode: nothing is emitted. */
DEFINE_COMPILER;
struct sljit_jump *skip_hit;

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  skip_hit = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  JUMPHERE(skip_hit);
  return;
  }

if (common->mode != PCRE2_JIT_PARTIAL_HARD)
  return;

if (common->partialmatchlabel != NULL)
  CMPTO(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, CMP(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
}
3857
3858
static void detect_partial_match_to(compiler_common *common, struct sljit_label *label)
{
/* Emits "if (STR_PTR < STR_END) goto label", followed by the partial match
bookkeeping of process_partial_match() for the case where the jump is not
taken, i.e. the end of the subject has been reached. */
DEFINE_COMPILER;

CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, label);

/* Only reached by the generated code when STR_PTR >= STR_END. */
process_partial_match(common);
}
3865
3866
static void peek_char(compiler_common *common, sljit_u32 max, sljit_s32 dst, sljit_sw dstw, jump_list **backtracks)
{
/* Emits code that loads the character at STR_PTR into TMP1 and leaves
STR_PTR unchanged. No STR_END check is emitted. TMP2, dst and RETURN_ADDR
may be destroyed by the generated code.

'max' is the largest character value the caller is interested in; when it
is below the first value that needs multi-unit decoding (or validation),
only the plain code unit load is emitted. 'dst'/'dstw' name the operand
used to save STR_PTR across the decoder call. When 'backtracks' is not NULL
and invalid UTF is allowed, an invalid character jumps to 'backtracks'. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *single_unit;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */

SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(dst);
SLJIT_UNUSED_ARG(dstw);
SLJIT_UNUSED_ARG(backtracks);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  if (max < 128) return;

  /* Bytes below 0x80 are complete characters. */
  single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);

  /* The decoder helpers work relative to STR_PTR, so STR_PTR is saved in
  dst, stepped past the first byte, and restored after the call. */
  OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, common->invalid_utf ? &common->utfreadchar_invalid : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
  if (backtracks != NULL && common->invalid_utf)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  JUMPHERE(single_unit);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  if (max < 0xd800) return;

  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

  if (common->invalid_utf)
    {
    /* Anything outside 0xd800..0xdfff is a complete character. */
    single_unit = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
    OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
    OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    }
  else
    {
    /* TMP2 contains the high surrogate (minus 0xd800) when it is < 0x400. */
    single_unit = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
    }

  JUMPHERE(single_unit);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf)
  {
  if (max < 0xd800) return;

  /* TMP2 = TMP1 - 0xd800 is needed by both variants below. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

  if (backtracks != NULL)
    {
    /* Values >= 0x110000 and the range 0xd800..0xdfff are rejected. */
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
    }
  else
    {
    /* Without a backtrack list the same two ranges are replaced by
    INVALID_UTF_CHAR in TMP1. */
    OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000);
    SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
    SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
    }
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
3949
3950
static void peek_char_back(compiler_common *common, sljit_u32 max, jump_list **backtracks)
{
/* Emits code that loads the character which ends just before STR_PTR into
TMP1, without moving STR_PTR. TMP2 must contain the start of the subject
buffer. TMP1, TMP2 and RETURN_ADDR are affected.

'max' is the largest character value the caller is interested in; when it
is below the first value that needs multi-unit decoding, only the previous
code unit is loaded. When invalid UTF is allowed, an invalid character
jumps to 'backtracks' (in the 8 and 16 bit cases only if 'backtracks' is
not NULL). */
DEFINE_COMPILER;

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *single_unit;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */

SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(backtracks);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  if (max < 128) return;

  /* A byte below 0x80 is a complete character. */
  single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
  if (!common->invalid_utf)
    add_jump(compiler, &common->utfpeakcharback, JUMP(SLJIT_FAST_CALL));
  else
    {
    add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    }
  JUMPHERE(single_unit);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  if (max < 0xd800) return;

  if (!common->invalid_utf)
    {
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
    single_unit = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xdc00);
    /* TMP2 contains the low surrogate (minus 0xdc00); combine it with the
    high surrogate stored one unit earlier. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
    }
  else
    {
    single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
    add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    }
  JUMPHERE(single_unit);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf)
  {
  /* Values >= 0x110000 and the range 0xd800..0xdfff are rejected. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
4017
4018
0
#define READ_CHAR_UPDATE_STR_PTR 0x1
4019
0
#define READ_CHAR_UTF8_NEWLINE 0x2
4020
0
#define READ_CHAR_NEWLINE (READ_CHAR_UPDATE_STR_PTR | READ_CHAR_UTF8_NEWLINE)
4021
0
#define READ_CHAR_VALID_UTF 0x4
4022
4023
static void read_char(compiler_common *common, sljit_u32 min, sljit_u32 max,
4024
  jump_list **backtracks, sljit_u32 options)
4025
0
{
4026
/* Reads the precise value of a character into TMP1, if the character is
4027
between min and max (c >= min && c <= max). Otherwise it returns with a value
4028
outside the range. Does not check STR_END. */
4029
0
DEFINE_COMPILER;
4030
0
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
4031
0
struct sljit_jump *jump;
4032
0
#endif
4033
0
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
4034
0
struct sljit_jump *jump2;
4035
0
#endif
4036
4037
0
SLJIT_UNUSED_ARG(min);
4038
0
SLJIT_UNUSED_ARG(max);
4039
0
SLJIT_UNUSED_ARG(backtracks);
4040
0
SLJIT_UNUSED_ARG(options);
4041
0
SLJIT_ASSERT(min <= max);
4042
4043
0
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4044
0
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4045
4046
0
#ifdef SUPPORT_UNICODE
4047
0
#if PCRE2_CODE_UNIT_WIDTH == 8
4048
0
if (common->utf)
4049
0
  {
4050
0
  if (max < 128 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;
4051
4052
0
  if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
4053
0
    {
4054
0
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
4055
4056
0
    if (options & READ_CHAR_UTF8_NEWLINE)
4057
0
      add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
4058
0
    else
4059
0
      add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
4060
4061
0
    if (backtracks != NULL)
4062
0
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
4063
0
    JUMPHERE(jump);
4064
0
    return;
4065
0
    }
4066
4067
0
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4068
0
  if (min >= 0x10000)
4069
0
    {
4070
0
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
4071
0
    if (options & READ_CHAR_UPDATE_STR_PTR)
4072
0
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4073
0
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4074
0
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
4075
0
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
4076
0
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
4077
0
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4078
0
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4079
0
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4080
0
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
4081
0
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4082
0
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
4083
0
    if (!(options & READ_CHAR_UPDATE_STR_PTR))
4084
0
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
4085
0
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4086
0
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
4087
0
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4088
0
    JUMPHERE(jump2);
4089
0
    if (options & READ_CHAR_UPDATE_STR_PTR)
4090
0
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
4091
0
    }
4092
0
  else if (min >= 0x800 && max <= 0xffff)
4093
0
    {
4094
0
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
4095
0
    if (options & READ_CHAR_UPDATE_STR_PTR)
4096
0
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4097
0
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4098
0
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
4099
0
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
4100
0
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
4101
0
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4102
0
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4103
0
    if (!(options & READ_CHAR_UPDATE_STR_PTR))
4104
0
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4105
0
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4106
0
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
4107
0
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4108
0
    JUMPHERE(jump2);
4109
0
    if (options & READ_CHAR_UPDATE_STR_PTR)
4110
0
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
4111
0
    }
4112
0
  else if (max >= 0x800)
4113
0
    {
4114
0
    add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
4115
0
    }
4116
0
  else if (max < 128)
4117
0
    {
4118
0
    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4119
0
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4120
0
    }
4121
0
  else
4122
0
    {
4123
0
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4124
0
    if (!(options & READ_CHAR_UPDATE_STR_PTR))
4125
0
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4126
0
    else
4127
0
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4128
0
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
4129
0
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4130
0
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
4131
0
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4132
0
    if (options & READ_CHAR_UPDATE_STR_PTR)
4133
0
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
4134
0
    }
4135
0
  JUMPHERE(jump);
4136
0
  }
4137
#elif PCRE2_CODE_UNIT_WIDTH == 16
4138
if (common->utf)
4139
  {
4140
  if (max < 0xd800 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;
4141
4142
  if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
4143
    {
4144
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4145
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
4146
4147
    if (options & READ_CHAR_UTF8_NEWLINE)
4148
      add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
4149
    else
4150
      add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
4151
4152
    if (backtracks != NULL)
4153
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
4154
    JUMPHERE(jump);
4155
    return;
4156
    }
4157
4158
  if (max >= 0x10000)
4159
    {
4160
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4161
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
4162
    /* TMP2 contains the high surrogate. */
4163
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4164
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
4165
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4166
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
4167
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
4168
    JUMPHERE(jump);
4169
    return;
4170
    }
4171
4172
  /* Skip low surrogate if necessary. */
4173
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4174
4175
  if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
4176
    {
4177
    if (options & READ_CHAR_UPDATE_STR_PTR)
4178
      OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4179
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
4180
    if (options & READ_CHAR_UPDATE_STR_PTR)
4181
      SELECT(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0, STR_PTR);
4182
    if (max >= 0xd800)
4183
      SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, 0x10000, TMP1);
4184
    }
4185
  else
4186
    {
4187
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
4188
    if (options & READ_CHAR_UPDATE_STR_PTR)
4189
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4190
    if (max >= 0xd800)
4191
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
4192
    JUMPHERE(jump);
4193
    }
4194
  }
4195
#elif PCRE2_CODE_UNIT_WIDTH == 32
4196
if (common->invalid_utf)
4197
  {
4198
  if (backtracks != NULL)
4199
    {
4200
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4201
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
4202
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
4203
    }
4204
  else
4205
    {
4206
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4207
    OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000);
4208
    SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
4209
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
4210
    SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
4211
    }
4212
  }
4213
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
4214
0
#endif /* SUPPORT_UNICODE */
4215
0
}
4216
4217
static void skip_valid_char(compiler_common *common)
{
/* Emits code that moves STR_PTR forward by one character. In UTF mode the
subject is treated as valid UTF: no validity check is emitted. TMP1 is
destroyed in UTF mode. */
DEFINE_COMPILER;
#if (defined SUPPORT_UNICODE) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
struct sljit_jump *single_unit;
#endif

#if (defined SUPPORT_UNICODE) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
if (common->utf)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if PCRE2_CODE_UNIT_WIDTH == 8
  /* Lead bytes (>= 0xc0) add the extra length found in utf8_table4. */
  single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#elif PCRE2_CODE_UNIT_WIDTH == 16
  /* TMP1 becomes 1 for a high surrogate (0xd800..0xdbff), 0 otherwise,
  and is then converted to a byte offset. */
  single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xd800);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  JUMPHERE(single_unit);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */

/* Every character is exactly one code unit. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
4247
4248
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
4249
4250
static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
{
/* Tells whether the character codes below 128 are enough to determine a
match. 'bitset' is a 32 byte bitmap; the answer is TRUE when each of its
last 16 bytes (the bits of characters 128..255) is 0, or 0xff if 'nclass'
is set. */
const sljit_u8 expected = nclass ? 0xff : 0;
int i;

for (i = 16; i < 32; i++)
  {
  if (bitset[i] != expected)
    return FALSE;
  }
return TRUE;
}
4266
4267
static void read_char7_type(compiler_common *common, jump_list **backtracks, BOOL negated)
{
/* Emits code that reads the character type into TMP1 and advances STR_PTR.
The type is precise if the character is less than 128, otherwise TMP1 is
zero. Does not check STR_END. Only usable in UTF mode.

When 'negated' is FALSE only the first code unit is consumed. When it is
TRUE the remaining code units of a multi-unit character are consumed as
well; if invalid UTF is allowed, an invalid character jumps to
'backtracks'. */
DEFINE_COMPILER;
struct sljit_jump *ascii;

SLJIT_ASSERT(common->utf);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* All values > 127 are zero in ctypes. */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);

if (!negated)
  return;

ascii = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);

if (common->invalid_utf)
  {
  /* The decoder expects the first byte in TMP1. Its result is only used
  for the validity check, the type is forced to zero afterwards. */
  OP1(SLJIT_MOV, TMP1, 0, TMP2, 0);
  add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
  }
else
  {
  /* Skip the trailing bytes using the length table. */
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  }

JUMPHERE(ascii);
}
4302
4303
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
4304
4305
static void read_char8_type(compiler_common *common, jump_list **backtracks, BOOL negated)
{
/* Emits code that reads the character type into TMP1 and updates STR_PTR.
Does not check STR_END for the first code unit.

The type comes from the 256 entry ctypes table, so characters above 255
get type zero. When 'negated' is TRUE the whole character is consumed even
if its type is zero. If invalid UTF is allowed, invalid sequences jump to
'backtracks'. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
struct sljit_jump *jump;
#endif
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_jump *jump2;
#endif

SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(negated);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  /* The result of this read may be unused, but saves an "else" part. */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);

  if (!negated)
    {
    /* Only a two byte sequence can encode a character below 256, so a
    single trailing byte is read here. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
    /* The lead byte must be in 0xc2..0xdf. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe0 - 0xc2));

    /* TMP2 becomes the character value when the second byte is a valid
    trailing byte (0x80..0xbf). */
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
    /* The trailing byte check must test TMP1 (second byte - 0x80). TMP2
    holds the assembled value, which is never below 0x80 here, so testing
    it would reject every two byte character. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40));

    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else if (common->invalid_utf)
    {
    /* NOTE(review): TMP1 holds the ctypes value loaded above at this
    point, while the header comment of do_utfreadchar_invalid says it
    expects the first byte in TMP1 (read_char7_type copies TMP2 to TMP1
    before the same call). Left unchanged - confirm whether a move is
    missing here. */
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
    OP1(SLJIT_MOV, TMP2, 0, TMP1, 0);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));

    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else
    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));

  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf && negated)
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x110000));
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32 */

#if PCRE2_CODE_UNIT_WIDTH != 8
/* The ctypes array contains only 256 values. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf && negated)
  {
  /* Skip low surrogate if necessary. */
  if (!common->invalid_utf)
    {
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);

    if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
      {
      OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
      SELECT(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0, STR_PTR);
      }
    else
      {
      jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      JUMPHERE(jump);
      }
    return;
    }

  /* Invalid UTF-16 is allowed: a unit in 0xd800..0xdfff must be a high
  surrogate that is followed, before STR_END, by a low surrogate. */
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));

  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16 */
}
4423
4424
static void move_back(compiler_common *common, jump_list **backtracks, BOOL must_be_valid)
{
/* Emits code that moves STR_PTR one character back. STR_PTR and TMP1 are
affected. If must_be_valid is TRUE, TMP2 is not used. Otherwise TMP2 must
contain the start of the subject buffer, and it is destroyed. STR_PTR is
not modified for invalid character sequences.

When invalid UTF is allowed, must_be_valid is FALSE and 'backtracks' is not
NULL, an invalid sequence jumps to 'backtracks'. */
DEFINE_COMPILER;

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *single_unit;
#endif

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_label *again;

if (common->utf)
  {
  if (!must_be_valid && common->invalid_utf)
    {
    /* Bytes below 0x80 are complete characters, everything else is
    handled by the checked helper routine. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    single_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
    add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
    JUMPHERE(single_unit);
    return;
    }

  /* Valid UTF-8: step back while the byte is a trailing byte (10xxxxxx). */
  again = LABEL();
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, again);
  return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  if (!must_be_valid && common->invalid_utf)
    {
    /* Only units in 0xd800..0xdfff need the checked helper routine. */
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    single_unit = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0xd800);
    add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
    JUMPHERE(single_unit);
    return;
    }

  /* Skip low surrogate if necessary: TMP1 becomes 1 when the unit just
  passed is in 0xdc00..0xdfff, and is then converted to a byte offset. */
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xdc00);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf && !must_be_valid)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  if (backtracks != NULL)
    {
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    return;
    }

  /* No backtrack list: step back by one unit only if the previous unit
  is below 0x110000. */
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x110000);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
  OP2(SLJIT_SHL,  TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  return;
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */

SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(must_be_valid);

/* Every character is exactly one code unit. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
4510
4511
static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
{
/* Emits code that checks whether the character in TMP1 is a newline of
type 'nltype'. TMP2 may be destroyed. The generated code jumps to
'backtracks' when the character is a newline if 'jumpifmatch' is TRUE, and
when it is not a newline if 'jumpifmatch' is FALSE. */
DEFINE_COMPILER;
struct sljit_jump *is_cr;

if (nltype == NLTYPE_ANY)
  {
  /* The shared anynewline routine reports its result in the zero flag. */
  add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return;
  }

if (nltype != NLTYPE_ANYCRLF)
  {
  /* A single, fixed newline character. */
  SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
  add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
  return;
  }

/* NLTYPE_ANYCRLF: CR or NL. */
if (jumpifmatch)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
  return;
  }

/* Jump to backtracks unless the character is CR or NL. */
is_cr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
JUMPHERE(is_cr);
}
4543
4544
#ifdef SUPPORT_UNICODE
4545
4546
#if PCRE2_CODE_UNIT_WIDTH == 8
4547
static void do_utfreadchar(compiler_common *common)
{
/* Emits the shared fast routine that decodes a UTF-8 character without any
validity check. On entry TMP1 contains the first byte of the character
(>= 0xc0) and STR_PTR points to the second byte. On return TMP1 holds the
character value and STR_PTR points after the character. TMP2 is destroyed. */
DEFINE_COMPILER;
struct sljit_jump *not_two_bytes;
struct sljit_jump *four_bytes;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* Append the low six bits of the second byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Searching for the first zero in the length prefix of the first byte:
0x800 is bit 5 of the first byte after the shift. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
not_two_bytes = JUMP(SLJIT_NOT_ZERO);

/* Two byte sequence: remove the shifted "110" prefix. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(not_two_bytes);
/* Append the low six bits of the third byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* 0x10000 is bit 4 of the first byte after two shifts. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x10000);
four_bytes = JUMP(SLJIT_NOT_ZERO);

/* Three byte sequence: remove the shifted "1110" prefix. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Four byte sequence: remove the shifted "1111" prefix bits, then append
the low six bits of the fourth byte. */
JUMPHERE(four_bytes);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4591
4592
static void do_utfreadtype8(compiler_common *common)
{
/* Emits the shared fast routine that decodes the type of a UTF-8 character
without any validity check. On entry TMP2 contains the first byte of the
character (>= 0xc0) and STR_PTR points to the second byte. On return TMP1
holds the ctypes entry of the character, or zero if the character is above
255, and STR_PTR points after the character. TMP2 is destroyed. */
DEFINE_COMPILER;
struct sljit_jump *longer_sequence;
struct sljit_jump *beyond_table;

sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);

/* Bit 0x20 is clear only in the first byte of a two byte sequence. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0x20);
longer_sequence = JUMP(SLJIT_NOT_ZERO);

/* Two byte sequence. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
/* The upper 5 bits are known at this point. A value above 3 means that
the character is above 255. */
beyond_table = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Two byte sequence with a value above 255: the type is zero. */
JUMPHERE(beyond_table);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* We only have types for characters less than 256. Longer sequences are
skipped using the length table and get type zero. */
JUMPHERE(longer_sequence);
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4627
4628
static void do_utfreadchar_invalid(compiler_common *common)
4629
0
{
4630
/* Slow decoding a UTF-8 character. TMP1 contains the first byte
4631
of the character (>= 0xc0). Return char value in TMP1. STR_PTR is
4632
undefined for invalid characters. */
/* On entry STR_PTR addresses the code unit that follows the first byte.
Every rejected sequence ends with INVALID_UTF_CHAR in TMP1. When the CPU
has a conditional move, several range checks are emitted as SELECT
operations instead of jumps and the matching exit_invalid[] slot is left
NULL. */
4633
0
DEFINE_COMPILER;
4634
0
sljit_s32 i;
4635
0
sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
4636
0
struct sljit_jump *jump;
4637
0
struct sljit_jump *buffer_end_close;
4638
0
struct sljit_label *three_byte_entry;
4639
0
struct sljit_label *exit_invalid_label;
4640
0
struct sljit_jump *exit_invalid[11];
4641
4642
0
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
4643
4644
0
/* Bias the first byte so that 0xc2-0xf4 becomes 0-0x32. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc2);
4645
4646
/* Usually more than 3 characters remained in the subject buffer. */
4647
0
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
4648
4649
/* Not a valid start of a multi-byte sequence, no more bytes read. */
4650
0
/* The comparison is unsigned, so 0xc0 and 0xc1 (which wrapped around
in the subtraction above) are rejected here as well. */
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xf5 - 0xc2);
4651
4652
0
/* Fewer than three code units are left: use the slower tail code. */
buffer_end_close = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
4653
4654
0
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
4655
0
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4656
/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
4657
0
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
4658
0
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
4659
0
/* The second byte must be in the 0x80-0xbf range. */
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4660
4661
0
/* With the bias above, bit 0x800 is set exactly when the first byte
is 0xe0 or above, i.e. when more bytes have to be read. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
4662
0
jump = JUMP(SLJIT_NOT_ZERO);
4663
4664
0
/* Two-byte sequence: TMP1 is the character, undo two of the three
code units added to STR_PTR. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4665
0
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4666
4667
0
JUMPHERE(jump);
4668
4669
/* Three-byte sequence. */
4670
0
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4671
0
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4672
0
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
4673
0
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4674
0
if (has_cmov)
4675
0
  {
4676
0
  /* An invalid third byte replaces TMP1 with 0x20000. That value has
  bit 0x10000 clear, becomes 0 after the -0x2d800 / +0xd800 steps below,
  and is then rejected by the final "less than 0x800" check. */
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4677
0
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0x20000, TMP1);
4678
0
  exit_invalid[2] = NULL;
4679
0
  }
4680
0
else
4681
0
  exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4682
4683
0
/* Bit 0x10000 is set only when the first byte is 0xf0 or above. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x10000);
4684
0
jump = JUMP(SLJIT_NOT_ZERO);
4685
4686
0
three_byte_entry = LABEL();
4687
4688
0
/* For a well formed sequence TMP1 is the character plus 0x20000 here,
so this leaves (character - 0xd800) and the surrogate range 0xd800-0xdfff
maps to 0-0x7ff. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2d800);
4689
0
if (has_cmov)
4690
0
  {
4691
0
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
4692
0
  SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0xd800, TMP1);
4693
0
  exit_invalid[3] = NULL;
4694
0
  }
4695
0
else
4696
0
  exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
4697
0
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4698
0
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4699
4700
0
/* A three-byte sequence must not encode a value below 0x800. */
if (has_cmov)
4701
0
  {
4702
0
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
4703
0
  SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
4704
0
  exit_invalid[4] = NULL;
4705
0
  }
4706
0
else
4707
0
  exit_invalid[4] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
4708
0
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4709
4710
0
JUMPHERE(jump);
4711
4712
/* Four-byte sequence. */
4713
0
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4714
0
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4715
0
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
4716
0
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4717
0
if (has_cmov)
4718
0
  {
4719
0
  /* An invalid fourth byte replaces TMP1 with 0, which wraps around in
  the subtraction below and fails the range check that follows it. */
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4720
0
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0, TMP1);
4721
0
  exit_invalid[5] = NULL;
4722
0
  }
4723
0
else
4724
0
  exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4725
4726
0
/* For a well formed sequence TMP1 is the character plus 0xc00000 here,
so this leaves (character - 0x10000), which must be below 0x100000. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc10000);
4727
0
if (has_cmov)
4728
0
  {
4729
0
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
4730
0
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000, TMP1);
4731
0
  exit_invalid[6] = NULL;
4732
0
  }
4733
0
else
4734
0
  exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
4735
4736
0
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
4737
0
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4738
4739
0
/* Tail code used when fewer than three code units follow the first
byte. STR_PTR is moved back to one code unit past its entry value. */
JUMPHERE(buffer_end_close);
4740
0
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4741
0
exit_invalid[7] = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
4742
4743
/* Two-byte sequence. */
4744
0
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4745
0
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4746
/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
4747
0
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
4748
0
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
4749
0
exit_invalid[8] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4750
4751
0
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
4752
0
jump = JUMP(SLJIT_NOT_ZERO);
4753
4754
0
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4755
4756
/* Three-byte sequence. */
4757
0
JUMPHERE(jump);
4758
0
exit_invalid[9] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4759
4760
0
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4761
0
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4762
0
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
4763
0
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4764
0
if (has_cmov)
4765
0
  {
4766
0
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4767
0
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
4768
0
  exit_invalid[10] = NULL;
4769
0
  }
4770
0
else
4771
0
  exit_invalid[10] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4772
4773
/* One will be subtracted from STR_PTR later (at three_byte_entry). */
4774
0
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4775
4776
/* Four byte sequences are not possible. */
4777
0
/* Values below 0x30000 come from a three-byte first byte; anything
else falls through to the invalid exit. */
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x30000, three_byte_entry);
4778
4779
0
exit_invalid_label = LABEL();
4780
0
/* NOTE(review): slots set to NULL on the has_cmov path are passed to
sljit_set_label as well; presumably a NULL jump is ignored there -
confirm against the sljit sources. */
for (i = 0; i < 11; i++)
4781
0
  sljit_set_label(exit_invalid[i], exit_invalid_label);
4782
4783
0
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
4784
0
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4785
0
}
4786
4787
static void do_utfreadnewline_invalid(compiler_common *common)
4788
0
{
4789
/* Slow decoding a UTF-8 character, specialized for newlines.
4790
TMP1 contains the first byte of the character (>= 0xc0). Return
4791
char value in TMP1. */
/* Only 0x85 and the values 0x2000-0x203f are decoded; for any other
sequence the bytes in the 0x80-0xbf range that follow are skipped and
INVALID_UTF_CHAR is returned. When nltype is not NLTYPE_ANY nothing is
decoded at all. */
4792
0
DEFINE_COMPILER;
4793
0
struct sljit_label *loop;
4794
0
struct sljit_label *skip_start;
4795
0
struct sljit_label *three_byte_exit;
4796
0
struct sljit_jump *jump[5];
4797
4798
0
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
4799
4800
0
if (common->nltype != NLTYPE_ANY)
4801
0
  {
4802
0
  SLJIT_ASSERT(common->nltype != NLTYPE_FIXED || common->newline < 128);
4803
4804
  /* All newlines are ascii, just skip intermediate octets. */
4805
0
  jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4806
0
  loop = LABEL();
4807
0
  /* The first call (with SLJIT_MEM_SUPP) is used as a support check for
  the post-increment load; the separate load and add below is the
  fallback. */
  if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)) == SLJIT_SUCCESS)
4808
0
    sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4809
0
  else
4810
0
    {
4811
0
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4812
0
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4813
0
    }
4814
4815
0
  /* Keep looping while the byte just read is in the 0x80-0xbf range. */
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
4816
0
  CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);
4817
0
  /* Step back to the byte that ended the loop. */
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4818
4819
0
  JUMPHERE(jump[0]);
4820
4821
0
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
4822
0
  OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4823
0
  return;
4824
0
  }
4825
4826
0
jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4827
0
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4828
0
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4829
4830
0
/* Only the first bytes 0xc2 and 0xe2 can start a decoded newline. */
jump[1] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xc2);
4831
0
jump[2] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xe2);
4832
4833
0
/* Entered with the most recently read byte in TMP2 and STR_PTR just
after it. */
skip_start = LABEL();
4834
0
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
4835
0
jump[3] = CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80);
4836
4837
/* Skip intermediate octets. */
4838
0
loop = LABEL();
4839
0
jump[4] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4840
0
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4841
0
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4842
0
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
4843
0
CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);
4844
4845
0
/* The last byte read is not in the 0x80-0xbf range: step back to it. */
JUMPHERE(jump[3]);
4846
0
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4847
4848
0
three_byte_exit = LABEL();
4849
0
JUMPHERE(jump[0]);
4850
0
JUMPHERE(jump[4]);
4851
4852
0
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
4853
0
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4854
4855
/* Two byte long newline: 0x85. */
4856
0
JUMPHERE(jump[1]);
4857
0
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x85, skip_start);
4858
4859
0
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x85);
4860
0
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4861
4862
/* Three byte long newlines: 0x2028 and 0x2029. */
4863
0
JUMPHERE(jump[2]);
4864
0
/* The second byte must be 0x80 for a value in 0x2000-0x203f. */
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, skip_start);
4865
0
CMPTO(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0, three_byte_exit);
4866
4867
0
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4868
0
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4869
4870
0
OP2(SLJIT_SUB, TMP1, 0, TMP2, 0, SLJIT_IMM, 0x80);
4871
0
CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40, skip_start);
4872
4873
0
/* Any value in 0x2000-0x203f is returned, not just 0x2028 and 0x2029;
this function does not narrow it further. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0x2000);
4874
0
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4875
0
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4876
0
}
4877
4878
static void do_utfmoveback_invalid(compiler_common *common)
4879
0
{
4880
/* Goes one character back. */
/* On success TMP1 is set to 1 and STR_PTR ends one, two or three code
units below its entry value (for a two, three or four byte sequence).
On failure TMP1 is set to 0 and STR_PTR ends one code unit above its
entry value. No byte below the address in TMP2 is read.
NOTE(review): TMP2 is presumably the start of the subject and TMP1 a
value derived from the last byte by the caller - confirm at the call
site. */
4881
0
DEFINE_COMPILER;
4882
0
sljit_s32 i;
4883
0
struct sljit_jump *jump;
4884
0
struct sljit_jump *buffer_start_close;
4885
0
struct sljit_label *exit_ok_label;
4886
0
struct sljit_label *exit_invalid_label;
4887
0
struct sljit_jump *exit_invalid[7];
4888
4889
0
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
4890
4891
0
/* Assume the longest case first; the fast path below corrects STR_PTR
upwards when a shorter sequence is found. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
4892
0
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);
4893
4894
/* Two-byte sequence. */
4895
0
/* Less than three code units are available above TMP2: use the slower
code at the end of the function. */
buffer_start_close = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
4896
4897
0
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
4898
4899
0
/* 0xc0-0xdf is the first byte of a two-byte sequence. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4900
0
jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x20);
4901
4902
0
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
4903
0
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4904
0
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4905
4906
/* Three-byte sequence. */
4907
0
JUMPHERE(jump);
4908
0
/* TMP1 is (byte - 0xc0); with the unsigned comparison only the bytes
0x80-0xbf, which wrapped to the top 0x40 values, pass this check. */
exit_invalid[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);
4909
4910
0
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4911
4912
0
/* 0xe0-0xef is the first byte of a three-byte sequence. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
4913
0
jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x10);
4914
4915
0
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
4916
0
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4917
0
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4918
4919
/* Four-byte sequence. */
4920
0
JUMPHERE(jump);
4921
0
/* TMP1 becomes (byte - 0x80): the byte must be in 0x80-0xbf. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
4922
0
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40);
4923
4924
0
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4925
0
/* Only 0xf0-0xf4 are accepted as the first byte. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0);
4926
0
exit_invalid[3] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x05);
4927
4928
0
/* Shared successful exit: STR_PTR already has its final value. */
exit_ok_label = LABEL();
4929
0
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
4930
0
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4931
4932
/* Two-byte sequence. */
4933
0
JUMPHERE(buffer_start_close);
4934
0
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4935
4936
0
exit_invalid[4] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
4937
4938
0
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4939
4940
0
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4941
0
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20, exit_ok_label);
4942
4943
/* Three-byte sequence. */
4944
0
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4945
0
exit_invalid[5] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);
4946
0
exit_invalid[6] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
4947
4948
0
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4949
4950
0
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
4951
0
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10, exit_ok_label);
4952
4953
/* Four-byte sequences are not possible. */
4954
4955
0
/* Failure exit for the paths where STR_PTR is two code units below its
entry value. The variable is reused for a second label further down. */
exit_invalid_label = LABEL();
4956
0
sljit_set_label(exit_invalid[5], exit_invalid_label);
4957
0
sljit_set_label(exit_invalid[6], exit_invalid_label);
4958
0
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
4959
0
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
4960
0
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4961
4962
0
JUMPHERE(exit_invalid[4]);
4963
/* -2 + 4 = 2 */
4964
0
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4965
4966
0
/* Failure exit for the paths where STR_PTR is three code units below
its entry value. */
exit_invalid_label = LABEL();
4967
0
for (i = 0; i < 4; i++)
4968
0
  sljit_set_label(exit_invalid[i], exit_invalid_label);
4969
0
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
4970
0
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(4));
4971
0
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4972
0
}
4973
4974
static void do_utfpeakcharback(compiler_common *common)
4975
0
{
4976
/* Peek a character back. Does not modify STR_PTR. */
/* The character that ends just before STR_PTR is decoded into TMP1;
TMP2 is used as scratch. No validity or buffer boundary checks are
emitted here, the bytes at STR_PTR[-2], [-3] and [-4] are read as
needed to find the first byte of the sequence.
NOTE(review): presumably only reached for multi-byte characters in
subjects known to be valid UTF-8 - confirm at the call site. */
4977
0
DEFINE_COMPILER;
4978
0
struct sljit_jump *jump[2];
4979
4980
0
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
4981
4982
0
/* Is the byte at -2 the first byte of a two-byte sequence (0xc0-0xdf)?
If so TMP1 already holds its payload bits. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4983
0
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4984
0
jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20);
4985
4986
0
/* Is the byte at -3 the first byte of a three-byte sequence
(0xe0-0xef)? */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
4987
0
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
4988
0
jump[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10);
4989
4990
0
/* Four-byte sequence: TMP1 becomes (byte[-3] - 0x80) combined with
(byte[-4] - 0xf0) shifted left by 6. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
4991
0
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
4992
0
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
4993
0
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
4994
0
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4995
4996
0
/* Append the six payload bits of the byte at -2. */
JUMPHERE(jump[1]);
4997
0
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4998
0
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4999
0
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
5000
0
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
5001
5002
0
/* Append the six payload bits of the byte at -1. */
JUMPHERE(jump[0]);
5003
0
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5004
0
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
5005
0
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
5006
0
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
5007
5008
0
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5009
0
}
5010
5011
static void do_utfpeakcharback_invalid(compiler_common *common)
5012
0
{
5013
/* Peek a character back. Does not modify STR_PTR. */
/* The decoded character, or INVALID_UTF_CHAR for a malformed sequence,
is returned in TMP1. TMP2 is compared against STR_PTR to decide how
many code units may be read backwards, and is then reused as scratch.
NOTE(review): on entry TMP1 presumably holds the byte at STR_PTR[-1]
and TMP2 the start of the subject - confirm at the call site. */
5014
0
DEFINE_COMPILER;
5015
0
sljit_s32 i;
5016
0
sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
5017
0
struct sljit_jump *jump[2];
5018
0
struct sljit_label *two_byte_entry;
5019
0
struct sljit_label *three_byte_entry;
5020
0
struct sljit_label *exit_invalid_label;
5021
0
struct sljit_jump *exit_invalid[8];
5022
5023
0
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
5024
5025
0
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
5026
0
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);
5027
0
/* Taken when at most three code units lie between the original TMP2
and STR_PTR; the code after the matching JUMPHERE handles that case. */
jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);
5028
5029
/* Two-byte sequence. */
5030
0
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5031
0
/* Accept 0xc2-0xdf as the first byte of a two-byte sequence. */
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
5032
0
jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x1e);
5033
5034
0
two_byte_entry = LABEL();
5035
0
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
5036
/* If TMP1 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
5037
0
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5038
0
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5039
5040
0
JUMPHERE(jump[1]);
5041
0
/* TMP2 becomes (byte[-2] - 0x80) and TMP1 loses its 0x80 bias; the
byte at -2 must be in 0x80-0xbf to continue. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
5042
0
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
5043
0
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
5044
0
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
5045
0
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
5046
5047
/* Three-byte sequence. */
5048
0
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
5049
0
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
5050
0
jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x10);
5051
5052
0
three_byte_entry = LABEL();
5053
0
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
5054
0
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
5055
5056
0
/* Reject 0xd800-0xdfff: after the subtraction that range is 0-0x7ff.
On the has_cmov path the replacement value becomes 0 after the addition
below and is caught by the next "less than 0x800" check. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5057
0
if (has_cmov)
5058
0
  {
5059
0
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
5060
0
  SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, -0xd800, TMP1);
5061
0
  exit_invalid[2] = NULL;
5062
0
  }
5063
0
else
5064
0
  exit_invalid[2] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
5065
5066
0
/* A three-byte sequence must not encode a value below 0x800. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5067
0
if (has_cmov)
5068
0
  {
5069
0
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
5070
0
  SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
5071
0
  exit_invalid[3] = NULL;
5072
0
  }
5073
0
else
5074
0
  exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
5075
5076
0
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5077
5078
0
JUMPHERE(jump[1]);
5079
0
/* TMP2 becomes (byte[-3] - 0x80), which must be below 0x40. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0 - 0x80);
5080
0
exit_invalid[4] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
5081
0
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
5082
0
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
5083
5084
/* Four-byte sequence. */
5085
0
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
5086
0
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
5087
0
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
5088
0
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 18);
5089
/* ADD is used instead of OR because of the SUB 0x10000 above. */
5090
0
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5091
5092
0
/* TMP1 is (character - 0x10000) and must be below 0x100000. */
if (has_cmov)
5093
0
  {
5094
0
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
5095
0
  SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000, TMP1);
5096
0
  exit_invalid[5] = NULL;
5097
0
  }
5098
0
else
5099
0
  exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
5100
5101
0
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
5102
0
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5103
5104
0
/* At most three code units are available. After the subtraction the
jump below is taken when at most two are available; otherwise exactly
three are, so two and three byte sequences are tried. */
JUMPHERE(jump[0]);
5105
0
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
5106
0
jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);
5107
5108
/* Two-byte sequence. */
5109
0
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5110
0
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
5111
0
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);
5112
5113
0
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
5114
0
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
5115
0
exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
5116
0
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
5117
0
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
5118
5119
/* Three-byte sequence. */
5120
0
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
5121
0
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
5122
0
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x10, three_byte_entry);
5123
5124
0
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
5125
0
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5126
5127
0
/* At most two code units are available: fail when there are fewer
than two, otherwise only a two-byte sequence is possible. */
JUMPHERE(jump[0]);
5128
0
exit_invalid[7] = CMP(SLJIT_GREATER, TMP2, 0, STR_PTR, 0);
5129
5130
/* Two-byte sequence. */
5131
0
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5132
0
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
5133
0
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);
5134
5135
0
exit_invalid_label = LABEL();
5136
0
/* NOTE(review): slots set to NULL on the has_cmov path are passed to
sljit_set_label as well; presumably a NULL jump is ignored there -
confirm against the sljit sources. */
for (i = 0; i < 8; i++)
5137
0
  sljit_set_label(exit_invalid[i], exit_invalid_label);
5138
5139
0
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
5140
0
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5141
0
}
5142
5143
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
5144
5145
#if PCRE2_CODE_UNIT_WIDTH == 16
5146
5147
static void do_utfreadchar_invalid(compiler_common *common)
5148
{
5149
/* Slow decoding a UTF-16 character. TMP1 contains the first half
5150
of the character (>= 0xd800). Return char value in TMP1. STR_PTR is
5151
undefined for invalid characters. */
/* INVALID_UTF_CHAR is returned in TMP1 when the first half is 0xdc00 or
above, when no code unit is left in the subject, or when the next code
unit is not in the 0xdc00-0xdfff range. */
5152
DEFINE_COMPILER;
5153
struct sljit_jump *exit_invalid[3];
5154
5155
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
5156
5157
/* TMP2 contains the high surrogate. */
/* NOTE(review): TMP2 is shifted left by 10 and 0x10000 is added to it
below, so it presumably arrives as (high surrogate - 0xd800) - confirm
at the call site. */
5158
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);
5159
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5160
5161
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5162
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
5163
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5164
5165
/* The second code unit must be in the 0xdc00-0xdfff range. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
5166
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
5167
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);
5168
5169
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5170
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5171
5172
JUMPHERE(exit_invalid[0]);
5173
JUMPHERE(exit_invalid[1]);
5174
JUMPHERE(exit_invalid[2]);
5175
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
5176
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5177
}
5178
5179
static void do_utfreadnewline_invalid(compiler_common *common)
5180
{
5181
/* Slow decoding a UTF-16 character, specialized for newlines.
5182
TMP1 contains the first half of the character (>= 0xd800). Return
5183
char value in TMP1. */
/* No real decoding is done: the constant 0x10000 is returned in TMP1
and STR_PTR is advanced by one code unit only when the next code unit
is in the 0xdc00-0xdfff range. INVALID_UTF_CHAR is returned when the
subject has ended or the first half is 0xdc00 or above. */
5184
5185
DEFINE_COMPILER;
5186
struct sljit_jump *exit_invalid[2];
5187
5188
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
5189
5190
/* TMP2 contains the high surrogate. */
/* NOTE(review): TMP2 is overwritten by the load below before it is
read, so the value it has on entry is not used by this function. */
5191
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5192
5193
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5194
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);
5195
5196
/* TMP2 becomes 1 when the next code unit is in 0xdc00-0xdfff and 0
otherwise; it is then scaled to a code unit size and added to STR_PTR. */
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
5197
OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
5198
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
5199
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
5200
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
5201
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5202
5203
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5204
5205
JUMPHERE(exit_invalid[0]);
5206
JUMPHERE(exit_invalid[1]);
5207
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
5208
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5209
}
5210
5211
static void do_utfmoveback_invalid(compiler_common *common)
5212
{
5213
/* Goes one character back. */
/* On success STR_PTR is moved back by one code unit and TMP1 is set to
1. On failure STR_PTR is moved forward by one code unit and TMP1 is set
to 0.
NOTE(review): the values expected in TMP1 and TMP2 on entry are set by
the caller; TMP2 is only compared against STR_PTR here and is presumably
the start of the subject - confirm at the call site. */
5214
DEFINE_COMPILER;
5215
struct sljit_jump *exit_invalid[3];
5216
5217
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
5218
5219
exit_invalid[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400);
5220
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);
5221
5222
/* The previous code unit must be in the 0xd800-0xdbff range. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5223
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5224
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);
5225
5226
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5227
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
5228
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5229
5230
JUMPHERE(exit_invalid[0]);
5231
JUMPHERE(exit_invalid[1]);
5232
JUMPHERE(exit_invalid[2]);
5233
5234
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5235
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
5236
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5237
}
5238
5239
static void do_utfpeakcharback_invalid(compiler_common *common)
5240
{
5241
/* Peek a character back. Does not modify STR_PTR. */
/* TMP1 holds a code unit value on entry. Values of 0xe000 and above are
returned unchanged. A value in 0xdc00-0xdfff is combined with the code
unit at STR_PTR[-2], which must be in 0xd800-0xdbff. Everything else
returns INVALID_UTF_CHAR in TMP1.
NOTE(review): TMP2 is only used as a lower bound for the backward read
and is presumably the start of the subject - confirm at the call site. */
5242
DEFINE_COMPILER;
5243
struct sljit_jump *jump;
5244
struct sljit_jump *exit_invalid[3];
5245
5246
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
5247
5248
jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000);
5249
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
5250
/* A value below 0xdc00 cannot be the second half of a pair. */
exit_invalid[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
5251
/* Fewer than two code units lie between the original TMP2 and
STR_PTR. */
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);
5252
5253
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5254
/* TMP1 becomes 0x10000 plus the low ten bits. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
5255
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
5256
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
5257
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
5258
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5259
5260
JUMPHERE(jump);
5261
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5262
5263
JUMPHERE(exit_invalid[0]);
5264
JUMPHERE(exit_invalid[1]);
5265
JUMPHERE(exit_invalid[2]);
5266
5267
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
5268
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5269
}
5270
5271
#endif /* PCRE2_CODE_UNIT_WIDTH == 16 */
5272
5273
/* UCD_BLOCK_SIZE must be 128 (see the assert below). The shift turns a
character into its stage 1 block number and the mask keeps its position
inside that block. */
5274
#define UCD_BLOCK_MASK 127
5275
#define UCD_BLOCK_SHIFT 7
5276
5277
static void do_getucd(compiler_common *common)
5278
0
{
5279
/* Search the UCD record for the character that comes in TMP1.
5280
Returns the UCD offset (the 16 bit ucd_stage2 entry) in TMP2. */
/* Unlike do_getucdtype, no chartype is loaded here: on return TMP1
holds the index that was used for the ucd_stage2 lookup. */
5281
0
DEFINE_COMPILER;
5282
#if PCRE2_CODE_UNIT_WIDTH == 32
5283
struct sljit_jump *jump;
5284
#endif
5285
5286
#if defined SLJIT_DEBUG && SLJIT_DEBUG
5287
/* dummy_ucd_record */
5288
const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR);
5289
SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
5290
SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
5291
#endif
5292
5293
0
SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);
5294
5295
0
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
5296
5297
#if PCRE2_CODE_UNIT_WIDTH == 32
5298
/* In non-UTF 32 bit mode, values above MAX_UTF_CODE_POINT are replaced
by UNASSIGNED_UTF_CHAR before the table lookup. */
if (!common->utf)
5299
  {
5300
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
5301
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
5302
  JUMPHERE(jump);
5303
  }
5304
#endif
5305
5306
0
/* Stage 1: TMP2 = ucd_stage1[TMP1 >> UCD_BLOCK_SHIFT]; the shift by one
turns the index into a byte offset for the 16 bit load. */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5307
0
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5308
0
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
5309
0
/* Stage 2 index: (stage 1 value << UCD_BLOCK_SHIFT) plus the position
of the character inside its block. */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
5310
0
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5311
0
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5312
0
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
5313
0
/* 16 bit load from ucd_stage2 + (TMP1 << 1). */
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
5314
0
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5315
0
}
5316
5317
static void do_getucdtype(compiler_common *common)
5318
0
{
5319
/* Search the UCD record for the character that comes in TMP1.
5320
Returns chartype in TMP1 and UCD offset in TMP2. */
/* TMP2 is the 16 bit ucd_stage2 entry multiplied by 4 on return, because
of the shift used for the record address computation at the end. */
5321
0
DEFINE_COMPILER;
5322
#if PCRE2_CODE_UNIT_WIDTH == 32
5323
struct sljit_jump *jump;
5324
#endif
5325
5326
#if defined SLJIT_DEBUG && SLJIT_DEBUG
5327
/* dummy_ucd_record */
5328
const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR);
5329
SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
5330
SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
5331
#endif
5332
5333
0
SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);
5334
5335
0
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
5336
5337
#if PCRE2_CODE_UNIT_WIDTH == 32
5338
/* In non-UTF 32 bit mode, values above MAX_UTF_CODE_POINT are replaced
by UNASSIGNED_UTF_CHAR before the table lookup. */
if (!common->utf)
5339
  {
5340
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
5341
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
5342
  JUMPHERE(jump);
5343
  }
5344
#endif
5345
5346
0
/* Stage 1: TMP2 = ucd_stage1[TMP1 >> UCD_BLOCK_SHIFT]; the shift by one
turns the index into a byte offset for the 16 bit load. */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5347
0
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5348
0
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
5349
0
/* Stage 2 index: (stage 1 value << UCD_BLOCK_SHIFT) plus the position
of the character inside its block. */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
5350
0
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5351
0
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5352
0
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
5353
0
/* 16 bit load from ucd_stage2 + (TMP1 << 1). */
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
5354
5355
/* TMP2 is multiplied by 12. Same as (TMP2 << 2) + ((TMP2 << 2) << 1). */
/* 12 is sizeof(ucd_record), see the assert above. TMP1 starts as the
address of the chartype field of the first record; (TMP2 << 2) is added
explicitly and ((TMP2 << 2) << 1) by the shifted index of the load. */
5356
0
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
5357
0
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
5358
0
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5359
0
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 1);
5360
5361
0
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5362
0
}
5363
5364
#endif /* SUPPORT_UNICODE */
5365
5366
static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common)
{
/* Emits two pieces of code:

  1. Optional setup that stores a limit into the match_end_ptr stack slot
     when PCRE2_FIRSTLINE or PCRE2_USE_OFFSET_LIMIT is set.
  2. The code which moves STR_PTR forward to the next start position.

An unconditional jump emitted between the two is bound at the very end, so
control arriving from (1) skips (2). The returned label marks the start
of (2). */
DEFINE_COMPILER;
struct sljit_label *scan;
struct sljit_label *advance;
struct sljit_label *crlf_label = NULL;
struct sljit_jump *skip_advance;
struct sljit_jump *quit = NULL;
struct sljit_jump *quit2 = NULL;
struct sljit_jump *crlf_quit = NULL;
struct sljit_jump *crlf_done = NULL;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_label *skip_loop;
struct sljit_jump *skip_end;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
jump_list *nl_hits = NULL;
sljit_u32 options = common->re->overall_options;
BOOL firstline = (options & PCRE2_FIRSTLINE) != 0;
BOOL check_crlf = FALSE;
BOOL load_unit = FALSE;

/* The two code unit newline test is only generated when the pattern has no
explicit CR or LF, first-line mode is off, and the newline convention can
contain a two unit sequence. */
if ((common->re->flags & PCRE2_HASCRORLF) == 0 && !firstline)
  {
  if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255)
    check_crlf = TRUE;
  }

SLJIT_ASSERT(common->abort_label == NULL);

if (firstline)
  {
  /* Locate the end of the first line. STR_PTR is kept in TMP3 meanwhile. */
  SLJIT_ASSERT(common->match_end_ptr != 0);
  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);

  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Fixed two unit newline: compare the previous and the current unit. */
    scan = LABEL();
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, scan);
    CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, scan);
    JUMPHERE(quit);
    OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }
  else
    {
    quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    scan = LABEL();
    /* Storing on every iteration does not create a data dependency. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
    read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
    check_newlinechar(common, common->nltype, &nl_hits, TRUE);
    CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, scan);
    JUMPHERE(quit);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
    set_jumps(nl_hits, LABEL());
    }

  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
  }
else if ((options & PCRE2_USE_OFFSET_LIMIT) != 0)
  {
  /* Check whether the offset limit is set and valid. */
  SLJIT_ASSERT(common->match_end_ptr != 0);

  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, offset_limit));
    }
  else
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, offset_limit));
    }

  /* An unset limit stores STR_END (held in TMP2) as the limit. */
  OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
  quit = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw) PCRE2_UNSET);
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
    }
  else
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
    }

#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
    }

  /* TMP2 = begin + limit, replaced by STR_END when it lies beyond it. */
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
  quit2 = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
  OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
  JUMPHERE(quit2);
  /* A limit before the current position aborts with "no match". */
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
  add_jump(compiler, &common->abort, CMP(SLJIT_LESS, TMP2, 0, STR_PTR, 0));
  JUMPHERE(quit);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, TMP2, 0);
  }

skip_advance = JUMP(SLJIT_JUMP);

if (check_crlf)
  {
  /* Reached when the current unit equals the high byte of common->newline:
  step over it, and over the following unit as well when that one equals
  the low byte. */
  crlf_label = LABEL();
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  crlf_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  crlf_done = JUMP(SLJIT_JUMP);
  }

advance = LABEL();

/* Increasing the STR_PTR here requires one less jump in the most common case. */
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && !common->invalid_utf)
  load_unit = TRUE;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
if (check_crlf)
  load_unit = TRUE;

if (load_unit)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  }

if (check_crlf)
  {
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, crlf_label);
  }

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->invalid_utf)
  {
  /* Skip continuation code units (0x80 - 0xbf). */
  skip_loop = LABEL();
  skip_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x40, skip_loop);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPHERE(skip_end);
  }
else if (common->utf)
  {
  /* Units of 0xc0 and above add the extra length read from utf8_table4. */
  skip_end = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  JUMPHERE(skip_end);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->invalid_utf)
  {
  /* Skip continuation code units (0xdc00 - 0xdfff). */
  skip_loop = LABEL();
  skip_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400, skip_loop);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPHERE(skip_end);
  }
else if (common->utf)
  {
  /* Units in 0xd800 - 0xdbff are followed by one more unit. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);

  if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
    {
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x400);
    SELECT(SLJIT_LESS, STR_PTR, TMP2, 0, STR_PTR);
    }
  else
    {
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x400);
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
    }
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
JUMPHERE(skip_advance);

if (check_crlf)
  {
  JUMPHERE(crlf_quit);
  JUMPHERE(crlf_done);
  }

return advance;
}
5556
5557
5558
static SLJIT_INLINE void add_prefix_char(PCRE2_UCHAR chr, fast_forward_char_data *chars, BOOL last)
{
/* Records chr in the set of code units collected in *chars.

A count of 255 is a sticky marker: nothing more is recorded once it is set,
and it is set here when a new unit would not fit into MAX_DIFF_CHARS entries.
When "last" is true and the unit is actually stored, last_count is updated
as well (set to 1 for the first entry, incremented otherwise). A unit which
is already present changes nothing. */
sljit_u32 used = chars->count;
sljit_u32 idx;

if (used == 255)
  return;

if (used != 0)
  {
  /* Ignore duplicates. */
  for (idx = 0; idx < used; idx++)
    {
    if (chars->chars[idx] == chr)
      return;
    }

  if (used >= MAX_DIFF_CHARS)
    {
    chars->count = 255;
    return;
    }

  chars->chars[used] = chr;
  chars->count = used + 1;

  if (last)
    chars->last_count++;
  return;
  }

/* First entry of an empty set. */
chars->count = 1;
chars->chars[0] = chr;

if (last)
  chars->last_count = 1;
}
5591
5592
static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, fast_forward_char_data *chars, int max_chars, sljit_u32 *rec_count)
{
/* Recursive function, which scans prefix literals.

Walks the compiled pattern from cc and describes up to max_chars consecutive
code unit positions in chars[]: every position receives the code units seen
there (through add_prefix_char), or a count of 255 when the item at that
position is not tracked unit by unit. Optional items and alternatives are
scanned by recursive calls over the same chars[] window, and the value they
return becomes the new max_chars of the caller.

Returns the number of positions described by this invocation, or 0 as soon
as the shared budget *rec_count (decremented once per processed opcode) is
exhausted. */
BOOL is_last, is_any, is_class, is_caseless;
int unit_len, repeat, unit_len_save, consumed = 0;
sljit_u32 ch; /* Any unicode character. */
sljit_u8 *bitmap, *bitmap_end, bits;
PCRE2_SPTR alt, cc_save, oc;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
PCRE2_UCHAR othercase[4];
#elif defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
PCRE2_UCHAR othercase[2];
#else
PCRE2_UCHAR othercase[1];
#endif

repeat = 1;
for (;;)
  {
  if (*rec_count == 0)
    return 0;
  (*rec_count)--;

  is_last = TRUE;
  is_any = FALSE;
  is_class = FALSE;
  is_caseless = FALSE;

  switch (*cc)
    {
    case OP_CHARI:
    is_caseless = TRUE;
    /* Fall through */
    case OP_CHAR:
    is_last = FALSE;
    cc++;
    break;

    /* Zero width assertions. */
    case OP_SOD:
    case OP_SOM:
    case OP_SET_SOM:
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_EODN:
    case OP_EOD:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    case OP_NOT_UCP_WORD_BOUNDARY:
    case OP_UCP_WORD_BOUNDARY:
    cc++;
    continue;

    /* Assertion groups are stepped over as a whole. */
    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    cc = bracketend(cc);
    continue;

    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_POSPLUSI:
    is_caseless = TRUE;
    /* Fall through */
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_POSPLUS:
    cc++;
    break;

    case OP_EXACTI:
    is_caseless = TRUE;
    /* Fall through */
    case OP_EXACT:
    repeat = GET2(cc, 1);
    is_last = FALSE;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_POSQUERYI:
    is_caseless = TRUE;
    /* Fall through */
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_POSQUERY:
    unit_len = 1;
    cc++;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(*cc)) unit_len += GET_EXTRALEN(*cc);
#endif
    /* First scan the path on which the optional character is absent. */
    max_chars = scan_prefix(common, cc + unit_len, chars, max_chars, rec_count);
    if (max_chars == 0)
      return consumed;
    is_last = FALSE;
    break;

    case OP_KET:
    cc += 1 + LINK_SIZE;
    continue;

    case OP_ALT:
    cc += GET(cc, 1);
    continue;

    case OP_ONCE:
    case OP_BRA:
    case OP_BRAPOS:
    case OP_CBRA:
    case OP_CBRAPOS:
    /* Every alternative after the first is scanned recursively; the first
    one is handled by continuing this loop. */
    alt = cc + GET(cc, 1);
    while (*alt == OP_ALT)
      {
      max_chars = scan_prefix(common, alt + 1 + LINK_SIZE, chars, max_chars, rec_count);
      if (max_chars == 0)
        return consumed;
      alt += GET(alt, 1);
      }

    if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
      cc += IMM2_SIZE;
    cc += 1 + LINK_SIZE;
    continue;

    case OP_CLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
      return consumed;
#endif
    is_class = TRUE;
    break;

    case OP_NCLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    is_class = TRUE;
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    is_any = TRUE;
    cc += GET(cc, 1);
    break;
#endif

    case OP_DIGIT:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
      return consumed;
#endif
    is_any = TRUE;
    cc++;
    break;

    case OP_WHITESPACE:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
      return consumed;
#endif
    is_any = TRUE;
    cc++;
    break;

    case OP_WORDCHAR:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
      return consumed;
#endif
    is_any = TRUE;
    cc++;
    break;

    case OP_NOT:
    case OP_NOTI:
    cc++;
    /* Fall through. */
    case OP_NOT_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    is_any = TRUE;
    cc++;
    break;

#ifdef SUPPORT_UNICODE
    case OP_NOTPROP:
    case OP_PROP:
#if PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    is_any = TRUE;
    cc += 1 + 2;
    break;
#endif

    case OP_TYPEEXACT:
    /* The repeat count is applied to the opcode which follows. */
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE;
    continue;

    case OP_NOTEXACT:
    case OP_NOTEXACTI:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    is_any = TRUE;
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE + 1;
    break;

    default:
    return consumed;
    }

  if (is_any)
    {
    /* Mark "repeat" positions as untracked. */
    do
      {
      chars->count = 255;

      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars++;
      }
    while (--repeat > 0);

    repeat = 1;
    continue;
    }

  if (is_class)
    {
    /* A 32 byte bitmap follows the opcode. */
    bitmap = (sljit_u8*) (cc + 1);
    cc += 1 + 32 / sizeof(PCRE2_UCHAR);

    switch (*cc)
      {
      case OP_CRSTAR:
      case OP_CRMINSTAR:
      case OP_CRPOSSTAR:
      case OP_CRQUERY:
      case OP_CRMINQUERY:
      case OP_CRPOSQUERY:
      /* The class may match nothing: scan what follows the quantifier. */
      max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
      if (max_chars == 0)
        return consumed;
      break;

      default:
      case OP_CRPLUS:
      case OP_CRMINPLUS:
      case OP_CRPOSPLUS:
      break;

      case OP_CRRANGE:
      case OP_CRMINRANGE:
      case OP_CRPOSRANGE:
      repeat = GET2(cc, 1);
      if (repeat <= 0)
        return consumed;
      break;
      }

    do
      {
      if (bitmap[31] & 0x80)
        {
        /* The bit of code unit 255 is set: the position is not tracked. */
        chars->count = 255;
        }
      else if (chars->count != 255)
        {
        bitmap_end = bitmap + 32;
        ch = 0;
        do
          {
          bits = *bitmap++;
          SLJIT_ASSERT((ch & 0x7) == 0);
          if (bits == 0)
            ch += 8;
          else
            {
            do
              {
              if ((bits & 0x1) != 0)
                add_prefix_char(ch, chars, TRUE);
              bits >>= 1;
              ch++;
              }
            while (bits != 0);
            /* Round up to the first unit of the next bitmap byte. */
            ch = (ch + 7) & (sljit_u32)(~7);
            }
          }
        while (chars->count != 255 && bitmap < bitmap_end);
        /* Rewind for the next repetition. */
        bitmap = bitmap_end - 32;
        }

      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars++;
      }
    while (--repeat > 0);

    switch (*cc)
      {
      case OP_CRSTAR:
      case OP_CRMINSTAR:
      case OP_CRPOSSTAR:
      return consumed;

      case OP_CRQUERY:
      case OP_CRMINQUERY:
      case OP_CRPOSQUERY:
      cc++;
      break;

      case OP_CRRANGE:
      case OP_CRMINRANGE:
      case OP_CRPOSRANGE:
      /* Scanning continues only when minimum and maximum are the same. */
      if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
        return consumed;
      cc += 1 + 2 * IMM2_SIZE;
      break;
      }

    repeat = 1;
    continue;
    }

  /* A literal character: cc points to its first code unit. */
  unit_len = 1;
#ifdef SUPPORT_UNICODE
  if (common->utf && HAS_EXTRALEN(*cc)) unit_len += GET_EXTRALEN(*cc);
#endif

  if (is_caseless && char_has_othercase(common, cc))
    {
#ifdef SUPPORT_UNICODE
    if (common->utf)
      {
      GETCHAR(ch, cc);
      /* Give up when the other case is encoded with a different length. */
      if ((int)PRIV(ord2utf)(char_othercase(common, ch), othercase) != unit_len)
        return consumed;
      }
    else
#endif
      {
      ch = *cc;
#ifdef SUPPORT_UNICODE
      if (common->ucp && ch > 127)
        {
        ch = UCD_OTHERCASE(ch);
        /* Keep the original unit when the other case does not fit. */
        othercase[0] = (ch == (PCRE2_UCHAR)ch) ? ch : *cc;
        }
      else
#endif
        othercase[0] = TABLE_GET(ch, common->fcc, ch);
      }
    }
  else
    {
    is_caseless = FALSE;
    othercase[0] = 0; /* Stops compiler warning - PH */
    }

  unit_len_save = unit_len;
  cc_save = cc;
  for (;;)
    {
    oc = othercase;
    do
      {
      unit_len--;
      consumed++;

      ch = *cc;
      add_prefix_char(*cc, chars, unit_len == 0);

      if (is_caseless)
        add_prefix_char(*oc, chars, unit_len == 0);

      if (--max_chars == 0)
        return consumed;
      chars++;
      cc++;
      oc++;
      }
    while (unit_len > 0);

    if (--repeat == 0)
      break;

    /* Record the same character again for the next repetition. */
    unit_len = unit_len_save;
    cc = cc_save;
    }

  repeat = 1;
  if (is_last)
    return consumed;
  }
}
6005
6006
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
6007
static void jumpto_if_not_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg, struct sljit_label *label)
{
/* Emits code which masks reg in place (its previous value is lost) and jumps
to label when the masked value is that of a trailing code unit: 0x80 - 0xbf
in 8 bit mode, 0xdc00 - 0xdfff in 16 bit mode. */
#if PCRE2_CODE_UNIT_WIDTH == 8
enum { lead_mask = 0xc0, trail_bits = 0x80 };
#elif PCRE2_CODE_UNIT_WIDTH == 16
enum { lead_mask = 0xfc00, trail_bits = 0xdc00 };
#else
#error "Unknown code width"
#endif

OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, lead_mask);
CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, trail_bits, label);
}
6019
#endif
6020
6021
#include "pcre2_jit_simd_inc.h"
6022
6023
#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
6024
6025
static BOOL check_fast_forward_char_pair_simd(compiler_common *common, fast_forward_char_data *chars, int max)
{
/* Selects two prefix positions for fast_forward_char_pair_simd().

Only positions whose last_count (a priority at this point) is above 2 are
considered, the two positions must be at most
max_fast_forward_char_pair_offset() apart, and the first two code units
stored for one position must all differ from those of the other. Among the
candidates the pair with the largest priority sum is used; ties are resolved
in favour of the pair examined later. Returns FALSE, without emitting
anything, when no pair qualifies. */
sljit_s32 hi, lo, lo_first, best_hi = 0, best_lo = 0;
sljit_u32 best_pri = 0;
sljit_s32 max_offset = max_fast_forward_char_pair_offset();
PCRE2_UCHAR a1, a2, a_pri, b1, b2, b_pri;

for (hi = max - 1; hi >= 1; hi--)
  {
  if (chars[hi].last_count <= 2)
    continue;

  a1 = chars[hi].chars[0];
  a2 = chars[hi].chars[1];
  a_pri = chars[hi].last_count;

  lo_first = hi - max_offset;
  if (lo_first < 0)
    lo_first = 0;

  for (lo = lo_first; lo < hi; lo++)
    {
    b_pri = chars[lo].last_count;
    if (b_pri <= 2 || (sljit_u32)a_pri + (sljit_u32)b_pri < best_pri)
      continue;

    b1 = chars[lo].chars[0];
    b2 = chars[lo].chars[1];

    if (a1 != b1 && a1 != b2 && a2 != b1 && a2 != b2)
      {
      best_pri = a_pri + b_pri;
      best_hi = hi;
      best_lo = lo;
      }
    }
  }

if (best_pri == 0)
  return FALSE;

fast_forward_char_pair_simd(common, best_hi, chars[best_hi].chars[0], chars[best_hi].chars[1], best_lo, chars[best_lo].chars[0], chars[best_lo].chars[1]);
return TRUE;
}
6070
6071
#endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
6072
6073
static void fast_forward_first_char2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
/* Emits a loop which moves STR_PTR forward until the code unit found
"offset" units after it equals char1 or char2 (the two may be the same).

When common->match_end_ptr is set, STR_END is replaced for the duration of
the search and restored from TMP3 afterwards. In PCRE2_JIT_COMPLETE mode
running out of input jumps to common->failed_match; in the other modes the
search simply ends. A non-zero offset is only allowed in complete mode. */
DEFINE_COMPILER;
struct sljit_label *retry;
struct sljit_jump *hit;
struct sljit_jump *out_of_input;
PCRE2_UCHAR diff;
BOOL limited = (common->match_end_ptr != 0);

SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE || offset == 0);

if (limited)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  }

if (offset > 0)
  {
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
  }

if (limited)
  {
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);

  /* NOTE(review): the SELECT presumably replaces STR_END by the stored limit
  plus offset + 1 when that is the smaller value - confirm against the
  definition of SELECT. */
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offset + 1));
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
  SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END);
  }

#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD

if (JIT_HAS_FAST_FORWARD_CHAR_SIMD)
  {
  fast_forward_char_simd(common, char1, char2, offset);

  if (offset > 0)
    {
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
    }

  if (limited)
    {
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
    }
  return;
  }

#endif

retry = LABEL();

out_of_input = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, out_of_input);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (char1 == char2)
  {
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1, retry);
  }
else
  {
  diff = char1 ^ char2;
  if (is_powerof2(diff))
    {
    /* The two units differ in one bit: set it, then compare only once. */
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, diff);
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | diff, retry);
    }
  else
    {
    hit = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char2, retry);
    JUMPHERE(hit);
    }
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
  {
  /* The candidate start position must not be a trailing code unit. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-(offset + 1)));
  jumpto_if_not_utf_char_start(compiler, TMP1, retry);
  }
#endif

/* Undo both the initial offset and the increment done inside the loop. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset + 1));

if (common->mode != PCRE2_JIT_COMPLETE)
  JUMPHERE(out_of_input);

if (limited)
  {
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  }
}
6158
6159
static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
{
/* Emits a start position search based on the first MAX_N_CHARS positions
of the pattern, as described by scan_prefix().

Strategies, in the order they are tried:
  - a SIMD search for a pair of positions (when available);
  - a skip table built from the longest run (of at least 4 positions) whose
    sets are all known, optionally followed by a one or two unit check at
    the best remaining position;
  - a plain one or two unit search at the best position.

Returns FALSE, with nothing emitted, when none of these applies, and TRUE
otherwise. TRUE is also returned when the skip table cannot be allocated. */
DEFINE_COMPILER;
struct sljit_label *retry;
struct sljit_jump *hit;
fast_forward_char_data chars[MAX_N_CHARS];
fast_forward_char_data *entry;
sljit_s32 offset;
PCRE2_UCHAR diff;
PCRE2_UCHAR *unit, *unit_end;
int i, max, run_start;
int range_right = -1, range_len;
sljit_u8 *update_table = NULL;
BOOL in_run;
sljit_u32 budget = 10000;

for (i = 0; i < MAX_N_CHARS; i++)
  {
  chars[i].count = 0;
  chars[i].last_count = 0;
  }

max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &budget);

if (max < 1)
  return FALSE;

/* Convert last_count to priority. */
for (i = 0; i < max; i++)
  {
  entry = &chars[i];
  SLJIT_ASSERT(entry->last_count <= entry->count);

  if (entry->count == 0)
    {
    entry->count = 255;
    entry->last_count = 0;
    }
  else if (entry->count == 1)
    {
    entry->last_count = (entry->last_count == 1) ? 7 : 5;
    /* Simplifies algorithms later. */
    entry->chars[1] = entry->chars[0];
    }
  else if (entry->count == 2)
    {
    SLJIT_ASSERT(entry->chars[0] != entry->chars[1]);

    if (is_powerof2(entry->chars[0] ^ entry->chars[1]))
      entry->last_count = (entry->last_count == 2) ? 6 : 4;
    else
      entry->last_count = (entry->last_count == 2) ? 3 : 2;
    }
  else
    {
    entry->last_count = (entry->count == 255) ? 0 : 1;
    }
  }

#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && check_fast_forward_char_pair_simd(common, chars, max))
  return TRUE;
#endif

/* Find the longest run of positions with a known set. The index max is
visited as well, so that a run reaching the end is closed. */
in_run = FALSE;
/* Prevent compiler "uninitialized" warning */
run_start = 0;
range_len = 4 /* minimum length */ - 1;
for (i = 0; i <= max; i++)
  {
  if (in_run && (i - run_start) > range_len && (chars[i - 1].count < 255))
    {
    range_len = i - run_start;
    range_right = i - 1;
    }

  if (i < max && chars[i].count < 255)
    {
    SLJIT_ASSERT(chars[i].count > 0);
    if (!in_run)
      {
      in_run = TRUE;
      run_start = i;
      }
    }
  else
    {
    in_run = FALSE;
    }
  }

if (range_right >= 0)
  {
  /* The table is indexed by the low 8 bits of the code unit read at
  range_right; an entry is the byte distance STR_PTR can be moved, and 0
  means the unit can occur at range_right itself. */
  update_table = (sljit_u8 *)allocate_read_only_data(common, 256);
  if (update_table == NULL)
    return TRUE;
  memset(update_table, IN_UCHARS(range_len), 256);

  for (i = 0; i < range_len; i++)
    {
    entry = &chars[range_right - i];
    SLJIT_ASSERT(entry->count > 0 && entry->count < 255);

    unit = entry->chars;
    unit_end = unit + entry->count;
    do
      {
      if (update_table[(*unit) & 0xff] > IN_UCHARS(i))
        update_table[(*unit) & 0xff] = IN_UCHARS(i);
      unit++;
      }
    while (unit < unit_end);
    }
  }

/* Pick the position (other than range_right) with the highest priority;
the first pick needs a priority of at least 2. */
offset = -1;
for (i = 0; i < max; i++)
  {
  if (range_right == i)
    continue;

  if (offset == -1)
    {
    if (chars[i].last_count >= 2)
      offset = i;
    }
  else if (chars[offset].last_count < chars[i].last_count)
    {
    offset = i;
    }
  }

SLJIT_ASSERT(offset == -1 || (chars[offset].count >= 1 && chars[offset].count <= 2));

if (range_right < 0)
  {
  if (offset < 0)
    return FALSE;
  /* Works regardless the value is 1 or 2. */
  fast_forward_first_char2(common, chars[offset].chars[0], chars[offset].chars[1], offset);
  return TRUE;
  }

SLJIT_ASSERT(range_right != offset);

/* STR_END is lowered by max code units for the duration of the search. */
if (common->match_end_ptr != 0)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
  add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
  SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END);
  }
else
  {
  OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
  add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
  }

SLJIT_ASSERT(range_right >= 0);

if (!HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
  }

retry = LABEL();
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));

/* Load a single byte of the code unit at range_right; the two variants
differ only in the byte offset used. */
#if PCRE2_CODE_UNIT_WIDTH == 8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
#else
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
#endif

if (!HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
  }
else
  {
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
  }

/* Advance by the table value and repeat until it is zero. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, retry);

if (offset >= 0)
  {
  /* Confirm the candidate with the code unit at the chosen position. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  if (chars[offset].count == 1)
    {
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0], retry);
    }
  else
    {
    diff = chars[offset].chars[0] ^ chars[offset].chars[1];
    if (is_powerof2(diff))
      {
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, diff);
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0] | diff, retry);
      }
    else
      {
      hit = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0]);
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[1], retry);
      JUMPHERE(hit);
      }
    }
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset != 0)
  {
  /* The candidate start must not be a trailing code unit. */
  if (offset < 0)
    {
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }
  else
    {
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    }

  jumpto_if_not_utf_char_start(compiler, TMP1, retry);

  if (offset < 0)
    {
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }
  }
#endif

if (offset >= 0)
  {
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  }

/* Restore STR_END. */
if (common->match_end_ptr != 0)
  {
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  }
else
  {
  OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
  }
return TRUE;
}
6389
6390
static SLJIT_INLINE void fast_forward_first_char(compiler_common *common)
{
/* Passes the pattern's first code unit, together with its other-case
variant, to fast_forward_first_char2(). Without PCRE2_FIRSTCASELESS both
values are the same. */
PCRE2_UCHAR first_char = (PCRE2_UCHAR)(common->re->first_codeunit);
PCRE2_UCHAR other_case = first_char;

if ((common->re->flags & PCRE2_FIRSTCASELESS) == 0)
  {
  /* Caseful: only one value has to be searched for. */
  fast_forward_first_char2(common, first_char, other_case, 0);
  return;
  }

/* Caseless: take the other case from the fcc table first ... */
other_case = TABLE_GET(first_char, common->fcc, first_char);
#if defined SUPPORT_UNICODE
/* ... and override it from the Unicode data for values above 127 when
utf or ucp is enabled. */
if (first_char > 127 && (common->utf || common->ucp))
  other_case = UCD_OTHERCASE(first_char);
#endif

fast_forward_first_char2(common, first_char, other_case, 0);
}
6407
6408
/* Emits code that advances STR_PTR until it is just past a newline (as
selected by common->nltype and common->newline) or until STR_END is reached.
Nothing is done when STR_PTR <= jit_arguments.str. When common->match_end_ptr
is non-zero, STR_END is replaced by the value stored in that stack slot for
the duration of the search; the original STR_END is kept in TMP3 and restored
before leaving. */
static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
6409
0
{
6410
0
DEFINE_COMPILER;
6411
0
struct sljit_label *loop;
6412
0
struct sljit_jump *lastchar = NULL;
6413
0
struct sljit_jump *firstchar;
6414
0
struct sljit_jump *quit = NULL;
6415
0
struct sljit_jump *foundcr = NULL;
6416
0
struct sljit_jump *notfoundnl;
6417
0
jump_list *newline = NULL;
6418
6419
0
/* Save STR_END in TMP3 and search only up to the stored match end. */
if (common->match_end_ptr != 0)
6420
0
  {
6421
0
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
6422
0
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6423
0
  }
6424
6425
0
/* A fixed newline whose value is above 255 is matched as two code units:
(newline >> 8) & 0xff followed by newline & 0xff. This case returns early. */
if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6426
0
  {
6427
0
#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
6428
0
  if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && common->mode == PCRE2_JIT_COMPLETE)
6429
0
    {
6430
0
    if (HAS_VIRTUAL_REGISTERS)
6431
0
      {
6432
0
      OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6433
0
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6434
0
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6435
0
      }
6436
0
    else
6437
0
      {
6438
0
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
6439
0
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
6440
0
      }
6441
0
    /* Skip the search when STR_PTR <= jit_arguments.str. */
    firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6442
6443
0
    /* Move back one code unit, then one more unless STR_PTR now equals
    jit_arguments.begin. TMP1 receives the 0/1 flag value, shifted by
    UCHAR_SHIFT for 16 and 32 bit code units. */
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6444
0
    OP2U(SLJIT_SUB | SLJIT_SET_Z, STR_PTR, 0, TMP1, 0);
6445
0
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_NOT_EQUAL);
6446
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6447
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
6448
#endif
6449
0
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6450
6451
0
    fast_forward_char_pair_simd(common, 1, common->newline & 0xff, common->newline & 0xff, 0, (common->newline >> 8) & 0xff, (common->newline >> 8) & 0xff);
6452
0
    /* Step over the two code units of the newline. */
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6453
0
    }
6454
0
  else
6455
0
#endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
6456
0
    {
6457
0
    lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6458
0
    if (HAS_VIRTUAL_REGISTERS)
6459
0
      {
6460
0
      OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6461
0
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6462
0
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6463
0
      }
6464
0
    else
6465
0
      {
6466
0
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
6467
0
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
6468
0
      }
6469
0
    firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6470
6471
0
    /* Move back one code unit only when STR_PTR is at least two code units
    past jit_arguments.begin, so the loads at offsets -2 and -1 below stay
    inside the subject. */
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
6472
0
    OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, STR_PTR, 0, TMP1, 0);
6473
0
    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);
6474
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6475
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
6476
#endif
6477
0
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
6478
6479
0
    /* Advance one code unit per iteration until the two code units before
    STR_PTR equal the newline pair, or STR_PTR reaches STR_END. */
    loop = LABEL();
6480
0
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6481
0
    quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6482
0
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
6483
0
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
6484
0
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
6485
0
    CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
6486
6487
0
    JUMPHERE(quit);
6488
0
    JUMPHERE(lastchar);
6489
0
    }
6490
6491
0
  JUMPHERE(firstchar);
6492
6493
0
  /* Restore the STR_END saved in TMP3. */
  if (common->match_end_ptr != 0)
6494
0
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6495
0
  return;
6496
0
  }
6497
6498
0
/* All remaining newline types: load jit_arguments.str into TMP2. */
if (HAS_VIRTUAL_REGISTERS)
6499
0
  {
6500
0
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6501
0
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6502
0
  }
6503
0
else
6504
0
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
6505
6506
/* Example: match /^/ to \r\n from offset 1. */
6507
0
firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6508
6509
0
/* Step back by one character (NLTYPE_ANY) or one code unit (otherwise) so
that the character before STR_PTR is examined first. */
if (common->nltype == NLTYPE_ANY)
6510
0
  move_back(common, NULL, FALSE);
6511
0
else
6512
0
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6513
6514
0
loop = LABEL();
6515
0
/* The loop label is also published in common->ff_newline_shortcut. */
common->ff_newline_shortcut = loop;
6516
6517
0
#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD
6518
0
if (JIT_HAS_FAST_FORWARD_CHAR_SIMD && (common->nltype == NLTYPE_FIXED || common->nltype == NLTYPE_ANYCRLF))
6519
0
  {
6520
0
  if (common->nltype == NLTYPE_ANYCRLF)
6521
0
    {
6522
0
    fast_forward_char_simd(common, CHAR_CR, CHAR_LF, 0);
6523
0
    if (common->mode != PCRE2_JIT_COMPLETE)
6524
0
      lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6525
6526
0
    /* Read the code unit at STR_PTR and step over it; only a CR falls
    through to the CR/NL handling below, anything else jumps to quit. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6527
0
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6528
0
    quit = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6529
0
    }
6530
0
   else
6531
0
    {
6532
0
    fast_forward_char_simd(common, common->newline, common->newline, 0);
6533
6534
0
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6535
0
    if (common->mode != PCRE2_JIT_COMPLETE)
6536
0
      {
6537
0
      /* Outside PCRE2_JIT_COMPLETE mode: limit STR_PTR to STR_END. */
      OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
6538
0
      SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
6539
0
      }
6540
0
    }
6541
0
  }
6542
0
else
6543
0
#endif /* JIT_HAS_FAST_FORWARD_CHAR_SIMD */
6544
0
  {
6545
0
  /* Generic loop: read one character, leave at STR_END, branch out on CR
  for the ANY / ANYCRLF types, and bind the jumps collected by
  check_newlinechar() back to the loop start. */
  read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
6546
0
  lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6547
0
  if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
6548
0
    foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6549
0
  check_newlinechar(common, common->nltype, &newline, FALSE);
6550
0
  set_jumps(newline, loop);
6551
0
  }
6552
6553
0
if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
6554
0
  {
6555
0
  /* quit is still NULL when the generic loop above was emitted: the
  fall-through path skips the CR handling, the foundcr jump enters it. */
  if (quit == NULL)
6556
0
    {
6557
0
    quit = JUMP(SLJIT_JUMP);
6558
0
    JUMPHERE(foundcr);
6559
0
    }
6560
6561
0
  /* After a CR: when not at STR_END and the next code unit is CHAR_NL,
  advance STR_PTR past it too (TMP1 is the 0/1 comparison result, shifted by
  UCHAR_SHIFT for 16 and 32 bit code units). */
  notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6562
0
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6563
0
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_NL);
6564
0
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
6565
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6566
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
6567
#endif
6568
0
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6569
0
  JUMPHERE(notfoundnl);
6570
0
  JUMPHERE(quit);
6571
0
  }
6572
6573
0
if (lastchar)
6574
0
  JUMPHERE(lastchar);
6575
0
JUMPHERE(firstchar);
6576
6577
0
/* Restore the STR_END saved in TMP3. */
if (common->match_end_ptr != 0)
6578
0
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6579
0
}
6580
6581
static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
6582
6583
/* Emits a loop that reads code units at STR_PTR and tests them against the
256-bit table common->re->start_bitmap. The loop ends at the first accepted
code unit, leaving STR_PTR pointing at it. Reaching STR_END is a failed match
in PCRE2_JIT_COMPLETE mode and simply leaves the loop in the other modes.
TMP1, TMP2 and (without virtual registers) TMP3 are used as scratch;
RETURN_ADDR holds the original STR_END when common->match_end_ptr is set. */
static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common)
6584
0
{
6585
0
DEFINE_COMPILER;
6586
0
const sljit_u8 *start_bits = common->re->start_bitmap;
6587
0
struct sljit_label *start;
6588
0
struct sljit_jump *partial_quit;
6589
#if PCRE2_CODE_UNIT_WIDTH != 8
6590
struct sljit_jump *found = NULL;
6591
#endif
6592
0
jump_list *matches = NULL;
6593
6594
0
/* Save STR_END in RETURN_ADDR, then replace it by (value stored at
match_end_ptr + one code unit) when STR_END is greater than that. */
if (common->match_end_ptr != 0)
6595
0
  {
6596
0
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6597
0
  OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
6598
0
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
6599
0
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
6600
0
  SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END);
6601
0
  }
6602
6603
0
start = LABEL();
6604
6605
0
partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6606
0
if (common->mode == PCRE2_JIT_COMPLETE)
6607
0
  add_jump(compiler, &common->failed_match, partial_quit);
6608
6609
0
/* Load the current code unit into TMP1 and step over it. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6610
0
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6611
6612
0
/* Try the specialised class tests first; fall back to a table lookup. */
if (!optimize_class(common, start_bits, (start_bits[31] & 0x80) != 0, FALSE, &matches))
6613
0
  {
6614
#if PCRE2_CODE_UNIT_WIDTH != 8
6615
  /* Code units >= 255 are outside the lookup below: they are accepted when
  the top bit of the bitmap is set, and make the loop continue otherwise. */
  if ((start_bits[31] & 0x80) != 0)
6616
    found = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255);
6617
  else
6618
    CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255, start);
6619
#elif defined SUPPORT_UNICODE
6620
0
  if (common->utf && is_char7_bitset(start_bits, FALSE))
6621
0
    CMPTO(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 127, start);
6622
0
#endif
6623
0
  /* Bitmap lookup: byte index is TMP1 >> 3, bit index is TMP1 & 0x7. The
  loop restarts when the selected bit is clear. */
  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
6624
0
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
6625
0
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
6626
0
  if (!HAS_VIRTUAL_REGISTERS)
6627
0
    {
6628
0
    OP2(SLJIT_SHL, TMP3, 0, SLJIT_IMM, 1, TMP2, 0);
6629
0
    OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP3, 0);
6630
0
    }
6631
0
  else
6632
0
    {
6633
0
    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
6634
0
    OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
6635
0
    }
6636
0
  JUMPTO(SLJIT_ZERO, start);
6637
0
  }
6638
0
else
6639
0
  /* Jumps collected by optimize_class() are bound to the loop start. */
  set_jumps(matches, start);
6640
6641
#if PCRE2_CODE_UNIT_WIDTH != 8
6642
if (found != NULL)
6643
  JUMPHERE(found);
6644
#endif
6645
6646
0
/* Undo the increment so that STR_PTR addresses the accepted code unit. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6647
6648
0
/* Outside PCRE2_JIT_COMPLETE mode the end-of-subject jump lands here, after
the decrement above. */
if (common->mode != PCRE2_JIT_COMPLETE)
6649
0
  JUMPHERE(partial_quit);
6650
6651
0
/* Restore the STR_END saved in RETURN_ADDR. */
if (common->match_end_ptr != 0)
6652
0
  OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
6653
0
}
6654
6655
/* Emits code that searches the subject for req_char (or, when caseless is
set, for req_char or its other-case value) and stores the position where it
was found in the stack slot common->req_char_ptr.

The search starts at STR_PTR, or one code unit after it when has_firstchar is
set, and runs up to STR_END. It is skipped entirely when
STR_PTR + IN_UCHARS(REQ_CU_MAX) * 100 is below STR_END, or when STR_PTR is
below the position already stored in the slot.

Returns the list of jumps that are taken when the code unit is not found;
the caller has to bind them. TMP1 and TMP2 are used as scratch. */
static SLJIT_INLINE jump_list *search_requested_char(compiler_common *common, PCRE2_UCHAR req_char, BOOL caseless, BOOL has_firstchar)
6656
0
{
6657
0
DEFINE_COMPILER;
6658
0
struct sljit_label *loop;
6659
0
struct sljit_jump *toolong;
6660
0
struct sljit_jump *already_found;
6661
0
struct sljit_jump *found;
6662
0
struct sljit_jump *found_oc = NULL;
6663
0
jump_list *not_found = NULL;
6664
0
sljit_u32 oc, bit;
6665
6666
0
SLJIT_ASSERT(common->req_char_ptr != 0);
6667
0
/* TMP2 = STR_PTR plus the search limit, TMP1 = previously stored position. */
OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(REQ_CU_MAX) * 100);
6668
0
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
6669
0
toolong = CMP(SLJIT_LESS, TMP2, 0, STR_END, 0);
6670
0
already_found = CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0);
6671
6672
0
/* TMP1 becomes the search cursor. */
if (has_firstchar)
6673
0
  OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6674
0
else
6675
0
  OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
6676
6677
0
/* oc is the other-case value of req_char, or req_char itself when the
search is caseful. */
oc = req_char;
6678
0
if (caseless)
6679
0
  {
6680
0
  oc = TABLE_GET(req_char, common->fcc, req_char);
6681
0
#if defined SUPPORT_UNICODE
6682
0
  if (req_char > 127 && (common->utf || common->ucp))
6683
0
    oc = UCD_OTHERCASE(req_char);
6684
0
#endif
6685
0
  }
6686
6687
0
#ifdef JIT_HAS_FAST_REQUESTED_CHAR_SIMD
6688
0
if (JIT_HAS_FAST_REQUESTED_CHAR_SIMD)
6689
0
  {
6690
0
  not_found = fast_requested_char_simd(common, req_char, oc);
6691
0
  }
6692
0
else
6693
0
#endif
6694
0
  {
6695
0
  /* Scalar loop: fail when the cursor reaches STR_END, otherwise load one
  code unit into TMP2 and compare it. */
  loop = LABEL();
6696
0
  add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
6697
6698
0
  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
6699
6700
0
  if (req_char == oc)
6701
0
    found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
6702
0
  else
6703
0
    {
6704
0
    bit = req_char ^ oc;
6705
0
    /* When the two cases differ in exactly one bit, set that bit in the
    loaded value and use a single comparison; otherwise compare twice. */
    if (is_powerof2(bit))
6706
0
      {
6707
0
       OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
6708
0
      found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
6709
0
      }
6710
0
    else
6711
0
      {
6712
0
      found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
6713
0
      found_oc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
6714
0
      }
6715
0
    }
6716
0
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
6717
0
  JUMPTO(SLJIT_JUMP, loop);
6718
6719
0
  JUMPHERE(found);
6720
0
  if (found_oc)
6721
0
    JUMPHERE(found_oc);
6722
0
  }
6723
6724
0
/* Remember the cursor position in the req_char_ptr stack slot. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
6725
6726
0
JUMPHERE(already_found);
6727
0
JUMPHERE(toolong);
6728
0
return not_found;
6729
0
}
6730
6731
/* Emits a fast-call routine (SLJIT_FAST_ENTER / SLJIT_FAST_RETURN, return
address kept in RETURN_ADDR) that pops entries from below STACK_TOP in a
loop. The machine word at STACK_TOP[-1] selects the action:

  > 0  it is an offset from the local base: the two words at STACK_TOP[-2]
       and STACK_TOP[-3] are copied to (base + offset) and
       (base + offset + sizeof(sljit_sw)), and three words are popped;
  == 0 the routine returns;
  < 0  the negated value is an offset from the local base: the word at
       STACK_TOP[-2] is copied there, and two words are popped.

TMP1, TMP2 and (without virtual registers) TMP3 are used as scratch. */
static void do_revertframes(compiler_common *common)
6732
0
{
6733
0
DEFINE_COMPILER;
6734
0
struct sljit_jump *jump;
6735
0
struct sljit_label *mainloop;
6736
6737
0
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
6738
0
/* TMP1 holds the local base address. */
GET_LOCAL_BASE(TMP1, 0, 0);
6739
6740
/* Drop frames until we reach STACK_TOP. */
6741
0
mainloop = LABEL();
6742
0
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -SSIZE_OF(sw));
6743
0
OP2U(SLJIT_SUB | SLJIT_SET_SIG_LESS_EQUAL | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0);
6744
0
jump = JUMP(SLJIT_SIG_LESS_EQUAL);
6745
6746
0
/* Positive value: restore two words. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
6747
0
if (HAS_VIRTUAL_REGISTERS)
6748
0
  {
6749
0
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
6750
0
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -(3 * SSIZE_OF(sw)));
6751
0
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * SSIZE_OF(sw));
6752
0
  }
6753
0
else
6754
0
  {
6755
0
  /* TMP1 is reused as a temporary here, so the local base is reloaded
  into it before looping. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
6756
0
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(3 * SSIZE_OF(sw)));
6757
0
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * SSIZE_OF(sw));
6758
0
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
6759
0
  GET_LOCAL_BASE(TMP1, 0, 0);
6760
0
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP3, 0);
6761
0
  }
6762
0
JUMPTO(SLJIT_JUMP, mainloop);
6763
6764
0
JUMPHERE(jump);
6765
0
/* The flags of the comparison above are declared as still current, so the
zero / negative cases can be separated without comparing again. */
sljit_set_current_flags(compiler, SLJIT_CURRENT_FLAGS_SUB | SLJIT_CURRENT_FLAGS_COMPARE | SLJIT_SET_SIG_LESS_EQUAL | SLJIT_SET_Z);
6766
0
jump = JUMP(SLJIT_NOT_ZERO /* SIG_LESS */);
6767
/* End of reverting values. */
6768
0
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
6769
6770
0
JUMPHERE(jump);
6771
0
/* Negative value: negate it and restore a single word. */
OP2(SLJIT_SUB, TMP2, 0, SLJIT_IMM, 0, TMP2, 0);
6772
0
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
6773
0
if (HAS_VIRTUAL_REGISTERS)
6774
0
  {
6775
0
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
6776
0
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
6777
0
  }
6778
0
else
6779
0
  {
6780
0
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
6781
0
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
6782
0
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP3, 0);
6783
0
  }
6784
0
JUMPTO(SLJIT_JUMP, mainloop);
6785
0
}
6786
6787
/* Bit-mask helpers indexed by ucp_* values. UCPCAT() yields a mask with one
bit set; UCPCAT2() and UCPCAT3() OR two or three such masks together;
UCPCAT_RANGE() sets every bit from 'start' to 'end' inclusive. UCPCAT_L and
UCPCAT_N cover ucp_Ll..ucp_Lu and ucp_Nd..ucp_No, and UCPCAT_ALL sets all
bits from 0 up to ucp_Zs. */
#ifdef SUPPORT_UNICODE
6788
0
#define UCPCAT(bit) (1 << (bit))
6789
0
#define UCPCAT2(bit1, bit2) (UCPCAT(bit1) | UCPCAT(bit2))
6790
0
#define UCPCAT3(bit1, bit2, bit3) (UCPCAT(bit1) | UCPCAT(bit2) | UCPCAT(bit3))
6791
0
#define UCPCAT_RANGE(start, end) (((1 << ((end) + 1)) - 1) - ((1 << (start)) - 1))
6792
0
#define UCPCAT_L UCPCAT_RANGE(ucp_Ll, ucp_Lu)
6793
0
#define UCPCAT_N UCPCAT_RANGE(ucp_Nd, ucp_No)
6794
0
#define UCPCAT_ALL ((1 << (ucp_Zs + 1)) - 1)
6795
#endif
6796
6797
/* Emits a fast-call routine that classifies the characters on both sides of
STR_PTR as word / non-word. The return address is kept in LOCALS0.

TMP3 receives 1 when the character before STR_PTR is a word character and 0
otherwise (also 0 when STR_PTR <= jit_arguments.begin). TMP2 receives the
same information for the character at STR_PTR (0 at the end of the subject).
The routine returns with TMP2 = TMP2 XOR TMP3 and the zero flag set from that
result.

When 'ucp' is set the classification uses the type returned by the
getucdtype routine and the Mn, Pc, L* and N* category mask; otherwise the
ctype_word bit of the common->ctypes table is used.

With common->invalid_utf two extra exits exist: TMP2 = -1 when the previous
character is invalid and the current one is INVALID_UTF_CHAR, and
TMP2 = TMP3 when the current character is invalid. */
static void check_wordboundary(compiler_common *common, BOOL ucp)
6798
0
{
6799
0
DEFINE_COMPILER;
6800
0
struct sljit_jump *skipread;
6801
0
jump_list *skipread_list = NULL;
6802
0
#ifdef SUPPORT_UNICODE
6803
0
struct sljit_label *valid_utf;
6804
0
jump_list *invalid_utf1 = NULL;
6805
0
#endif /* SUPPORT_UNICODE */
6806
0
jump_list *invalid_utf2 = NULL;
6807
0
#if PCRE2_CODE_UNIT_WIDTH != 8 || defined SUPPORT_UNICODE
6808
0
struct sljit_jump *jump;
6809
0
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 || SUPPORT_UNICODE */
6810
6811
0
SLJIT_UNUSED_ARG(ucp);
6812
0
/* The code below extracts the word bit with a right shift by 4. */
SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
6813
6814
0
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6815
/* Get type of the previous char, and put it to TMP3. */
6816
0
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6817
0
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6818
0
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
6819
0
/* No previous character at the start of the subject: TMP3 stays 0. */
skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6820
6821
0
#ifdef SUPPORT_UNICODE
6822
0
if (common->invalid_utf)
6823
0
  {
6824
0
  peek_char_back(common, READ_CHAR_MAX, &invalid_utf1);
6825
6826
0
  if (common->mode != PCRE2_JIT_COMPLETE)
6827
0
    {
6828
0
    /* TMP1 and STR_PTR are preserved in RETURN_ADDR and TMP2 around the
    move_back() / check_start_used_ptr() calls. */
    OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
6829
0
    OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
6830
0
    move_back(common, NULL, TRUE);
6831
0
    check_start_used_ptr(common);
6832
0
    OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
6833
0
    OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
6834
0
    }
6835
0
  }
6836
0
else
6837
0
#endif /* SUPPORT_UNICODE */
6838
0
  {
6839
0
  if (common->mode == PCRE2_JIT_COMPLETE)
6840
0
    peek_char_back(common, READ_CHAR_MAX, NULL);
6841
0
  else
6842
0
    {
6843
0
    move_back(common, NULL, TRUE);
6844
0
    check_start_used_ptr(common);
6845
0
    read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR);
6846
0
    }
6847
0
  }
6848
6849
/* Testing char type. */
6850
0
#ifdef SUPPORT_UNICODE
6851
0
if (ucp)
6852
0
  {
6853
0
  /* TMP3 = 1 when bit (1 << TMP1) is part of the word category mask. */
  add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
6854
0
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP1, 0);
6855
0
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N);
6856
0
  OP_FLAGS(SLJIT_MOV, TMP3, 0, SLJIT_NOT_ZERO);
6857
0
  }
6858
0
else
6859
0
#endif /* SUPPORT_UNICODE */
6860
0
  {
6861
#if PCRE2_CODE_UNIT_WIDTH != 8
6862
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
6863
#elif defined SUPPORT_UNICODE
6864
  /* Here TMP3 has already been zeroed. */
6865
0
  jump = NULL;
6866
0
  if (common->utf)
6867
0
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
6868
0
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
6869
0
  /* Table lookup for values up to 255: TMP3 = (ctypes[TMP1] >> 4) & 1. */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
6870
0
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
6871
0
  OP2(SLJIT_AND, TMP3, 0, TMP1, 0, SLJIT_IMM, 1);
6872
#if PCRE2_CODE_UNIT_WIDTH != 8
6873
  JUMPHERE(jump);
6874
#elif defined SUPPORT_UNICODE
6875
0
  if (jump != NULL)
6876
0
    JUMPHERE(jump);
6877
0
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
6878
0
  }
6879
0
JUMPHERE(skipread);
6880
6881
0
/* Now the character at STR_PTR: TMP2 defaults to 0, which is the value used
when check_str_end() takes its jump. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
6882
0
check_str_end(common, &skipread_list);
6883
0
peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, &invalid_utf2);
6884
6885
/* Testing char type. This is a code duplication. */
6886
0
#ifdef SUPPORT_UNICODE
6887
6888
0
/* Re-entry point used by the invalid-UTF handling at the end. */
valid_utf = LABEL();
6889
6890
0
if (ucp)
6891
0
  {
6892
0
  add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
6893
0
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP1, 0);
6894
0
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N);
6895
0
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
6896
0
  }
6897
0
else
6898
0
#endif /* SUPPORT_UNICODE */
6899
0
  {
6900
#if PCRE2_CODE_UNIT_WIDTH != 8
6901
  /* TMP2 may be destroyed by peek_char. */
6902
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
6903
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
6904
#elif defined SUPPORT_UNICODE
6905
0
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
6906
0
  jump = NULL;
6907
0
  if (common->utf)
6908
0
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
6909
0
#endif
6910
0
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
6911
0
  OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
6912
0
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
6913
#if PCRE2_CODE_UNIT_WIDTH != 8
6914
  JUMPHERE(jump);
6915
#elif defined SUPPORT_UNICODE
6916
0
  if (jump != NULL)
6917
0
    JUMPHERE(jump);
6918
0
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
6919
0
  }
6920
0
set_jumps(skipread_list, LABEL());
6921
6922
0
/* Normal exit: the result is TMP2 XOR TMP3, with the zero flag set. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6923
0
OP2(SLJIT_XOR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, TMP3, 0);
6924
0
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
6925
6926
0
#ifdef SUPPORT_UNICODE
6927
0
if (common->invalid_utf)
6928
0
  {
6929
0
  /* The previous character was invalid: continue at valid_utf unless the
  current character is INVALID_UTF_CHAR, in which case return TMP2 = -1. */
  set_jumps(invalid_utf1, LABEL());
6930
6931
0
  peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, NULL);
6932
0
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR, valid_utf);
6933
6934
0
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6935
0
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, -1);
6936
0
  OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
6937
6938
0
  /* The current character was invalid: return TMP2 = TMP3. */
  set_jumps(invalid_utf2, LABEL());
6939
0
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6940
0
  OP1(SLJIT_MOV, TMP2, 0, TMP3, 0);
6941
0
  OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
6942
0
  }
6943
0
#endif /* SUPPORT_UNICODE */
6944
0
}
6945
6946
/* Tries to test the character in TMP1 against the 256-bit class bitmap
'bits' with a few range comparisons instead of a table lookup.

The bitmap is scanned for positions where the bit value changes; each such
position is stored in ranges[]. A final boundary of 256 is appended when the
last bit is 0 and nclass is set, or when it is 1 and nclass is not set. Code
is emitted only when there are at most four boundaries; the generated
conditional jumps are added to *backtracks. 'invert' flips the sense of the
test.

Returns TRUE when code has been emitted (including the cases where every
character or no character is accepted), and FALSE when the class needs more
boundaries than are supported, in which case nothing is emitted. */
static BOOL optimize_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
6947
0
{
6948
/* May destroy TMP1. */
6949
0
DEFINE_COMPILER;
6950
0
int ranges[MAX_CLASS_RANGE_SIZE];
6951
0
sljit_u8 bit, cbit, all;
6952
0
int i, byte, length = 0;
6953
6954
0
bit = bits[0] & 0x1;
6955
/* All bits will be zero or one (since bit is zero or one). */
6956
0
all = (sljit_u8)-bit;
6957
6958
0
/* Collect the boundaries. A whole byte is skipped at once when it is byte
aligned and all of its bits equal the current bit value. */
for (i = 0; i < 256; )
6959
0
  {
6960
0
  byte = i >> 3;
6961
0
  if ((i & 0x7) == 0 && bits[byte] == all)
6962
0
    i += 8;
6963
0
  else
6964
0
    {
6965
0
    cbit = (bits[byte] >> (i & 0x7)) & 0x1;
6966
0
    if (cbit != bit)
6967
0
      {
6968
0
      if (length >= MAX_CLASS_RANGE_SIZE)
6969
0
        return FALSE;
6970
0
      ranges[length] = i;
6971
0
      length++;
6972
0
      bit = cbit;
6973
0
      all = (sljit_u8)-cbit; /* sign extend bit into byte */
6974
0
      }
6975
0
    i++;
6976
0
    }
6977
0
  }
6978
6979
0
/* Close the last range at 256 when its bit value disagrees with nclass. */
if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
6980
0
  {
6981
0
  if (length >= MAX_CLASS_RANGE_SIZE)
6982
0
    return FALSE;
6983
0
  ranges[length] = 256;
6984
0
  length++;
6985
0
  }
6986
6987
0
if (length < 0 || length > 4)
6988
0
  return FALSE;
6989
6990
0
/* From here on 'bit' is the (possibly inverted) value of the first bit. */
bit = bits[0] & 0x1;
6991
0
if (invert) bit ^= 0x1;
6992
6993
/* No character is accepted. */
6994
0
if (length == 0 && bit == 0)
6995
0
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6996
6997
0
switch(length)
6998
0
  {
6999
0
  case 0:
7000
  /* When bit != 0, all characters are accepted. */
7001
0
  return TRUE;
7002
7003
0
  case 1:
7004
0
  /* One boundary: a single comparison against it. */
  add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
7005
0
  return TRUE;
7006
7007
0
  case 2:
7008
0
  /* Two boundaries: one interval, tested by subtracting its start and
  comparing with its length, or by an equality test when it holds a single
  value. */
  if (ranges[0] + 1 != ranges[1])
7009
0
    {
7010
0
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
7011
0
    add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
7012
0
    }
7013
0
  else
7014
0
    add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
7015
0
  return TRUE;
7016
7017
0
  case 3:
7018
0
  /* Three boundaries: one open-ended comparison plus one interval test. */
  if (bit != 0)
7019
0
    {
7020
0
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
7021
0
    if (ranges[0] + 1 != ranges[1])
7022
0
      {
7023
0
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
7024
0
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
7025
0
      }
7026
0
    else
7027
0
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
7028
0
    return TRUE;
7029
0
    }
7030
7031
0
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
7032
0
  if (ranges[1] + 1 != ranges[2])
7033
0
    {
7034
0
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
7035
0
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
7036
0
    }
7037
0
  else
7038
0
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
7039
0
  return TRUE;
7040
7041
0
  case 4:
7042
0
  /* Two intervals of equal length whose start values differ in exactly one
  bit: OR that bit into TMP1 and test the second interval only. */
  if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
7043
0
      && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
7044
0
      && (ranges[1] & (ranges[2] - ranges[0])) == 0
7045
0
      && is_powerof2(ranges[2] - ranges[0]))
7046
0
    {
7047
0
    SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);
7048
0
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
7049
0
    if (ranges[2] + 1 != ranges[3])
7050
0
      {
7051
0
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
7052
0
      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
7053
0
      }
7054
0
    else
7055
0
      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
7056
0
    return TRUE;
7057
0
    }
7058
7059
0
  if (bit != 0)
7060
0
    {
7061
0
    /* 'i' remembers how much has already been subtracted from TMP1 by the
    first interval test, so the second test can compensate for it. */
    i = 0;
7062
0
    if (ranges[0] + 1 != ranges[1])
7063
0
      {
7064
0
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
7065
0
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
7066
0
      i = ranges[0];
7067
0
      }
7068
0
    else
7069
0
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
7070
7071
0
    if (ranges[2] + 1 != ranges[3])
7072
0
      {
7073
0
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
7074
0
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
7075
0
      }
7076
0
    else
7077
0
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
7078
0
    return TRUE;
7079
0
    }
7080
7081
0
  /* bit == 0: first the outer bounds ranges[0]..ranges[3], then the gap
  between ranges[1] and ranges[2]. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
7082
0
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
7083
0
  if (ranges[1] + 1 != ranges[2])
7084
0
    {
7085
0
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
7086
0
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
7087
0
    }
7088
0
  else
7089
0
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
7090
0
  return TRUE;
7091
7092
0
  default:
7093
0
  SLJIT_UNREACHABLE();
7094
0
  return FALSE;
7095
0
  }
7096
0
}
7097
7098
/* Tries to test the character in TMP1 against the 256-bit class bitmap
'bits' with a short, branch-free sequence of compare + conditional-select
operations. It is used only when the CPU reports SLJIT_HAS_CMOV.

The set bits of the bitmap (of its complement when nclass is set) are
collected into char_list[]; the function gives up when there are more than
MAX_CLASS_CHARS_SIZE entries. A value c with bit 0x20 set whose partner
c - 0x20 is already in the list is merged into the partner's entry, which is
then marked with 0x100 and tested once after OR-ing 0x20 into TMP1.

The emitted code leaves a non-zero value in TMP2 when one of the listed
values matched, and a single conditional jump on TMP2 is added to
*backtracks. 'invert' flips the sense of that jump.

Returns TRUE when code has been emitted, FALSE otherwise. */
static BOOL optimize_class_chars(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
7099
0
{
7100
/* May destroy TMP1. */
7101
0
DEFINE_COMPILER;
7102
0
uint16_t char_list[MAX_CLASS_CHARS_SIZE];
7103
0
uint8_t byte;
7104
0
sljit_s32 type;
7105
0
int i, j, k, len, c;
7106
7107
0
if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
7108
0
  return FALSE;
7109
7110
0
len = 0;
7111
7112
0
/* Walk the 32 bitmap bytes; j is the bit index inside the current byte. */
for (i = 0; i < 32; i++)
7113
0
  {
7114
0
  byte = bits[i];
7115
7116
0
  if (nclass)
7117
0
    byte = (sljit_u8)~byte;
7118
7119
0
  j = 0;
7120
0
  while (byte != 0)
7121
0
    {
7122
0
    if (byte & 0x1)
7123
0
      {
7124
0
      c = i * 8 + j;
7125
7126
0
      /* k == len afterwards means "no partner found, append c". */
      k = len;
7127
7128
0
      if ((c & 0x20) != 0)
7129
0
        {
7130
0
        for (k = 0; k < len; k++)
7131
0
          if (char_list[k] == c - 0x20)
7132
0
            {
7133
0
            /* Merge: the entry now stores c with the 0x100 marker. */
            char_list[k] |= 0x120;
7134
0
            break;
7135
0
            }
7136
0
        }
7137
7138
0
      if (k == len)
7139
0
        {
7140
0
        if (len >= MAX_CLASS_CHARS_SIZE)
7141
0
          return FALSE;
7142
7143
0
        char_list[len++] = (uint16_t) c;
7144
0
        }
7145
0
      }
7146
7147
0
    byte >>= 1;
7148
0
    j++;
7149
0
    }
7150
0
  }
7151
7152
0
if (len == 0) return FALSE;  /* Should never occur, but stops analyzers complaining. */
7153
7154
0
i = 0;
7155
0
j = 0;
7156
7157
0
/* Value 0 gets its own test that stores the 0/1 comparison result in TMP2.
Presumably this is because the select-based test below copies TMP1 into TMP2
on a match, which could not signal a match for the value 0. */
if (char_list[0] == 0)
7158
0
  {
7159
0
  i++;
7160
0
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0);
7161
0
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_ZERO);
7162
0
  }
7163
0
else
7164
0
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
7165
7166
0
/* Unmerged entries: TMP2 = TMP1 when TMP1 equals the entry. Merged entries
are only counted in j here. */
while (i < len)
7167
0
  {
7168
0
  if ((char_list[i] & 0x100) != 0)
7169
0
    j++;
7170
0
  else
7171
0
    {
7172
0
    OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, char_list[i]);
7173
0
    SELECT(SLJIT_ZERO, TMP2, TMP1, 0, TMP2);
7174
0
    }
7175
0
  i++;
7176
0
  }
7177
7178
0
/* Merged entries: force bit 0x20 in TMP1, then compare with the low byte of
each marked entry. */
if (j != 0)
7179
0
  {
7180
0
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x20);
7181
7182
0
  for (i = 0; i < len; i++)
7183
0
    if ((char_list[i] & 0x100) != 0)
7184
0
      {
7185
0
      j--;
7186
0
      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, char_list[i] & 0xff);
7187
0
      SELECT(SLJIT_ZERO, TMP2, TMP1, 0, TMP2);
7188
0
      }
7189
0
  }
7190
7191
0
if (invert)
7192
0
  nclass = !nclass;
7193
7194
0
/* TMP2 == 0 means that none of the listed values matched. */
type = nclass ? SLJIT_NOT_EQUAL : SLJIT_EQUAL;
7195
0
add_jump(compiler, backtracks, CMP(type, TMP2, 0, SLJIT_IMM, 0));
7196
0
return TRUE;
7197
0
}
7198
7199
static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* May destroy TMP1.

Tries the range-based class test first. Only when that one reports that it
could not handle the bitmap is the character-list based test attempted, and
its result is returned as is. */
if (!optimize_class_ranges(common, bits, nclass, invert, backtracks))
  return optimize_class_chars(common, bits, nclass, invert, backtracks);

return TRUE;
}
7206
7207
/* Emits a fast-call routine (return address in RETURN_ADDR) that tests the
character in TMP1 against the values 0x0a..0x0d and 0x85, and additionally
0x2028 / 0x2029 in the 16 and 32 bit libraries, or in the 8 bit library with
Unicode support when common->utf is set. */
static void check_anynewline(compiler_common *common)
7208
0
{
7209
/* Check whether TMP1 contains a newline character. TMP2 destroyed.
The result is left in TMP2 (non-zero on a match) and in the zero flag set by
the final OP_FLAGS. TMP1 is modified as well: 0x0a is subtracted from it and
its lowest bit may be set. */
7210
0
DEFINE_COMPILER;
7211
7212
0
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
7213
7214
0
/* Rebase TMP1 so that 0x0a..0x0d becomes 0..3 and needs one comparison. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
7215
0
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
7216
0
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
7217
0
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
7218
0
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
7219
0
#if PCRE2_CODE_UNIT_WIDTH == 8
7220
0
if (common->utf)
7221
0
  {
7222
0
#endif
7223
0
  /* Setting the lowest bit lets the single comparison against
  0x2029 - 0x0a cover the two values 0x2028 and 0x2029. */
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7224
0
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
7225
0
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
7226
0
#if PCRE2_CODE_UNIT_WIDTH == 8
7227
0
  }
7228
0
#endif
7229
0
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
7230
0
/* Merge the last comparison into TMP2 and set the zero flag from it. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
7231
0
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
7232
0
}
7233
7234
/* Emits a fast-call routine (return address in RETURN_ADDR) that tests the
character in TMP1 against the values 0x09, 0x20 and 0xa0, and additionally
0x1680, 0x180e, 0x2000..0x200a, 0x202f, 0x205f and 0x3000 in the 16 and 32
bit libraries, or in the 8 bit library with Unicode support when common->utf
is set. */
static void check_hspace(compiler_common *common)
7235
0
{
7236
/* Check whether TMP1 contains a horizontal space character. TMP2 destroyed.
The result is left in TMP2 (non-zero on a match) and in the zero flag set by
the final OP_FLAGS. TMP1 is modified when the 0x2000 range is tested. */
7237
0
DEFINE_COMPILER;
7238
7239
0
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
7240
7241
0
/* Each comparison result is accumulated into TMP2. */
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x09);
7242
0
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
7243
0
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x20);
7244
0
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7245
0
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xa0);
7246
0
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
7247
0
#if PCRE2_CODE_UNIT_WIDTH == 8
7248
0
if (common->utf)
7249
0
  {
7250
0
#endif
7251
0
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7252
0
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x1680);
7253
0
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7254
0
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e);
7255
0
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7256
0
  /* Rebase TMP1 by 0x2000: the range 0x2000..0x200a then needs a single
  comparison, and the remaining values are compared relative to 0x2000. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
7257
0
  OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
7258
0
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
7259
0
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
7260
0
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7261
0
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
7262
0
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7263
0
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
7264
0
#if PCRE2_CODE_UNIT_WIDTH == 8
7265
0
  }
7266
0
#endif
7267
0
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
7268
0
/* Merge the last comparison into TMP2 and set the zero flag from it. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
7269
7270
0
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
7271
0
}
7272
7273
/* Emits a fast-call helper (SLJIT_FAST_ENTER / SLJIT_FAST_RETURN) that tests
the character in TMP1 against the vertical space code points 0x0a-0x0d and
0x85 and, in the Unicode / 16-bit / 32-bit paths below, 0x2028-0x2029. The
emitted instruction sequence is the same as the one in check_anynewline. The
partial results are OR-ed together in TMP2 and the final OR also sets the zero
flag. TMP1 is overwritten as well. */
static void check_vspace(compiler_common *common)
7274
0
{
7275
/* Check whether TMP1 contains a vertical space character. TMP2 destroyed. */
7276
0
DEFINE_COMPILER;
7277
7278
0
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
7279
7280
0
/* Rebase TMP1 to 0x0a so that 0x0a-0x0d becomes the range 0-3.
NOTE(review): this relies on LESS_EQUAL being an unsigned compare - confirm. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
7281
0
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
7282
0
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
7283
0
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
7284
0
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
7285
0
#if PCRE2_CODE_UNIT_WIDTH == 8
7286
0
if (common->utf)
7287
0
  {
7288
0
#endif
7289
0
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7290
0
  /* Setting the lowest bit folds 0x2028 onto 0x2029, so a single equality
  test covers both code points. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
7291
0
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
7292
0
#if PCRE2_CODE_UNIT_WIDTH == 8
7293
0
  }
7294
0
#endif
7295
0
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
7296
0
/* Merge the last pending equality test and set the zero flag from TMP2. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
7297
7298
0
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
7299
0
}
7300
7301
/* Emits the fast-call helper that compares two code unit sequences exactly
(case sensitive). One sequence is read through TMP1; the other is read through
STR_PTR after TMP2 has been subtracted from it. TMP2 is decremented by
IN_UCHARS(1) per compared unit and the loop ends at the first mismatch or when
TMP2 reaches zero, so a non-zero TMP2 on exit means a mismatch was found.
NOTE(review): TMP2 is presumably the (non-zero) byte length supplied by the
caller - confirm against the call sites.

The return address is parked in LOCALS0 and reloaded into TMP1 for the final
SLJIT_FAST_RETURN. Three loop shapes are emitted depending on which
sljit_emit_mem_update addressing form the target reports as supported. */
static void do_casefulcmp(compiler_common *common)
7302
0
{
7303
0
DEFINE_COMPILER;
7304
0
struct sljit_jump *jump;
7305
0
struct sljit_label *label;
7306
0
int char1_reg;
7307
0
int char2_reg;
7308
7309
0
/* Pick the two registers that will hold the loaded code units. */
if (HAS_VIRTUAL_REGISTERS)
7310
0
  {
7311
0
  char1_reg = STR_END;
7312
0
  char2_reg = STACK_TOP;
7313
0
  }
7314
0
else
7315
0
  {
7316
0
  char1_reg = TMP3;
7317
0
  char2_reg = RETURN_ADDR;
7318
0
  }
7319
7320
0
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0);
7321
0
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
7322
7323
0
/* STR_END and STACK_TOP are borrowed as scratch: stash their values in TMP3
and RETURN_ADDR; they are restored before returning. */
if (char1_reg == STR_END)
7324
0
  {
7325
0
  OP1(SLJIT_MOV, TMP3, 0, char1_reg, 0);
7326
0
  OP1(SLJIT_MOV, RETURN_ADDR, 0, char2_reg, 0);
7327
0
  }
7328
7329
0
/* Variant 1: load with post-update addressing (SLJIT_MEM_SUPP only probes
for support). */
if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
7330
0
  {
7331
0
  label = LABEL();
7332
0
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
7333
0
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
7334
0
  jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
7335
0
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
7336
0
  JUMPTO(SLJIT_NOT_ZERO, label);
7337
7338
0
  JUMPHERE(jump);
7339
0
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
7340
0
  }
7341
0
/* Variant 2: load with pre-update addressing. Both pointers are moved back
one unit first, and STR_PTR is moved forward one unit after the loop. */
else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
7342
0
  {
7343
0
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
7344
0
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7345
7346
0
  label = LABEL();
7347
0
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
7348
0
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
7349
0
  jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
7350
0
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
7351
0
  JUMPTO(SLJIT_NOT_ZERO, label);
7352
7353
0
  JUMPHERE(jump);
7354
0
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
7355
0
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7356
0
  }
7357
0
/* Variant 3: plain loads followed by explicit pointer increments. */
else
7358
0
  {
7359
0
  label = LABEL();
7360
0
  OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
7361
0
  OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
7362
0
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
7363
0
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7364
0
  jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
7365
0
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
7366
0
  JUMPTO(SLJIT_NOT_ZERO, label);
7367
7368
0
  JUMPHERE(jump);
7369
0
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
7370
0
  }
7371
7372
0
/* Restore the borrowed STR_END / STACK_TOP values saved above. */
if (char1_reg == STR_END)
7373
0
  {
7374
0
  OP1(SLJIT_MOV, char1_reg, 0, TMP3, 0);
7375
0
  OP1(SLJIT_MOV, char2_reg, 0, RETURN_ADDR, 0);
7376
0
  }
7377
7378
0
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
7379
0
}
7380
7381
/* Emits the fast-call helper that compares two code unit sequences after
mapping each unit through the common->lcc table (when the code unit width is
not 8, only units <= 255 are mapped). One sequence is read through TMP1; the
other is read through STR_PTR after TMP2 has been subtracted from it. TMP2 is
decremented by IN_UCHARS(1) per compared unit and the loop ends at the first
mismatch or when TMP2 reaches zero, so a non-zero TMP2 on exit means a
mismatch was found. NOTE(review): TMP2 is presumably the (non-zero) byte
length supplied by the caller - confirm against the call sites.

The return address is parked in LOCALS0 and the original STR_END value in
LOCALS1, since STR_END is used as the first character register. */
static void do_caselesscmp(compiler_common *common)
7382
0
{
7383
0
DEFINE_COMPILER;
7384
0
struct sljit_jump *jump;
7385
0
struct sljit_label *label;
7386
0
int char1_reg = STR_END;
7387
0
int char2_reg;
7388
0
int lcc_table;
7389
0
/* 0: plain loads, 1: post-update loads, 2: pre-update loads. */
int opt_type = 0;
7390
7391
0
if (HAS_VIRTUAL_REGISTERS)
7392
0
  {
7393
0
  char2_reg = STACK_TOP;
7394
0
  lcc_table = STACK_LIMIT;
7395
0
  }
7396
0
else
7397
0
  {
7398
0
  char2_reg = RETURN_ADDR;
7399
0
  lcc_table = TMP3;
7400
0
  }
7401
7402
0
/* Probe which update addressing form is available (SLJIT_MEM_SUPP only
checks for support). */
if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
7403
0
  opt_type = 1;
7404
0
else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
7405
0
  opt_type = 2;
7406
7407
0
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0);
7408
0
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
7409
7410
0
/* Save STR_END (char1_reg); it is reloaded just before returning. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, char1_reg, 0);
7411
7412
0
/* STACK_TOP and STACK_LIMIT are borrowed as scratch: stash their values in
TMP3 and RETURN_ADDR; they are restored before returning. */
if (char2_reg == STACK_TOP)
7413
0
  {
7414
0
  OP1(SLJIT_MOV, TMP3, 0, char2_reg, 0);
7415
0
  OP1(SLJIT_MOV, RETURN_ADDR, 0, lcc_table, 0);
7416
0
  }
7417
7418
0
OP1(SLJIT_MOV, lcc_table, 0, SLJIT_IMM, common->lcc);
7419
7420
0
if (opt_type == 1)
7421
0
  {
7422
0
  label = LABEL();
7423
0
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
7424
0
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
7425
0
  }
7426
0
else if (opt_type == 2)
7427
0
  {
7428
0
  /* Pre-update loads advance before reading, so start one unit earlier. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
7429
0
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7430
7431
0
  label = LABEL();
7432
0
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
7433
0
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
7434
0
  }
7435
0
else
7436
0
  {
7437
0
  /* STR_PTR is advanced later, after the table lookups (see opt_type == 0
  below). */
  label = LABEL();
7438
0
  OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
7439
0
  OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
7440
0
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
7441
0
  }
7442
7443
/* Map both units through the lcc table; units above 255 skip the lookup when
the code unit width is not 8. */
#if PCRE2_CODE_UNIT_WIDTH != 8
7444
jump = CMP(SLJIT_GREATER, char1_reg, 0, SLJIT_IMM, 255);
7445
#endif
7446
0
OP1(SLJIT_MOV_U8, char1_reg, 0, SLJIT_MEM2(lcc_table, char1_reg), 0);
7447
#if PCRE2_CODE_UNIT_WIDTH != 8
7448
JUMPHERE(jump);
7449
jump = CMP(SLJIT_GREATER, char2_reg, 0, SLJIT_IMM, 255);
7450
#endif
7451
0
OP1(SLJIT_MOV_U8, char2_reg, 0, SLJIT_MEM2(lcc_table, char2_reg), 0);
7452
#if PCRE2_CODE_UNIT_WIDTH != 8
7453
JUMPHERE(jump);
7454
#endif
7455
7456
0
if (opt_type == 0)
7457
0
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7458
7459
0
/* Leave the loop on the first mismatch; otherwise count down TMP2. */
jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
7460
0
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
7461
0
JUMPTO(SLJIT_NOT_ZERO, label);
7462
7463
0
JUMPHERE(jump);
7464
0
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
7465
7466
0
if (opt_type == 2)
7467
0
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7468
7469
0
/* Restore the borrowed STACK_TOP / STACK_LIMIT values saved above. */
if (char2_reg == STACK_TOP)
7470
0
  {
7471
0
  OP1(SLJIT_MOV, char2_reg, 0, TMP3, 0);
7472
0
  OP1(SLJIT_MOV, lcc_table, 0, RETURN_ADDR, 0);
7473
0
  }
7474
7475
0
OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
7476
0
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
7477
0
}
7478
7479
/* Emits the comparison code for the character starting at cc (all of its
code units when common->utf is set and the first unit has extra length).
Subject data is read at the negative offset -context->length from STR_PTR, and
context->length is reduced by IN_UCHARS(1) for every code unit processed.
A mismatch jumps to the backtracks list.

Arguments:
  common      compiler state
  caseless    when TRUE and the character has another case, the differing
              bit (from char_get_othercase_bit) is OR-ed into the loaded value
              and into the constant before comparing
  cc          pointer to the pattern code units of the character
  context     carries the remaining length, the register that receives the
              next load (sourcereg, -1 on the first call) and, on the
              unaligned path, the code units collected so far
  backtracks  jump list that receives the mismatch jumps

Returns:      cc advanced past the code units that were consumed

Loads run one step ahead of the compares: each step loads the next chunk into
context->sourcereg, then flips sourcereg between TMP1 and TMP2 and compares
the register that was loaded in the previous step. */
static PCRE2_SPTR byte_sequence_compare(compiler_common *common, BOOL caseless, PCRE2_SPTR cc,
7480
    compare_context *context, jump_list **backtracks)
7481
0
{
7482
0
DEFINE_COMPILER;
7483
0
unsigned int othercasebit = 0;
7484
0
PCRE2_SPTR othercasechar = NULL;
7485
0
#ifdef SUPPORT_UNICODE
7486
0
int utflength;
7487
0
#endif
7488
7489
0
if (caseless && char_has_othercase(common, cc))
7490
0
  {
7491
0
  othercasebit = char_get_othercase_bit(common, cc);
7492
0
  SLJIT_ASSERT(othercasebit);
7493
  /* Extracting bit difference info. */
7494
0
#if PCRE2_CODE_UNIT_WIDTH == 8
7495
0
  /* The upper part of the value is the offset of the code unit that holds
  the differing bit; the low byte is the bit mask itself. */
  othercasechar = cc + (othercasebit >> 8);
7496
0
  othercasebit &= 0xff;
7497
#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
7498
  /* Note that this code only handles characters in the BMP. If there
7499
  ever are characters outside the BMP whose othercase differs in only one
7500
  bit from itself (there currently are none), this code will need to be
7501
  revised for PCRE2_CODE_UNIT_WIDTH == 32. */
7502
  othercasechar = cc + (othercasebit >> 9);
7503
  if ((othercasebit & 0x100) != 0)
7504
    othercasebit = (othercasebit & 0xff) << 8;
7505
  else
7506
    othercasebit &= 0xff;
7507
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
7508
0
  }
7509
7510
0
/* First call for this context: preload the first chunk into TMP1 and make
TMP2 the target of the next load. */
if (context->sourcereg == -1)
7511
0
  {
7512
0
#if PCRE2_CODE_UNIT_WIDTH == 8
7513
0
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
7514
0
  if (context->length >= 4)
7515
0
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
7516
0
  else if (context->length >= 2)
7517
0
    OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
7518
0
  else
7519
0
#endif
7520
0
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
7521
#elif PCRE2_CODE_UNIT_WIDTH == 16
7522
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
7523
  if (context->length >= 4)
7524
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
7525
  else
7526
#endif
7527
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
7528
#elif PCRE2_CODE_UNIT_WIDTH == 32
7529
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
7530
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
7531
0
  context->sourcereg = TMP2;
7532
0
  }
7533
7534
0
#ifdef SUPPORT_UNICODE
7535
0
utflength = 1;
7536
0
if (common->utf && HAS_EXTRALEN(*cc))
7537
0
  utflength += GET_EXTRALEN(*cc);
7538
7539
0
do
7540
0
  {
7541
0
#endif
7542
7543
0
  context->length -= IN_UCHARS(1);
7544
0
#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
7545
7546
  /* Unaligned read is supported. */
7547
0
  /* Collect the expected code unit (with the case bit forced on) in
  context->c and the bit mask to OR into the subject in context->oc. */
  if (othercasebit != 0 && othercasechar == cc)
7548
0
    {
7549
0
    context->c.asuchars[context->ucharptr] = *cc | othercasebit;
7550
0
    context->oc.asuchars[context->ucharptr] = othercasebit;
7551
0
    }
7552
0
  else
7553
0
    {
7554
0
    context->c.asuchars[context->ucharptr] = *cc;
7555
0
    context->oc.asuchars[context->ucharptr] = 0;
7556
0
    }
7557
0
  context->ucharptr++;
7558
7559
0
  /* Flush the collected units once they match the width of the chunk that
  was loaded for them, or when nothing is left to collect. */
#if PCRE2_CODE_UNIT_WIDTH == 8
7560
0
  if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
7561
#else
7562
  if (context->ucharptr >= 2 || context->length == 0)
7563
#endif
7564
0
    {
7565
0
    /* Start loading the next chunk (if any data remains) before comparing
    the current one. */
    if (context->length >= 4)
7566
0
      OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
7567
0
    else if (context->length >= 2)
7568
0
      OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
7569
0
#if PCRE2_CODE_UNIT_WIDTH == 8
7570
0
    else if (context->length >= 1)
7571
0
      OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
7572
0
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
7573
0
    /* Flip to the register that holds the previously loaded chunk. */
    context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
7574
7575
0
    switch(context->ucharptr)
7576
0
      {
7577
0
      case 4 / sizeof(PCRE2_UCHAR):
7578
0
      if (context->oc.asint != 0)
7579
0
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
7580
0
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
7581
0
      break;
7582
7583
0
      case 2 / sizeof(PCRE2_UCHAR):
7584
0
      if (context->oc.asushort != 0)
7585
0
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
7586
0
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
7587
0
      break;
7588
7589
0
#if PCRE2_CODE_UNIT_WIDTH == 8
7590
0
      case 1:
7591
0
      if (context->oc.asbyte != 0)
7592
0
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
7593
0
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
7594
0
      break;
7595
0
#endif
7596
7597
0
      default:
7598
0
      SLJIT_UNREACHABLE();
7599
0
      break;
7600
0
      }
7601
0
    context->ucharptr = 0;
7602
0
    }
7603
7604
#else
7605
7606
  /* Unaligned read is unsupported or in 32 bit mode. */
7607
  if (context->length >= 1)
7608
    OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
7609
7610
  context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
7611
7612
  if (othercasebit != 0 && othercasechar == cc)
7613
    {
7614
    OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
7615
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
7616
    }
7617
  else
7618
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
7619
7620
#endif
7621
7622
0
  cc++;
7623
0
#ifdef SUPPORT_UNICODE
7624
0
  utflength--;
7625
0
  }
7626
0
while (utflength > 0);
7627
0
#endif
7628
7629
0
return cc;
7630
0
}
7631
7632
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
7633
7634
/* Rebases the character held in TMP1: emits an ADD or SUB of the difference
between the current `charoffset` and `value` (nothing is emitted when they are
equal) and then records `value` as the new `charoffset`. The enclosing function
must have a local named `charoffset`. The macro expands to more than one
statement and is not wrapped in do { } while (0), so it must only be used as a
complete statement, never as the unbraced body of an if/else/loop. `value` is
evaluated several times. */
#define SET_CHAR_OFFSET(value) \
7635
0
  if ((value) != charoffset) \
7636
0
    { \
7637
0
    if ((value) < charoffset) \
7638
0
      OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
7639
0
    else \
7640
0
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
7641
0
    } \
7642
0
  charoffset = (value);
7643
7644
static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr);
7645
7646
/* Bit flags collected in the `unicode_status` variable of
compile_xclass_matchingpath while it scans an extended class. The HAS_* flags
record which kinds of property items were seen; XCLASS_NEEDS_UCD is the mask
of those flags and is used to decide whether the character's UCD record has
to be looked up. */
#ifdef SUPPORT_UNICODE
7647
0
/* The character value is still needed after the property checks. */
#define XCLASS_SAVE_CHAR 0x001
7648
0
/* The character has already been copied to RETURN_ADDR. */
#define XCLASS_CHAR_SAVED 0x002
7649
0
#define XCLASS_HAS_TYPE 0x004
7650
0
#define XCLASS_HAS_SCRIPT 0x008
7651
0
#define XCLASS_HAS_SCRIPT_EXTENSION 0x010
7652
0
#define XCLASS_HAS_BOOL 0x020
7653
0
#define XCLASS_HAS_BIDICL 0x040
7654
0
#define XCLASS_NEEDS_UCD (XCLASS_HAS_TYPE | XCLASS_HAS_SCRIPT | XCLASS_HAS_SCRIPT_EXTENSION | XCLASS_HAS_BOOL | XCLASS_HAS_BIDICL)
7655
0
/* A negated script-extension item (XCL_NOTPROP with PT_SCX) is present. */
#define XCLASS_SCRIPT_EXTENSION_NOTPROP 0x080
7656
0
/* Where TMP2 was parked during the script-extension checks, so that it can
be restored afterwards. */
#define XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR 0x100
7657
0
#define XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0 0x200
7658
#endif /* SUPPORT_UNICODE */
7659
7660
static void compile_xclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
7661
0
{
7662
0
DEFINE_COMPILER;
7663
0
jump_list *found = NULL;
7664
0
jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
7665
0
sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
7666
0
struct sljit_jump *jump = NULL;
7667
0
PCRE2_SPTR ccbegin;
7668
0
int compares, invertcmp, numberofcmps;
7669
0
#if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
7670
0
BOOL utf = common->utf;
7671
0
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */
7672
7673
0
#ifdef SUPPORT_UNICODE
7674
0
sljit_u32 unicode_status = 0;
7675
0
sljit_u32 category_list = 0;
7676
0
sljit_u32 items;
7677
0
int typereg = TMP1;
7678
0
const sljit_u32 *other_cases;
7679
0
#endif /* SUPPORT_UNICODE */
7680
7681
/* Scanning the necessary info. */
7682
0
cc++;
7683
0
ccbegin = cc;
7684
0
compares = 0;
7685
7686
0
if (cc[-1] & XCL_MAP)
7687
0
  {
7688
0
  min = 0;
7689
0
  cc += 32 / sizeof(PCRE2_UCHAR);
7690
0
  }
7691
7692
0
while (*cc != XCL_END)
7693
0
  {
7694
0
  compares++;
7695
7696
0
  if (*cc == XCL_SINGLE)
7697
0
    {
7698
0
    cc ++;
7699
0
    GETCHARINCTEST(c, cc);
7700
0
    if (c > max) max = c;
7701
0
    if (c < min) min = c;
7702
0
#ifdef SUPPORT_UNICODE
7703
0
    unicode_status |= XCLASS_SAVE_CHAR;
7704
0
#endif /* SUPPORT_UNICODE */
7705
0
    }
7706
0
  else if (*cc == XCL_RANGE)
7707
0
    {
7708
0
    cc ++;
7709
0
    GETCHARINCTEST(c, cc);
7710
0
    if (c < min) min = c;
7711
0
    GETCHARINCTEST(c, cc);
7712
0
    if (c > max) max = c;
7713
0
#ifdef SUPPORT_UNICODE
7714
0
    unicode_status |= XCLASS_SAVE_CHAR;
7715
0
#endif /* SUPPORT_UNICODE */
7716
0
    }
7717
0
#ifdef SUPPORT_UNICODE
7718
0
  else
7719
0
    {
7720
0
    SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
7721
0
    cc++;
7722
7723
0
    if (*cc == PT_CLIST && cc[-1] == XCL_PROP)
7724
0
      {
7725
0
      other_cases = PRIV(ucd_caseless_sets) + cc[1];
7726
0
      while (*other_cases != NOTACHAR)
7727
0
        {
7728
0
        if (*other_cases > max) max = *other_cases;
7729
0
        if (*other_cases < min) min = *other_cases;
7730
0
        other_cases++;
7731
0
        }
7732
0
      }
7733
0
    else
7734
0
      {
7735
0
      max = READ_CHAR_MAX;
7736
0
      min = 0;
7737
0
      }
7738
7739
0
    items = 0;
7740
7741
0
    switch(*cc)
7742
0
      {
7743
0
      case PT_ANY:
7744
      /* PT_ANY either accepts everything (XCL_PROP) or is ignored (XCL_NOTPROP). */
7745
0
      if (cc[-1] == XCL_PROP)
7746
0
        items = UCPCAT_ALL;
7747
0
      else
7748
0
        compares--;
7749
0
      break;
7750
7751
0
      case PT_LAMP:
7752
0
      items = UCPCAT3(ucp_Lu, ucp_Ll, ucp_Lt);
7753
0
      break;
7754
7755
0
      case PT_GC:
7756
0
      items = UCPCAT_RANGE(PRIV(ucp_typerange)[(int)cc[1] * 2], PRIV(ucp_typerange)[(int)cc[1] * 2 + 1]);
7757
0
      break;
7758
7759
0
      case PT_PC:
7760
0
      items = UCPCAT(cc[1]);
7761
0
      break;
7762
7763
0
      case PT_WORD:
7764
0
      items = UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N;
7765
0
      break;
7766
7767
0
      case PT_ALNUM:
7768
0
      items = UCPCAT_L | UCPCAT_N;
7769
0
      break;
7770
7771
0
      case PT_SCX:
7772
0
      unicode_status |= XCLASS_HAS_SCRIPT_EXTENSION;
7773
0
      if (cc[-1] == XCL_NOTPROP)
7774
0
        {
7775
0
        unicode_status |= XCLASS_SCRIPT_EXTENSION_NOTPROP;
7776
0
        break;
7777
0
        }
7778
0
      compares++;
7779
      /* Fall through */
7780
7781
0
      case PT_SC:
7782
0
      unicode_status |= XCLASS_HAS_SCRIPT;
7783
0
      break;
7784
7785
0
      case PT_SPACE:
7786
0
      case PT_PXSPACE:
7787
0
      case PT_PXGRAPH:
7788
0
      case PT_PXPRINT:
7789
0
      case PT_PXPUNCT:
7790
0
      unicode_status |= XCLASS_SAVE_CHAR | XCLASS_HAS_TYPE;
7791
0
      break;
7792
7793
0
      case PT_CLIST:
7794
0
      case PT_UCNC:
7795
0
      case PT_PXXDIGIT:
7796
0
      unicode_status |= XCLASS_SAVE_CHAR;
7797
0
      break;
7798
7799
0
      case PT_BOOL:
7800
0
      unicode_status |= XCLASS_HAS_BOOL;
7801
0
      break;
7802
7803
0
      case PT_BIDICL:
7804
0
      unicode_status |= XCLASS_HAS_BIDICL;
7805
0
      break;
7806
7807
0
      default:
7808
0
      SLJIT_UNREACHABLE();
7809
0
      break;
7810
0
      }
7811
7812
0
    if (items > 0)
7813
0
      {
7814
0
      if (cc[-1] == XCL_NOTPROP)
7815
0
        items ^= UCPCAT_ALL;
7816
0
      category_list |= items;
7817
0
      unicode_status |= XCLASS_HAS_TYPE;
7818
0
      compares--;
7819
0
      }
7820
7821
0
    cc += 2;
7822
0
    }
7823
0
#endif /* SUPPORT_UNICODE */
7824
0
  }
7825
7826
0
#ifdef SUPPORT_UNICODE
7827
0
if (category_list == UCPCAT_ALL)
7828
0
  {
7829
  /* All characters are accepted, same as dotall. */
7830
0
  compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
7831
0
  if (list == backtracks)
7832
0
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7833
0
  return;
7834
0
  }
7835
7836
0
if (compares == 0 && category_list == 0)
7837
0
  {
7838
  /* No characters are accepted, same as (*F) or dotall. */
7839
0
  compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
7840
0
  if (list != backtracks)
7841
0
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7842
0
  return;
7843
0
  }
7844
#else /* !SUPPORT_UNICODE */
7845
SLJIT_ASSERT(compares > 0);
7846
#endif /* SUPPORT_UNICODE */
7847
7848
/* We are not necessarily in UTF mode, even in 8-bit mode. */
7849
0
cc = ccbegin;
7850
0
if ((cc[-1] & XCL_NOT) != 0)
7851
0
  read_char(common, min, max, backtracks, READ_CHAR_UPDATE_STR_PTR);
7852
0
else
7853
0
  {
7854
0
#ifdef SUPPORT_UNICODE
7855
0
  read_char(common, min, max, (unicode_status & XCLASS_NEEDS_UCD) ? backtracks : NULL, 0);
7856
#else /* !SUPPORT_UNICODE */
7857
  read_char(common, min, max, NULL, 0);
7858
#endif /* SUPPORT_UNICODE */
7859
0
  }
7860
7861
0
if ((cc[-1] & XCL_HASPROP) == 0)
7862
0
  {
7863
0
  if ((cc[-1] & XCL_MAP) != 0)
7864
0
    {
7865
0
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
7866
0
    if (!optimize_class(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found))
7867
0
      {
7868
0
      OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
7869
0
      OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
7870
0
      OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
7871
0
      OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
7872
0
      OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
7873
0
      add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
7874
0
      }
7875
7876
0
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7877
0
    JUMPHERE(jump);
7878
7879
0
    cc += 32 / sizeof(PCRE2_UCHAR);
7880
0
    }
7881
0
  else
7882
0
    {
7883
0
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
7884
0
    add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
7885
0
    }
7886
0
  }
7887
0
else if ((cc[-1] & XCL_MAP) != 0)
7888
0
  {
7889
0
  OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
7890
0
#ifdef SUPPORT_UNICODE
7891
0
  unicode_status |= XCLASS_CHAR_SAVED;
7892
0
#endif /* SUPPORT_UNICODE */
7893
0
  if (!optimize_class(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
7894
0
    {
7895
0
#if PCRE2_CODE_UNIT_WIDTH == 8
7896
0
    jump = NULL;
7897
0
    if (common->utf)
7898
0
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
7899
0
      jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
7900
7901
0
    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
7902
0
    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
7903
0
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
7904
0
    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
7905
0
    OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
7906
0
    add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
7907
7908
0
#if PCRE2_CODE_UNIT_WIDTH == 8
7909
0
    if (common->utf)
7910
0
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
7911
0
      JUMPHERE(jump);
7912
0
    }
7913
7914
0
  OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
7915
0
  cc += 32 / sizeof(PCRE2_UCHAR);
7916
0
  }
7917
7918
0
#ifdef SUPPORT_UNICODE
7919
0
if (unicode_status & XCLASS_NEEDS_UCD)
7920
0
  {
7921
0
  if ((unicode_status & (XCLASS_SAVE_CHAR | XCLASS_CHAR_SAVED)) == XCLASS_SAVE_CHAR)
7922
0
    OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
7923
7924
#if PCRE2_CODE_UNIT_WIDTH == 32
7925
  if (!common->utf)
7926
    {
7927
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
7928
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
7929
    JUMPHERE(jump);
7930
    }
7931
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
7932
7933
0
  OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
7934
0
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
7935
0
  OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
7936
0
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
7937
0
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
7938
0
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
7939
0
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
7940
0
  OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
7941
0
  OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
7942
0
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
7943
0
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
7944
7945
0
  ccbegin = cc;
7946
7947
0
  if (category_list != 0)
7948
0
    compares++;
7949
7950
0
  if (unicode_status & XCLASS_HAS_BIDICL)
7951
0
    {
7952
0
    OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass));
7953
0
    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BIDICLASS_SHIFT);
7954
7955
0
    while (*cc != XCL_END)
7956
0
      {
7957
0
      if (*cc == XCL_SINGLE)
7958
0
        {
7959
0
        cc ++;
7960
0
        GETCHARINCTEST(c, cc);
7961
0
        }
7962
0
      else if (*cc == XCL_RANGE)
7963
0
        {
7964
0
        cc ++;
7965
0
        GETCHARINCTEST(c, cc);
7966
0
        GETCHARINCTEST(c, cc);
7967
0
        }
7968
0
      else
7969
0
        {
7970
0
        SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
7971
0
        cc++;
7972
0
        if (*cc == PT_BIDICL)
7973
0
          {
7974
0
          compares--;
7975
0
          invertcmp = (compares == 0 && list != backtracks);
7976
0
          if (cc[-1] == XCL_NOTPROP)
7977
0
            invertcmp ^= 0x1;
7978
0
          jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
7979
0
          add_jump(compiler, compares > 0 ? list : backtracks, jump);
7980
0
          }
7981
0
        cc += 2;
7982
0
        }
7983
0
      }
7984
7985
0
    cc = ccbegin;
7986
0
    }
7987
7988
0
  if (unicode_status & XCLASS_HAS_BOOL)
7989
0
    {
7990
0
    OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, bprops));
7991
0
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BPROPS_MASK);
7992
0
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);
7993
7994
0
    while (*cc != XCL_END)
7995
0
      {
7996
0
      if (*cc == XCL_SINGLE)
7997
0
        {
7998
0
        cc ++;
7999
0
        GETCHARINCTEST(c, cc);
8000
0
        }
8001
0
      else if (*cc == XCL_RANGE)
8002
0
        {
8003
0
        cc ++;
8004
0
        GETCHARINCTEST(c, cc);
8005
0
        GETCHARINCTEST(c, cc);
8006
0
        }
8007
0
      else
8008
0
        {
8009
0
        SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
8010
0
        cc++;
8011
0
        if (*cc == PT_BOOL)
8012
0
          {
8013
0
          compares--;
8014
0
          invertcmp = (compares == 0 && list != backtracks);
8015
0
          if (cc[-1] == XCL_NOTPROP)
8016
0
            invertcmp ^= 0x1;
8017
8018
0
          OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_boolprop_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)(1u << (cc[1] & 0x1f)));
8019
0
          add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp));
8020
0
          }
8021
0
        cc += 2;
8022
0
        }
8023
0
      }
8024
8025
0
    cc = ccbegin;
8026
0
    }
8027
8028
0
  if (unicode_status & XCLASS_HAS_SCRIPT)
8029
0
    {
8030
0
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
8031
8032
0
    while (*cc != XCL_END)
8033
0
      {
8034
0
      if (*cc == XCL_SINGLE)
8035
0
        {
8036
0
        cc ++;
8037
0
        GETCHARINCTEST(c, cc);
8038
0
        }
8039
0
      else if (*cc == XCL_RANGE)
8040
0
        {
8041
0
        cc ++;
8042
0
        GETCHARINCTEST(c, cc);
8043
0
        GETCHARINCTEST(c, cc);
8044
0
        }
8045
0
      else
8046
0
        {
8047
0
        SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
8048
0
        cc++;
8049
0
        switch (*cc)
8050
0
          {
8051
0
          case PT_SCX:
8052
0
          if (cc[-1] == XCL_NOTPROP)
8053
0
            break;
8054
          /* Fall through */
8055
8056
0
          case PT_SC:
8057
0
          compares--;
8058
0
          invertcmp = (compares == 0 && list != backtracks);
8059
0
          if (cc[-1] == XCL_NOTPROP)
8060
0
            invertcmp ^= 0x1;
8061
8062
0
          add_jump(compiler, compares > 0 ? list : backtracks, CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]));
8063
0
          }
8064
0
        cc += 2;
8065
0
        }
8066
0
      }
8067
8068
0
    cc = ccbegin;
8069
0
    }
8070
8071
0
  if (unicode_status & XCLASS_HAS_SCRIPT_EXTENSION)
8072
0
    {
8073
0
    OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass));
8074
0
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_SCRIPTX_MASK);
8075
0
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);
8076
8077
0
    if (unicode_status & XCLASS_SCRIPT_EXTENSION_NOTPROP)
8078
0
      {
8079
0
      if (unicode_status & XCLASS_HAS_TYPE)
8080
0
        {
8081
0
        if (unicode_status & XCLASS_SAVE_CHAR)
8082
0
          {
8083
0
          OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP2, 0);
8084
0
          unicode_status |= XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0;
8085
0
          }
8086
0
        else
8087
0
          {
8088
0
          OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP2, 0);
8089
0
          unicode_status |= XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR;
8090
0
          }
8091
0
        }
8092
0
      OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
8093
0
      }
8094
8095
0
    while (*cc != XCL_END)
8096
0
      {
8097
0
      if (*cc == XCL_SINGLE)
8098
0
        {
8099
0
        cc ++;
8100
0
        GETCHARINCTEST(c, cc);
8101
0
        }
8102
0
      else if (*cc == XCL_RANGE)
8103
0
        {
8104
0
        cc ++;
8105
0
        GETCHARINCTEST(c, cc);
8106
0
        GETCHARINCTEST(c, cc);
8107
0
        }
8108
0
      else
8109
0
        {
8110
0
        SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
8111
0
        cc++;
8112
0
        if (*cc == PT_SCX)
8113
0
          {
8114
0
          compares--;
8115
0
          invertcmp = (compares == 0 && list != backtracks);
8116
8117
0
          jump = NULL;
8118
0
          if (cc[-1] == XCL_NOTPROP)
8119
0
            {
8120
0
            jump = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, (int)cc[1]);
8121
0
            if (invertcmp)
8122
0
              {
8123
0
              add_jump(compiler, backtracks, jump);
8124
0
              jump = NULL;
8125
0
              }
8126
0
            invertcmp ^= 0x1;
8127
0
            }
8128
8129
0
          OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_script_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)(1u << (cc[1] & 0x1f)));
8130
0
          add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp));
8131
8132
0
          if (jump != NULL)
8133
0
            JUMPHERE(jump);
8134
0
          }
8135
0
        cc += 2;
8136
0
        }
8137
0
      }
8138
8139
0
    if (unicode_status & XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0)
8140
0
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
8141
0
    else if (unicode_status & XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR)
8142
0
      OP1(SLJIT_MOV, TMP2, 0, RETURN_ADDR, 0);
8143
0
    cc = ccbegin;
8144
0
    }
8145
8146
0
  if (unicode_status & XCLASS_SAVE_CHAR)
8147
0
    OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
8148
8149
0
  if (unicode_status & XCLASS_HAS_TYPE)
8150
0
    {
8151
0
    if (unicode_status & XCLASS_SAVE_CHAR)
8152
0
      typereg = RETURN_ADDR;
8153
8154
0
    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
8155
0
    OP2(SLJIT_SHL, typereg, 0, SLJIT_IMM, 1, TMP2, 0);
8156
8157
0
    if (category_list > 0)
8158
0
      {
8159
0
      compares--;
8160
0
      invertcmp = (compares == 0 && list != backtracks);
8161
0
      OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, category_list);
8162
0
      add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp));
8163
0
      }
8164
0
    }
8165
0
  }
8166
0
#endif /* SUPPORT_UNICODE */
8167
8168
/* Generating code. */
8169
0
charoffset = 0;
8170
0
numberofcmps = 0;
8171
8172
0
while (*cc != XCL_END)
8173
0
  {
8174
0
  compares--;
8175
0
  invertcmp = (compares == 0 && list != backtracks);
8176
0
  jump = NULL;
8177
8178
0
  if (*cc == XCL_SINGLE)
8179
0
    {
8180
0
    cc ++;
8181
0
    GETCHARINCTEST(c, cc);
8182
8183
0
    if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
8184
0
      {
8185
0
      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
8186
0
      OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8187
0
      numberofcmps++;
8188
0
      }
8189
0
    else if (numberofcmps > 0)
8190
0
      {
8191
0
      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
8192
0
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
8193
0
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8194
0
      numberofcmps = 0;
8195
0
      }
8196
0
    else
8197
0
      {
8198
0
      jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
8199
0
      numberofcmps = 0;
8200
0
      }
8201
0
    }
8202
0
  else if (*cc == XCL_RANGE)
8203
0
    {
8204
0
    cc ++;
8205
0
    GETCHARINCTEST(c, cc);
8206
0
    SET_CHAR_OFFSET(c);
8207
0
    GETCHARINCTEST(c, cc);
8208
8209
0
    if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
8210
0
      {
8211
0
      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
8212
0
      OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
8213
0
      numberofcmps++;
8214
0
      }
8215
0
    else if (numberofcmps > 0)
8216
0
      {
8217
0
      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
8218
0
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
8219
0
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8220
0
      numberofcmps = 0;
8221
0
      }
8222
0
    else
8223
0
      {
8224
0
      jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
8225
0
      numberofcmps = 0;
8226
0
      }
8227
0
    }
8228
0
#ifdef SUPPORT_UNICODE
8229
0
  else
8230
0
    {
8231
0
    SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
8232
0
    if (*cc == XCL_NOTPROP)
8233
0
      invertcmp ^= 0x1;
8234
0
    cc++;
8235
0
    switch(*cc)
8236
0
      {
8237
0
      case PT_ANY:
8238
0
      case PT_LAMP:
8239
0
      case PT_GC:
8240
0
      case PT_PC:
8241
0
      case PT_SC:
8242
0
      case PT_SCX:
8243
0
      case PT_BOOL:
8244
0
      case PT_BIDICL:
8245
0
      case PT_WORD:
8246
0
      case PT_ALNUM:
8247
0
      compares++;
8248
      /* Already handled. */
8249
0
      break;
8250
8251
0
      case PT_SPACE:
8252
0
      case PT_PXSPACE:
8253
0
      SET_CHAR_OFFSET(9);
8254
0
      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
8255
0
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
8256
8257
0
      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
8258
0
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8259
8260
0
      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
8261
0
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8262
8263
0
      OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Zl, ucp_Zs));
8264
0
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_ZERO);
8265
0
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8266
0
      break;
8267
8268
0
      case PT_CLIST:
8269
0
      other_cases = PRIV(ucd_caseless_sets) + cc[1];
8270
8271
      /* At least three characters are required.
8272
         Otherwise this case would be handled by the normal code path. */
8273
0
      SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
8274
0
      SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
8275
8276
      /* Optimizing character pairs, if their difference is power of 2. */
8277
0
      if (is_powerof2(other_cases[1] ^ other_cases[0]))
8278
0
        {
8279
0
        if (charoffset == 0)
8280
0
          OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
8281
0
        else
8282
0
          {
8283
0
          OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
8284
0
          OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
8285
0
          }
8286
0
        OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, other_cases[1]);
8287
0
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
8288
0
        other_cases += 2;
8289
0
        }
8290
0
      else if (is_powerof2(other_cases[2] ^ other_cases[1]))
8291
0
        {
8292
0
        if (charoffset == 0)
8293
0
          OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
8294
0
        else
8295
0
          {
8296
0
          OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
8297
0
          OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
8298
0
          }
8299
0
        OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, other_cases[2]);
8300
0
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
8301
8302
0
        OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
8303
0
        OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
8304
8305
0
        other_cases += 3;
8306
0
        }
8307
0
      else
8308
0
        {
8309
0
        OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
8310
0
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
8311
0
        }
8312
8313
0
      while (*other_cases != NOTACHAR)
8314
0
        {
8315
0
        OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
8316
0
        OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
8317
0
        }
8318
0
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8319
0
      break;
8320
8321
0
      case PT_UCNC:
8322
0
      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
8323
0
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
8324
0
      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
8325
0
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8326
0
      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
8327
0
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8328
8329
0
      SET_CHAR_OFFSET(0xa0);
8330
0
      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
8331
0
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
8332
0
      SET_CHAR_OFFSET(0);
8333
0
      OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
8334
0
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL);
8335
0
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8336
0
      break;
8337
8338
0
      case PT_PXGRAPH:
8339
0
      OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Cc, ucp_Cs) | UCPCAT_RANGE(ucp_Zl, ucp_Zs));
8340
0
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
8341
8342
0
      OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT(ucp_Cf));
8343
0
      jump = JUMP(SLJIT_ZERO);
8344
8345
0
      c = charoffset;
8346
      /* In case of ucp_Cf, we overwrite the result. */
8347
0
      SET_CHAR_OFFSET(0x2066);
8348
0
      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
8349
0
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
8350
8351
0
      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
8352
0
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8353
8354
0
      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
8355
0
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8356
8357
      /* Restore charoffset. */
8358
0
      SET_CHAR_OFFSET(c);
8359
8360
0
      JUMPHERE(jump);
8361
0
      jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
8362
0
      break;
8363
8364
0
      case PT_PXPRINT:
8365
0
      OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Cc, ucp_Cs) | UCPCAT2(ucp_Zl, ucp_Zp));
8366
0
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
8367
8368
0
      OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT(ucp_Cf));
8369
0
      jump = JUMP(SLJIT_ZERO);
8370
8371
0
      c = charoffset;
8372
      /* In case of ucp_Cf, we overwrite the result. */
8373
0
      SET_CHAR_OFFSET(0x2066);
8374
0
      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
8375
0
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
8376
8377
0
      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
8378
0
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8379
8380
      /* Restore charoffset. */
8381
0
      SET_CHAR_OFFSET(c);
8382
8383
0
      JUMPHERE(jump);
8384
0
      jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
8385
0
      break;
8386
8387
0
      case PT_PXPUNCT:
8388
0
      OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Sc, ucp_So));
8389
0
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
8390
8391
0
      SET_CHAR_OFFSET(0);
8392
0
      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x7f);
8393
0
      OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL);
8394
8395
0
      OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Pc, ucp_Ps));
8396
0
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_ZERO);
8397
0
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8398
0
      break;
8399
8400
0
      case PT_PXXDIGIT:
8401
0
      SET_CHAR_OFFSET(CHAR_A);
8402
0
      OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, ~0x20);
8403
0
      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP2, 0, SLJIT_IMM, CHAR_F - CHAR_A);
8404
0
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
8405
8406
0
      SET_CHAR_OFFSET(CHAR_0);
8407
0
      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_9 - CHAR_0);
8408
0
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
8409
8410
0
      SET_CHAR_OFFSET(0xff10);
8411
0
      jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 0xff46 - 0xff10);
8412
8413
0
      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff19 - 0xff10);
8414
0
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
8415
8416
0
      SET_CHAR_OFFSET(0xff21);
8417
0
      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff26 - 0xff21);
8418
0
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
8419
8420
0
      SET_CHAR_OFFSET(0xff41);
8421
0
      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff46 - 0xff41);
8422
0
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
8423
8424
0
      SET_CHAR_OFFSET(0xff10);
8425
8426
0
      JUMPHERE(jump);
8427
0
      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0);
8428
0
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8429
0
      break;
8430
8431
0
      default:
8432
0
      SLJIT_UNREACHABLE();
8433
0
      break;
8434
0
      }
8435
0
    cc += 2;
8436
0
    }
8437
0
#endif /* SUPPORT_UNICODE */
8438
8439
0
  if (jump != NULL)
8440
0
    add_jump(compiler, compares > 0 ? list : backtracks, jump);
8441
0
  }
8442
8443
0
SLJIT_ASSERT(compares == 0);
8444
0
if (found != NULL)
8445
0
  set_jumps(found, LABEL());
8446
0
}
8447
8448
#undef SET_TYPE_OFFSET
8449
#undef SET_CHAR_OFFSET
8450
8451
#endif
8452
8453
/* Emits the matching-path code for the position assertions handled by the
switch below: OP_SOD, OP_SOM, the four word boundary opcodes, OP_EODN, OP_EOD,
OP_DOLL, OP_DOLLM, OP_CIRC and OP_CIRCM.

Arguments:
  common      compiler state (newline settings, mode, helper jump lists)
  type        the assertion opcode to generate code for
  cc          current position in the compiled pattern; returned unchanged
  backtracks  list that receives every jump emitted for a failed test

Returns:      cc

Any other opcode reaches SLJIT_UNREACHABLE(). */

static PCRE2_SPTR compile_simple_assertion_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
/* Forward jumps which are resolved inside a single case. */
struct sljit_jump *done;
struct sljit_jump *branch;
struct sljit_jump *cr_last;
struct sljit_jump *crlf;
jump_list **boundary_callers;
sljit_sw field;
/* Register which addresses the jit_arguments structure. It is replaced
by a temporary register when HAS_VIRTUAL_REGISTERS is true. */
int args_reg = ARGUMENTS;
BOOL negated;
/* A fixed newline above 255 is compared as two code units. */
BOOL two_unit_nl = (common->nltype == NLTYPE_FIXED && common->newline > 255);
sljit_sw nl_first = (sljit_sw)((common->newline >> 8) & 0xff);
sljit_sw nl_second = (sljit_sw)(common->newline & 0xff);

switch(type)
  {
  case OP_SOD:
  case OP_SOM:
  /* Backtrack unless STR_PTR is equal to the begin (OP_SOD) or the
  str (OP_SOM) member of jit_arguments. */
  if (type == OP_SOD)
    field = SLJIT_OFFSETOF(jit_arguments, begin);
  else
    field = SLJIT_OFFSETOF(jit_arguments, str);

  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    args_reg = TMP1;
    }
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(args_reg), field);
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
  return cc;

  case OP_NOT_WORD_BOUNDARY:
  case OP_WORD_BOUNDARY:
  case OP_NOT_UCP_WORD_BOUNDARY:
  case OP_UCP_WORD_BOUNDARY:
  negated = (type == OP_NOT_WORD_BOUNDARY || type == OP_NOT_UCP_WORD_BOUNDARY);
  if (type == OP_NOT_WORD_BOUNDARY || type == OP_WORD_BOUNDARY)
    boundary_callers = &common->wordboundary;
  else
    boundary_callers = &common->ucp_wordboundary;

  /* The test itself is done by a shared routine reached by a fast call. */
  add_jump(compiler, boundary_callers, JUMP(SLJIT_FAST_CALL));
#ifdef SUPPORT_UNICODE
  if (common->invalid_utf)
    {
    /* Here the outcome is taken from TMP2 instead of the flags. */
    add_jump(compiler, backtracks, CMP(negated ? SLJIT_NOT_EQUAL : SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0));
    return cc;
    }
#endif /* SUPPORT_UNICODE */
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(negated ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

  case OP_EODN:
  /* Requires rather complex checks. The first jump is taken when
  STR_PTR has already reached STR_END. */
  done = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  if (common->nltype != NLTYPE_FIXED)
    {
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    branch = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
    /* The current code unit is CR: compare STR_PTR + 2 with STR_END. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, TMP2, 0, STR_END, 0);
    cr_last = JUMP(SLJIT_GREATER);
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */);
    /* Equal: the second code unit must be NL. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    crlf = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

    /* The current code unit is not CR. */
    JUMPHERE(branch);
    if (common->nltype != NLTYPE_ANYCRLF)
      {
      /* STR_PTR is saved in TMP3 and restored after the check. */
      OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
      read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
      add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
      sljit_set_current_flags(compiler, SLJIT_SET_Z);
      add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
      OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
      }
    else
      {
      OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
      }
    JUMPHERE(cr_last);
    JUMPHERE(crlf);
    }
  else if (common->newline <= 255)
    {
    /* Single unit fixed newline: it must be the last code unit. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
    }
  else
    {
    /* Two unit fixed newline: it must be the last two code units. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (common->mode != PCRE2_JIT_COMPLETE)
      {
      branch = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
      /* STR_PTR + 2 differs from STR_END. Backtrack when it is below
      STR_END or the first unit is not the first newline unit. Otherwise
      check_partial() is invoked and a backtrack follows. */
      OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, STR_END, 0);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, nl_first);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL);
      add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
      check_partial(common, TRUE);
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
      JUMPHERE(branch);
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));

    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, nl_first));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, nl_second));
    }
  JUMPHERE(done);
  if (common->mode != PCRE2_JIT_COMPLETE)
    check_partial(common, TRUE);
  return cc;

  case OP_EOD:
  /* Backtrack while STR_PTR is below STR_END. */
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
  if (common->mode != PCRE2_JIT_COMPLETE)
    check_partial(common, TRUE);
  return cc;

  case OP_DOLL:
  /* Backtrack when PCRE2_NOTEOL is set in the options member. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
    args_reg = TMP2;
    }
  OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));

  if (common->endonly)
    {
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
    check_partial(common, FALSE);
    }
  else
    {
    /* The rest is the same code as OP_EODN. */
    compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
    }
  return cc;

  case OP_DOLLM:
  branch = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
  /* STR_PTR is not below STR_END: only PCRE2_NOTEOL can fail here. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
    args_reg = TMP2;
    }
  OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  check_partial(common, FALSE);
  done = JUMP(SLJIT_JUMP);

  /* STR_PTR is below STR_END: a newline must start here. */
  JUMPHERE(branch);
  if (!two_unit_nl)
    {
    peek_char(common, common->nlmax, TMP3, 0, NULL);
    check_newlinechar(common, common->nltype, backtracks, FALSE);
    }
  else
    {
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (common->mode != PCRE2_JIT_COMPLETE)
      {
      branch = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
      /* STR_PTR = STR_END - IN_UCHARS(1) */
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, nl_first));
      check_partial(common, TRUE);
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
      JUMPHERE(branch);
      }
    else
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));

    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, nl_first));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, nl_second));
    }
  JUMPHERE(done);
  return cc;

  case OP_CIRC:
  /* Backtrack when STR_PTR is above the begin member, or when
  PCRE2_NOTBOL is set in the options member. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
    args_reg = TMP2;
    }
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, begin));
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
  OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  return cc;

  case OP_CIRCM:
  /* TMP2 might be used by peek_char_back, so the argument pointer is
  placed in TMP1 and TMP2 receives the begin member. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    args_reg = TMP1;
    }
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, begin));
  branch = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);
  /* STR_PTR is not above begin: only PCRE2_NOTBOL can fail here. */
  OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(args_reg), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  done = JUMP(SLJIT_JUMP);

  /* STR_PTR is above begin: a newline must end just before it. */
  JUMPHERE(branch);
  if (!common->alt_circumflex)
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  if (!two_unit_nl)
    {
    peek_char_back(common, common->nlmax, backtracks);
    check_newlinechar(common, common->nltype, backtracks, FALSE);
    }
  else
    {
    /* Both newline units must be at or after begin (still in TMP2). */
    OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, TMP2, 0));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, nl_first));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, nl_second));
    }
  JUMPHERE(done);
  return cc;

  default:
  break;
  }

SLJIT_UNREACHABLE();
return cc;
}
8695
8696
#ifdef SUPPORT_UNICODE
8697
8698
#if PCRE2_CODE_UNIT_WIDTH != 32
8699
8700
/* The code in this function copies the logic of the interpreter function that
8701
is defined in the pcre2_extuni.c source. If that code is updated, this
8702
function, and those below it, must be kept in step (note by PH, June 2024). */
8703
8704
static PCRE2_SPTR SLJIT_FUNC do_extuni_utf(jit_arguments *args, PCRE2_SPTR cc)
8705
0
{
8706
0
PCRE2_SPTR start_subject = args->begin;
8707
0
PCRE2_SPTR end_subject = args->end;
8708
0
int lgb, rgb, ricount;
8709
0
PCRE2_SPTR prevcc, endcc, bptr;
8710
0
BOOL first = TRUE;
8711
0
BOOL was_ep_ZWJ = FALSE;
8712
0
uint32_t c;
8713
8714
0
prevcc = cc;
8715
0
endcc = NULL;
8716
0
do
8717
0
  {
8718
0
  GETCHARINC(c, cc);
8719
0
  rgb = UCD_GRAPHBREAK(c);
8720
8721
0
  if (first)
8722
0
    {
8723
0
    lgb = rgb;
8724
0
    endcc = cc;
8725
0
    first = FALSE;
8726
0
    continue;
8727
0
    }
8728
8729
0
  if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8730
0
    break;
8731
8732
  /* ZWJ followed by Extended Pictographic is allowed only if the ZWJ was
8733
  preceded by Extended Pictographic. */
8734
8735
0
  if (lgb == ucp_gbZWJ && rgb == ucp_gbExtended_Pictographic && !was_ep_ZWJ)
8736
0
    break;
8737
8738
  /* Not breaking between Regional Indicators is allowed only if there
8739
  are an even number of preceding RIs. */
8740
8741
0
  if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator)
8742
0
    {
8743
0
    ricount = 0;
8744
0
    bptr = prevcc;
8745
8746
    /* bptr is pointing to the left-hand character */
8747
0
    while (bptr > start_subject)
8748
0
      {
8749
0
      bptr--;
8750
0
      BACKCHAR(bptr);
8751
0
      GETCHAR(c, bptr);
8752
8753
0
      if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator)
8754
0
        break;
8755
8756
0
      ricount++;
8757
0
      }
8758
8759
0
    if ((ricount & 1) != 0) break;  /* Grapheme break required */
8760
0
    }
8761
8762
  /* Set a flag when ZWJ follows Extended Pictographic (with optional Extend in
8763
  between; see next statement). */
8764
8765
0
  was_ep_ZWJ = (lgb == ucp_gbExtended_Pictographic && rgb == ucp_gbZWJ);
8766
8767
  /* If Extend follows Extended_Pictographic, do not update lgb; this allows
8768
  any number of them before a following ZWJ. */
8769
8770
0
  if (rgb != ucp_gbExtend || lgb != ucp_gbExtended_Pictographic)
8771
0
    lgb = rgb;
8772
8773
0
  prevcc = endcc;
8774
0
  endcc = cc;
8775
0
  }
8776
0
while (cc < end_subject);
8777
8778
0
return endcc;
8779
0
}
8780
8781
#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
8782
8783
/* The code in this function copies the logic of the interpreter function that
8784
is defined in the pcre2_extuni.c source. If that code is updated, this
8785
function, and the one below it, must be kept in step (note by PH, June 2024). */
8786
8787
static PCRE2_SPTR SLJIT_FUNC do_extuni_utf_invalid(jit_arguments *args, PCRE2_SPTR cc)
8788
0
{
8789
0
PCRE2_SPTR start_subject = args->begin;
8790
0
PCRE2_SPTR end_subject = args->end;
8791
0
int lgb, rgb, ricount;
8792
0
PCRE2_SPTR prevcc, endcc, bptr;
8793
0
BOOL first = TRUE;
8794
0
BOOL was_ep_ZWJ = FALSE;
8795
0
uint32_t c;
8796
8797
0
prevcc = cc;
8798
0
endcc = NULL;
8799
0
do
8800
0
  {
8801
0
  GETCHARINC_INVALID(c, cc, end_subject, break);
8802
0
  rgb = UCD_GRAPHBREAK(c);
8803
8804
0
  if (first)
8805
0
    {
8806
0
    lgb = rgb;
8807
0
    endcc = cc;
8808
0
    first = FALSE;
8809
0
    continue;
8810
0
    }
8811
8812
0
  if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8813
0
    break;
8814
8815
  /* ZWJ followed by Extended Pictographic is allowed only if the ZWJ was
8816
  preceded by Extended Pictographic. */
8817
8818
0
  if (lgb == ucp_gbZWJ && rgb == ucp_gbExtended_Pictographic && !was_ep_ZWJ)
8819
0
    break;
8820
8821
  /* Not breaking between Regional Indicators is allowed only if there
8822
  are an even number of preceding RIs. */
8823
8824
0
  if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator)
8825
0
    {
8826
0
    ricount = 0;
8827
0
    bptr = prevcc;
8828
8829
    /* bptr is pointing to the left-hand character */
8830
0
    while (bptr > start_subject)
8831
0
      {
8832
0
      GETCHARBACK_INVALID(c, bptr, start_subject, break);
8833
8834
0
      if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator)
8835
0
        break;
8836
8837
0
      ricount++;
8838
0
      }
8839
8840
0
    if ((ricount & 1) != 0)
8841
0
      break;  /* Grapheme break required */
8842
0
    }
8843
8844
  /* Set a flag when ZWJ follows Extended Pictographic (with optional Extend in
8845
  between; see next statement). */
8846
8847
0
  was_ep_ZWJ = (lgb == ucp_gbExtended_Pictographic && rgb == ucp_gbZWJ);
8848
8849
  /* If Extend follows Extended_Pictographic, do not update lgb; this allows
8850
  any number of them before a following ZWJ. */
8851
8852
0
  if (rgb != ucp_gbExtend || lgb != ucp_gbExtended_Pictographic)
8853
0
    lgb = rgb;
8854
8855
0
  prevcc = endcc;
8856
0
  endcc = cc;
8857
0
  }
8858
0
while (cc < end_subject);
8859
8860
0
return endcc;
8861
0
}
8862
8863
/* The code in this function copies the logic of the interpreter function that
8864
is defined in the pcre2_extuni.c source. If that code is updated, this
8865
function must be kept in step (note by PH, June 2024). */
8866
8867
static PCRE2_SPTR SLJIT_FUNC do_extuni_no_utf(jit_arguments *args, PCRE2_SPTR cc)
8868
0
{
8869
0
PCRE2_SPTR start_subject = args->begin;
8870
0
PCRE2_SPTR end_subject = args->end;
8871
0
int lgb, rgb, ricount;
8872
0
PCRE2_SPTR bptr;
8873
0
uint32_t c;
8874
0
BOOL was_ep_ZWJ = FALSE;
8875
8876
/* Patch by PH */
8877
/* GETCHARINC(c, cc); */
8878
0
c = *cc++;
8879
8880
#if PCRE2_CODE_UNIT_WIDTH == 32
8881
if (c >= 0x110000)
8882
  return cc;
8883
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8884
0
lgb = UCD_GRAPHBREAK(c);
8885
8886
0
while (cc < end_subject)
8887
0
  {
8888
0
  c = *cc;
8889
#if PCRE2_CODE_UNIT_WIDTH == 32
8890
  if (c >= 0x110000)
8891
    break;
8892
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8893
0
  rgb = UCD_GRAPHBREAK(c);
8894
8895
0
  if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8896
0
    break;
8897
8898
  /* ZWJ followed by Extended Pictographic is allowed only if the ZWJ was
8899
  preceded by Extended Pictographic. */
8900
8901
0
  if (lgb == ucp_gbZWJ && rgb == ucp_gbExtended_Pictographic && !was_ep_ZWJ)
8902
0
    break;
8903
8904
  /* Not breaking between Regional Indicators is allowed only if there
8905
  are an even number of preceding RIs. */
8906
8907
0
  if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator)
8908
0
    {
8909
0
    ricount = 0;
8910
0
    bptr = cc - 1;
8911
8912
    /* bptr is pointing to the left-hand character */
8913
0
    while (bptr > start_subject)
8914
0
      {
8915
0
      bptr--;
8916
0
      c = *bptr;
8917
#if PCRE2_CODE_UNIT_WIDTH == 32
8918
      if (c >= 0x110000)
8919
        break;
8920
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8921
8922
0
      if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator) break;
8923
8924
0
      ricount++;
8925
0
      }
8926
8927
0
    if ((ricount & 1) != 0)
8928
0
      break;  /* Grapheme break required */
8929
0
    }
8930
8931
  /* Set a flag when ZWJ follows Extended Pictographic (with optional Extend in
8932
  between; see next statement). */
8933
8934
0
  was_ep_ZWJ = (lgb == ucp_gbExtended_Pictographic && rgb == ucp_gbZWJ);
8935
8936
  /* If Extend follows Extended_Pictographic, do not update lgb; this allows
8937
  any number of them before a following ZWJ. */
8938
8939
0
  if (rgb != ucp_gbExtend || lgb != ucp_gbExtended_Pictographic)
8940
0
    lgb = rgb;
8941
8942
0
  cc++;
8943
0
  }
8944
8945
0
return cc;
8946
0
}
8947
8948
#endif /* SUPPORT_UNICODE */
8949
8950
/* Emits the matching path for one single-character opcode.

   common        - compiler state (utf / invalid_utf flags, newline settings, match mode).
   type          - the opcode to compile (OP_DIGIT, OP_ANY, OP_CHAR, OP_CLASS, ...).
   cc            - pattern pointer just after the opcode, i.e. at its operand data.
   backtracks    - jump list receiving every jump taken when the character does not match.
   check_str_ptr - when TRUE an end-of-subject check is emitted before the character is
                   read (detect_partial_match, or a STR_PTR / STR_END compare).

   Returns the pattern pointer after the operand data consumed by the opcode. */
static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr)
8951
0
{
8952
0
DEFINE_COMPILER;
8953
0
int length;
8954
0
unsigned int c, oc, bit;
8955
0
compare_context context;
8956
0
struct sljit_jump *jump[3];
8957
0
jump_list *end_list;
8958
0
#ifdef SUPPORT_UNICODE
8959
0
PCRE2_UCHAR propdata[5];
8960
0
#endif /* SUPPORT_UNICODE */
8961
8962
0
switch(type)
8963
0
  {
8964
0
  case OP_NOT_DIGIT:
8965
0
  case OP_DIGIT:
8966
  /* Digits are usually 0-9, so it is worth optimizing them. */
8967
0
  if (check_str_ptr)
8968
0
    detect_partial_match(common, backtracks);
8969
0
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8970
0
  if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_digit, FALSE))
8971
0
    read_char7_type(common, backtracks, type == OP_NOT_DIGIT);
8972
0
  else
8973
0
#endif
8974
0
    read_char8_type(common, backtracks, type == OP_NOT_DIGIT);
8975
    /* Flip the starting bit in the negative case. */
8976
0
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_digit);
8977
0
  add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
8978
0
  return cc;
8979
8980
0
  case OP_NOT_WHITESPACE:
8981
0
  case OP_WHITESPACE:
8982
0
  if (check_str_ptr)
8983
0
    detect_partial_match(common, backtracks);
8984
0
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8985
0
  if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_space, FALSE))
8986
0
    read_char7_type(common, backtracks, type == OP_NOT_WHITESPACE);
8987
0
  else
8988
0
#endif
8989
0
    read_char8_type(common, backtracks, type == OP_NOT_WHITESPACE);
8990
0
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_space);
8991
0
  add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
8992
0
  return cc;
8993
8994
0
  case OP_NOT_WORDCHAR:
8995
0
  case OP_WORDCHAR:
8996
0
  if (check_str_ptr)
8997
0
    detect_partial_match(common, backtracks);
8998
0
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8999
0
  if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_word, FALSE))
9000
0
    read_char7_type(common, backtracks, type == OP_NOT_WORDCHAR);
9001
0
  else
9002
0
#endif
9003
0
    read_char8_type(common, backtracks, type == OP_NOT_WORDCHAR);
9004
0
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_word);
9005
0
  add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
9006
0
  return cc;
9007
9008
0
  case OP_ANY:
9009
0
  if (check_str_ptr)
9010
0
    detect_partial_match(common, backtracks);
9011
0
  read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
  /* Fixed newline stored in two code units (newline > 255): the match fails only
  when TMP1 equals its high byte and the next subject unit equals its low byte. */
9012
0
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
9013
0
    {
9014
0
    jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
9015
0
    end_list = NULL;
9016
0
    if (common->mode != PCRE2_JIT_PARTIAL_HARD)
9017
0
      add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
9018
0
    else
9019
0
      check_str_end(common, &end_list);
9020
9021
0
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
9022
0
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
9023
0
    set_jumps(end_list, LABEL());
9024
0
    JUMPHERE(jump[0]);
9025
0
    }
9026
0
  else
9027
0
    check_newlinechar(common, common->nltype, backtracks, TRUE);
9028
0
  return cc;
9029
9030
0
  case OP_ALLANY:
9031
0
  if (check_str_ptr)
9032
0
    detect_partial_match(common, backtracks);
9033
0
#ifdef SUPPORT_UNICODE
9034
0
  if (common->utf && common->invalid_utf)
9035
0
    {
9036
0
    read_char(common, 0, READ_CHAR_MAX, backtracks, READ_CHAR_UPDATE_STR_PTR);
9037
0
    return cc;
9038
0
    }
9039
0
#endif /* SUPPORT_UNICODE */
9040
9041
0
  skip_valid_char(common);
9042
0
  return cc;
9043
9044
0
  case OP_ANYBYTE:
9045
0
  if (check_str_ptr)
9046
0
    detect_partial_match(common, backtracks);
9047
0
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9048
0
  return cc;
9049
9050
0
#ifdef SUPPORT_UNICODE
9051
0
  case OP_NOTPROP:
9052
0
  case OP_PROP:
  /* Wrap the two operand units into a temporary single-item XCLASS list so the
  xclass compiler can be reused; two pattern units are consumed. */
9053
0
  propdata[0] = XCL_HASPROP;
9054
0
  propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
9055
0
  propdata[2] = cc[0];
9056
0
  propdata[3] = cc[1];
9057
0
  propdata[4] = XCL_END;
9058
0
  if (check_str_ptr)
9059
0
    detect_partial_match(common, backtracks);
9060
0
  compile_xclass_matchingpath(common, propdata, backtracks);
9061
0
  return cc + 2;
9062
0
#endif
9063
9064
0
  case OP_ANYNL:
9065
0
  if (check_str_ptr)
9066
0
    detect_partial_match(common, backtracks);
9067
0
  read_char(common, common->bsr_nlmin, common->bsr_nlmax, NULL, 0);
9068
0
  jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
9069
  /* We don't need to handle soft partial matching case. */
9070
0
  end_list = NULL;
9071
0
  if (common->mode != PCRE2_JIT_PARTIAL_HARD)
9072
0
    add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
9073
0
  else
9074
0
    check_str_end(common, &end_list);
9075
0
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
9076
0
  jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
9077
0
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9078
0
  jump[2] = JUMP(SLJIT_JUMP);
9079
0
  JUMPHERE(jump[0]);
9080
0
  check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
9081
0
  set_jumps(end_list, LABEL());
9082
0
  JUMPHERE(jump[1]);
9083
0
  JUMPHERE(jump[2]);
9084
0
  return cc;
9085
9086
0
  case OP_NOT_HSPACE:
9087
0
  case OP_HSPACE:
9088
0
  if (check_str_ptr)
9089
0
    detect_partial_match(common, backtracks);
9090
9091
0
  if (type == OP_NOT_HSPACE)
9092
0
    read_char(common, 0x9, 0x3000, backtracks, READ_CHAR_UPDATE_STR_PTR);
9093
0
  else
9094
0
    read_char(common, 0x9, 0x3000, NULL, 0);
9095
9096
0
  add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
9097
0
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
9098
0
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
9099
0
  return cc;
9100
9101
0
  case OP_NOT_VSPACE:
9102
0
  case OP_VSPACE:
9103
0
  if (check_str_ptr)
9104
0
    detect_partial_match(common, backtracks);
9105
9106
0
  if (type == OP_NOT_VSPACE)
9107
0
    read_char(common, 0xa, 0x2029, backtracks, READ_CHAR_UPDATE_STR_PTR);
9108
0
  else
9109
0
    read_char(common, 0xa, 0x2029, NULL, 0);
9110
9111
0
  add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
9112
0
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
9113
0
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
9114
0
  return cc;
9115
9116
0
#ifdef SUPPORT_UNICODE
9117
0
  case OP_EXTUNI:
9118
0
  if (check_str_ptr)
9119
0
    detect_partial_match(common, backtracks);
9120
9121
0
  SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
9122
0
  OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
9123
  /* Call the C helper matching the build width and UTF mode; its return value
  becomes the new STR_PTR, and in invalid_utf mode a zero return backtracks. */
9124
0
#if PCRE2_CODE_UNIT_WIDTH != 32
9125
0
  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
9126
0
    common->utf ? (common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_utf)) : SLJIT_FUNC_ADDR(do_extuni_no_utf));
9127
0
  if (common->invalid_utf)
9128
0
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
9129
#else
9130
  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
9131
    common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_no_utf));
9132
  if (common->invalid_utf)
9133
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
9134
#endif
9135
9136
0
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
9137
9138
0
  if (common->mode == PCRE2_JIT_PARTIAL_HARD)
9139
0
    {
9140
0
    jump[0] = CMP(SLJIT_LESS, SLJIT_RETURN_REG, 0, STR_END, 0);
9141
    /* Since we successfully read a char above, partial matching must occur. */
9142
0
    check_partial(common, TRUE);
9143
0
    JUMPHERE(jump[0]);
9144
0
    }
9145
0
  return cc;
9146
0
#endif
9147
9148
0
  case OP_CHAR:
9149
0
  case OP_CHARI:
9150
0
  length = 1;
9151
0
#ifdef SUPPORT_UNICODE
9152
0
  if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
9153
0
#endif
9154
9155
0
  if (check_str_ptr && common->mode != PCRE2_JIT_COMPLETE)
9156
0
    detect_partial_match(common, backtracks);
9157
  /* Caseful character, no other case, or char_get_othercase_bit() != 0
  (presumably the two cases differ in a single bit - see the assert further
  down): compare as a fixed-length code unit sequence. */
9158
0
  if (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0)
9159
0
    {
9160
0
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
9161
0
    if (length > 1 || (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE))
9162
0
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
9163
9164
0
    context.length = IN_UCHARS(length);
9165
0
    context.sourcereg = -1;
9166
0
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
9167
0
    context.ucharptr = 0;
9168
0
#endif
9169
0
    return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
9170
0
    }
9171
  /* Remaining case: caseless character whose other case cannot be merged with
  a bit mask; read the subject character and compare it with both c and oc. */
9172
0
#ifdef SUPPORT_UNICODE
9173
0
  if (common->utf)
9174
0
    {
9175
0
    GETCHAR(c, cc);
9176
0
    }
9177
0
  else
9178
0
#endif
9179
0
    c = *cc;
9180
9181
0
  SLJIT_ASSERT(type == OP_CHARI && char_has_othercase(common, cc));
9182
9183
0
  if (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE)
9184
0
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
9185
9186
0
  oc = char_othercase(common, c);
9187
0
  read_char(common, c < oc ? c : oc, c > oc ? c : oc, NULL, 0);
9188
9189
0
  SLJIT_ASSERT(!is_powerof2(c ^ oc));
9190
9191
0
  if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
9192
0
    {
9193
0
    OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, oc);
9194
0
    SELECT(SLJIT_EQUAL, TMP1, SLJIT_IMM, c, TMP1);
9195
0
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
9196
0
    }
9197
0
  else
9198
0
    {
9199
0
    jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
9200
0
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
9201
0
    JUMPHERE(jump[0]);
9202
0
    }
9203
0
  return cc + length;
9204
9205
0
  case OP_NOT:
9206
0
  case OP_NOTI:
9207
0
  if (check_str_ptr)
9208
0
    detect_partial_match(common, backtracks);
9209
9210
0
  length = 1;
9211
0
#ifdef SUPPORT_UNICODE
9212
0
  if (common->utf)
9213
0
    {
9214
0
#if PCRE2_CODE_UNIT_WIDTH == 8
9215
0
    c = *cc;
9216
0
    if (c < 128 && !common->invalid_utf)
9217
0
      {
9218
0
      OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
9219
0
      if (type == OP_NOT || !char_has_othercase(common, cc))
9220
0
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
9221
0
      else
9222
0
        {
9223
        /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
9224
0
        OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
9225
0
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
9226
0
        }
9227
      /* Skip the variable-length character. */
9228
0
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9229
0
      jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
9230
0
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
9231
0
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
9232
0
      JUMPHERE(jump[0]);
9233
0
      return cc + 1;
9234
0
      }
9235
0
    else
9236
0
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
9237
0
      {
9238
0
      GETCHARLEN(c, cc, length);
9239
0
      }
9240
0
    }
9241
0
  else
9242
0
#endif /* SUPPORT_UNICODE */
9243
0
    c = *cc;
9244
9245
0
  if (type == OP_NOT || !char_has_othercase(common, cc))
9246
0
    {
9247
0
    read_char(common, c, c, backtracks, READ_CHAR_UPDATE_STR_PTR);
9248
0
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
9249
0
    }
9250
0
  else
9251
0
    {
9252
0
    oc = char_othercase(common, c);
9253
0
    read_char(common, c < oc ? c : oc, c > oc ? c : oc, backtracks, READ_CHAR_UPDATE_STR_PTR);
9254
0
    bit = c ^ oc;
9255
0
    if (is_powerof2(bit))
9256
0
      {
9257
0
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
9258
0
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
9259
0
      }
9260
0
    else
9261
0
      {
9262
0
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
9263
0
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
9264
0
      }
9265
0
    }
9266
0
  return cc + length;
9267
9268
0
  case OP_CLASS:
9269
0
  case OP_NCLASS:
9270
0
  if (check_str_ptr)
9271
0
    detect_partial_match(common, backtracks);
9272
9273
0
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
9274
0
  bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255;
9275
0
  if (type == OP_NCLASS)
9276
0
    read_char(common, 0, bit, backtracks, READ_CHAR_UPDATE_STR_PTR);
9277
0
  else
9278
0
    read_char(common, 0, bit, NULL, 0);
9279
#else
9280
  if (type == OP_NCLASS)
9281
    read_char(common, 0, 255, backtracks, READ_CHAR_UPDATE_STR_PTR);
9282
  else
9283
    read_char(common, 0, 255, NULL, 0);
9284
#endif
9285
9286
0
  if (optimize_class(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
9287
0
    return cc + 32 / sizeof(PCRE2_UCHAR);
9288
  /* Characters above the bitmap range: OP_CLASS fails on them at once, while
  OP_NCLASS keeps the jump in jump[0] and lands it after the bitmap test. */
9289
0
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
9290
0
  jump[0] = NULL;
9291
0
  if (common->utf)
9292
0
    {
9293
0
    jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
9294
0
    if (type == OP_CLASS)
9295
0
      {
9296
0
      add_jump(compiler, backtracks, jump[0]);
9297
0
      jump[0] = NULL;
9298
0
      }
9299
0
    }
9300
#elif PCRE2_CODE_UNIT_WIDTH != 8
9301
  jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
9302
  if (type == OP_CLASS)
9303
    {
9304
    add_jump(compiler, backtracks, jump[0]);
9305
    jump[0] = NULL;
9306
    }
9307
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
9308
  /* Bitmap lookup: TMP2 = 1 << (c & 7), TMP1 = the byte at cc[c >> 3]; a clear
  bit jumps to backtracks. */
9309
0
  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
9310
0
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
9311
0
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
9312
0
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
9313
0
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
9314
0
  add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
9315
9316
0
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
9317
0
  if (jump[0] != NULL)
9318
0
    JUMPHERE(jump[0]);
9319
0
#endif
9320
0
  return cc + 32 / sizeof(PCRE2_UCHAR);
9321
9322
0
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
9323
0
  case OP_XCLASS:
9324
0
  if (check_str_ptr)
9325
0
    detect_partial_match(common, backtracks);
9326
0
  compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
9327
0
  return cc + GET(cc, 0) - 1;
9328
0
#endif
9329
0
  }
9330
0
SLJIT_UNREACHABLE();
9331
0
return cc;
9332
0
}
9333
9334
static SLJIT_INLINE PCRE2_SPTR compile_charn_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, jump_list **backtracks)
{
/* Emits the matching path for the opcode at cc and consumes at least one
input character.

To reduce the number of subject length checks, consecutive OP_CHAR / OP_CHARI
items that can be compared as plain code unit sequences are merged: a first
pass only measures how many code units can be merged, then one length check
is emitted and byte_sequence_compare() is run for every merged item. When
nothing can be merged the opcode is passed to compile_char1_matchingpath().

Returns the pattern pointer after the compiled item(s). */
DEFINE_COMPILER;
PCRE2_SPTR scan;
compare_context context;
int units;

context.length = 0;
for (scan = cc; scan < ccend; )
  {
  units = 0;
  if (*scan == OP_CHAR || *scan == OP_CHARI)
    {
    units = 1;
    /* A caseless character whose other case has no usable "othercase bit"
    cannot take part in a fixed sequence compare. */
    if (*scan == OP_CHARI && char_has_othercase(common, scan + 1) && char_get_othercase_bit(common, scan + 1) == 0)
      units = 0;
#ifdef SUPPORT_UNICODE
    else if (common->utf && HAS_EXTRALEN(scan[1]))
      units += GET_EXTRALEN(scan[1]);
#endif
    }

  /* The first item that cannot be merged ends the run. */
  if (units == 0)
    break;

  scan += 1 + units;
  context.length += IN_UCHARS(units);

  /* Stop extending the run once it has grown past 128. */
  if (context.length > 128)
    break;
  }

/* A non-fixed length character will be checked if length == 0. */
if (context.length <= 0)
  return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);

/* We have a fixed-length byte sequence: one end-of-subject check covers it. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));

context.sourcereg = -1;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
context.ucharptr = 0;
#endif

do
  cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks);
while (context.length > 0);

return cc;
}
9399
9400
/* Forward definitions. */
9401
static void compile_matchingpath(compiler_common *, PCRE2_SPTR, PCRE2_SPTR, backtrack_common *);
9402
static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
9403
9404
/* Allocates a backtrack record of (size) bytes from the compiler's memory,
zero fills it, stores (ccstart) in its cc field and links it on top of
parent's list (prev = old top, parent->top = new record). If the compiler
reports an error after the allocation, the ENCLOSING function returns (error).
Expects the names compiler, backtrack and parent to be in scope. */
#define PUSH_BACKTRACK(size, ccstart, error) \
9405
0
  do \
9406
0
    { \
9407
0
    backtrack = sljit_alloc_memory(compiler, (size)); \
9408
0
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
9409
0
      return error; \
9410
0
    memset(backtrack, 0, size); \
9411
0
    backtrack->prev = parent->top; \
9412
0
    backtrack->cc = (ccstart); \
9413
0
    parent->top = backtrack; \
9414
0
    } \
9415
0
  while (0)
9416
9417
/* Same as PUSH_BACKTRACK, for enclosing functions returning void: on a
compiler error the enclosing function simply returns. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
9418
0
  do \
9419
0
    { \
9420
0
    backtrack = sljit_alloc_memory(compiler, (size)); \
9421
0
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
9422
0
      return; \
9423
0
    memset(backtrack, 0, size); \
9424
0
    backtrack->prev = parent->top; \
9425
0
    backtrack->cc = (ccstart); \
9426
0
    parent->top = backtrack; \
9427
0
    } \
9428
0
  while (0)
9429
/* Views the local variable backtrack as a pointer to the given record type. */
9430
0
#define BACKTRACK_AS(type) ((type *)backtrack)
9431
9432
static void compile_dnref_search(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
{
/* Emits the search among the groups that share a duplicated name (OP_DNREF /
OP_DNREFI at cc). The address of the selected group's OVECTOR slot goes to
TMP2; TMP1 is overwritten with OVECTOR(1).

Every name table entry except the last one is tested in order: the first group
whose start offset differs from OVECTOR(1) is selected. The last entry is
selected unconditionally; if backtracks is not NULL and unset back references
are not allowed to match, a last group whose start offset equals OVECTOR(1)
jumps to backtracks. */
DEFINE_COMPILER;
int remaining = GET2(cc, 1 + IMM2_SIZE) - 1;
PCRE2_SPTR entry = common->name_table + GET2(cc, 1) * common->name_entry_size;
unsigned int ovector_index;
jump_list *found = NULL;

SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));

/* All candidates but the last: leave the search as soon as one differs from OVECTOR(1). */
for (; remaining > 0; remaining--, entry += common->name_entry_size)
  {
  ovector_index = GET2(entry, 0) << 1;
  GET_LOCAL_BASE(TMP2, 0, OVECTOR(ovector_index));
  add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovector_index), TMP1, 0));
  }

/* Last candidate: TMP2 is loaded without a "found" test. */
ovector_index = GET2(entry, 0) << 1;
GET_LOCAL_BASE(TMP2, 0, OVECTOR(ovector_index));
if (backtracks != NULL && !common->unset_backref)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovector_index), TMP1, 0));

set_jumps(found, LABEL());
}
9461
9462
/* Emits code that matches the subject at STR_PTR against the text captured by
   a back reference.

   common     - compiler state.
   cc         - points at the reference opcode. For OP_REF / OP_REFI the group
                number is read from the operand; for the duplicate-name opcodes
                TMP2 must already hold the address of the selected OVECTOR slot
                (see compile_dnref_search).
   backtracks - jump list taken when the reference does not match.
   withchecks - when TRUE, checks for an unset group (numbered references only,
                unless unset back references may match) and for an empty
                capture are emitted.
   emptyfail  - with withchecks: TRUE sends an empty capture to backtracks,
                FALSE lets it fall through as a successful match.

   The compare helper is selected by the case sensitivity of the opcode:
   OP_REF and OP_DNREF are compared casefully, OP_REFI and OP_DNREFI
   caselessly. (The earlier test "*cc == OP_REF" sent the case-sensitive
   OP_DNREF to the caseless helper.) */
static void compile_ref_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
{
DEFINE_COMPILER;
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
BOOL caseless = (*cc == OP_REFI || *cc == OP_DNREFI);
int offset = 0;
struct sljit_jump *jump = NULL;
struct sljit_jump *partial;
struct sljit_jump *nopartial;
#if defined SUPPORT_UNICODE
struct sljit_label *loop;
struct sljit_label *caseless_loop;
jump_list *no_match = NULL;
int source_reg = COUNT_MATCH;
int source_end_reg = ARGUMENTS;
int char1_reg = STACK_LIMIT;
#endif /* SUPPORT_UNICODE */

if (ref)
  {
  offset = GET2(cc, 1) << 1;
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
  /* OVECTOR(1) contains the "string begin - 1" constant. */
  if (withchecks && !common->unset_backref)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
  }
else
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);

#if defined SUPPORT_UNICODE
/* NOTE(review): only OP_REFI takes the character-by-character UTF path;
OP_DNREFI in UTF mode still uses the code unit based caseless helper below.
Widening this test needs iref_ptr to be reserved for OP_DNREFI as well, which
is decided outside this function - confirm before changing. */
if (common->utf && *cc == OP_REFI)
  {
  SLJIT_ASSERT(common->iref_ptr != 0);

  if (ref)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  else
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));

  if (withchecks && emptyfail)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0));

  /* Three registers are borrowed for the loop; their values are saved in the
  iref_ptr slots and restored on every exit path below. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr, source_reg, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw), source_end_reg, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2, char1_reg, 0);

  OP1(SLJIT_MOV, source_reg, 0, TMP1, 0);
  OP1(SLJIT_MOV, source_end_reg, 0, TMP2, 0);

  loop = LABEL();
  jump = CMP(SLJIT_GREATER_EQUAL, source_reg, 0, source_end_reg, 0);
  partial = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

  /* Read original character. It must be a valid UTF character. */
  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
  OP1(SLJIT_MOV, STR_PTR, 0, source_reg, 0);

  read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR | READ_CHAR_VALID_UTF);

  OP1(SLJIT_MOV, source_reg, 0, STR_PTR, 0);
  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
  OP1(SLJIT_MOV, char1_reg, 0, TMP1, 0);

  /* Read second character. */
  read_char(common, 0, READ_CHAR_MAX, &no_match, READ_CHAR_UPDATE_STR_PTR);

  CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);

  OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);

  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));

  /* TMP2 = TMP2 * 12 (4 * TMP2 + 8 * TMP2), then the ucd_records base is added. */
  OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);

  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records));

  OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, other_case));
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, caseset));
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);

  /* No simple other case match: walk the caseless set, if the character has one. */
  add_jump(compiler, &no_match, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_caseless_sets));

  caseless_loop = LABEL();
  OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP2), 0);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(uint32_t));
  OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, TMP1, 0, char1_reg, 0);
  JUMPTO(SLJIT_EQUAL, loop);
  JUMPTO(SLJIT_LESS, caseless_loop);

  set_jumps(no_match, LABEL());
  if (common->mode == PCRE2_JIT_COMPLETE)
    JUMPHERE(partial);

  OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
  OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
  OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    JUMPHERE(partial);
    OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
    OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
    OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);

    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    }

  /* Whole capture consumed: successful match. */
  JUMPHERE(jump);
  OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
  OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
  OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
  return;
  }
else
#endif /* SUPPORT_UNICODE */
  {
  /* TMP2 = length of the capture (end - start); the zero flag marks an empty capture. */
  if (ref)
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
  else
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);

  if (withchecks)
    jump = JUMP(SLJIT_ZERO);

  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
  if (common->mode == PCRE2_JIT_COMPLETE)
    add_jump(compiler, backtracks, partial);

  add_jump(compiler, caseless ? &common->caselesscmp : &common->casefulcmp, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));

  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    nopartial = JUMP(SLJIT_JUMP);
    JUMPHERE(partial);
    /* TMP2 -= STR_END - STR_PTR */
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
    partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
    add_jump(compiler, caseless ? &common->caselesscmp : &common->casefulcmp, JUMP(SLJIT_FAST_CALL));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
    JUMPHERE(partial);
    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(nopartial);
    }
  }

/* Empty capture: fails or matches trivially, as requested by the caller. */
if (jump != NULL)
  {
  if (emptyfail)
    add_jump(compiler, backtracks, jump);
  else
    JUMPHERE(jump);
  }
}
9626
9627
/* Emits the matching path for a back reference that is followed by a repeat
   opcode (OP_CRSTAR ... OP_CRMINRANGE).

   common - compiler state.
   cc     - points at the reference opcode: OP_REF / OP_REFI, or a duplicate-name
            reference, which is resolved with compile_dnref_search().
   parent - backtrack list that receives the new ref_iterator_backtrack.

   Returns the pattern pointer after the repeat opcode and its operands, or NULL
   when PUSH_BACKTRACK detects a compiler error. */
static SLJIT_INLINE PCRE2_SPTR compile_ref_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9628
0
{
9629
0
DEFINE_COMPILER;
9630
0
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
9631
0
backtrack_common *backtrack;
9632
0
PCRE2_UCHAR type;
9633
0
int offset = 0;
9634
0
struct sljit_label *label;
9635
0
struct sljit_jump *zerolength;
9636
0
struct sljit_jump *jump = NULL;
9637
0
PCRE2_SPTR ccbegin = cc;
9638
0
int min = 0, max = 0;
9639
0
BOOL minimize;
9640
9641
0
PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);
9642
/* A duplicate-name reference has one more IMM2 operand than a numbered one;
skipping it here lets the repeat opcode be read at the same offset below. */
9643
0
if (ref)
9644
0
  offset = GET2(cc, 1) << 1;
9645
0
else
9646
0
  cc += IMM2_SIZE;
9647
0
type = cc[1 + IMM2_SIZE];
9648
/* The minimizing form of each repeat is the odd opcode of its pair. */
9649
0
SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
9650
0
minimize = (type & 0x1) != 0;
/* Decode the repeat limits; max == 0 stands for "no upper limit" below. */
9651
0
switch(type)
9652
0
  {
9653
0
  case OP_CRSTAR:
9654
0
  case OP_CRMINSTAR:
9655
0
  min = 0;
9656
0
  max = 0;
9657
0
  cc += 1 + IMM2_SIZE + 1;
9658
0
  break;
9659
0
  case OP_CRPLUS:
9660
0
  case OP_CRMINPLUS:
9661
0
  min = 1;
9662
0
  max = 0;
9663
0
  cc += 1 + IMM2_SIZE + 1;
9664
0
  break;
9665
0
  case OP_CRQUERY:
9666
0
  case OP_CRMINQUERY:
9667
0
  min = 0;
9668
0
  max = 1;
9669
0
  cc += 1 + IMM2_SIZE + 1;
9670
0
  break;
9671
0
  case OP_CRRANGE:
9672
0
  case OP_CRMINRANGE:
9673
0
  min = GET2(cc, 1 + IMM2_SIZE + 1);
9674
0
  max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
9675
0
  cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
9676
0
  break;
9677
0
  default:
9678
0
  SLJIT_UNREACHABLE();
9679
0
  break;
9680
0
  }
9681
/* Maximizing repeat: match as many copies as allowed; a STR_PTR is pushed on
the stack after each repetition that continues the loop. */
9682
0
if (!minimize)
9683
0
  {
9684
0
  if (min == 0)
9685
0
    {
9686
0
    allocate_stack(common, 2);
9687
0
    if (ref)
9688
0
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9689
0
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9690
0
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
9691
    /* Temporary release of STR_PTR. */
9692
0
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9693
    /* Handles both invalid and empty cases. Since the minimum repeat
9694
    is zero, the invalid case is basically the same as an empty case. */
9695
0
    if (ref)
9696
0
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9697
0
    else
9698
0
      {
9699
0
      compile_dnref_search(common, ccbegin, NULL);
9700
0
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9701
0
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
9702
0
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9703
0
      }
9704
    /* Restore if not zero length. */
9705
0
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9706
0
    }
9707
0
  else
9708
0
    {
9709
0
    allocate_stack(common, 1);
9710
0
    if (ref)
9711
0
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9712
0
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9713
9714
0
    if (ref)
9715
0
      {
9716
0
      if (!common->unset_backref)
9717
0
        add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9718
0
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9719
0
      }
9720
0
    else
9721
0
      {
9722
0
      compile_dnref_search(common, ccbegin, &backtrack->own_backtracks);
9723
0
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9724
0
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
9725
0
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9726
0
      }
9727
0
    }
9728
  /* POSSESSIVE0 holds the repetition counter; it is only needed when a limit
  larger than one has to be checked. */
9729
0
  if (min > 1 || max > 1)
9730
0
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
9731
9732
0
  label = LABEL();
  /* Reload the slot address saved in POSSESSIVE1 for duplicate-name references. */
9733
0
  if (!ref)
9734
0
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
9735
0
  compile_ref_matchingpath(common, ccbegin, &backtrack->own_backtracks, FALSE, FALSE);
9736
9737
0
  if (min > 1 || max > 1)
9738
0
    {
9739
0
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
9740
0
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
9741
0
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
9742
0
    if (min > 1)
9743
0
      CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
9744
0
    if (max > 1)
9745
0
      {
9746
0
      jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
9747
0
      allocate_stack(common, 1);
9748
0
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9749
0
      JUMPTO(SLJIT_JUMP, label);
9750
0
      JUMPHERE(jump);
9751
0
      }
9752
0
    }
9753
9754
0
  if (max == 0)
9755
0
    {
9756
    /* Includes min > 1 case as well. */
9757
0
    allocate_stack(common, 1);
9758
0
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9759
0
    JUMPTO(SLJIT_JUMP, label);
9760
0
    }
9761
9762
0
  JUMPHERE(zerolength);
9763
0
  BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
9764
9765
0
  count_match(common);
9766
0
  return cc;
9767
0
  }
9768
/* Minimizing repeat. Stack layout: STACK(0) is written with STR_PTR,
STACK(1) holds the repetition counter, and for duplicate-name references
STACK(2) keeps the address of the selected OVECTOR slot. */
9769
0
allocate_stack(common, ref ? 2 : 3);
9770
0
if (ref)
9771
0
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9772
0
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9773
0
if (type != OP_CRMINSTAR)
9774
0
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
9775
9776
0
if (min == 0)
9777
0
  {
9778
  /* Handles both invalid and empty cases. Since the minimum repeat
9779
  is zero, the invalid case is basically the same as an empty case. */
9780
0
  if (ref)
9781
0
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9782
0
  else
9783
0
    {
9784
0
    compile_dnref_search(common, ccbegin, NULL);
9785
0
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9786
0
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
9787
0
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9788
0
    }
9789
  /* Length is non-zero, we can match real repeats. */
9790
0
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9791
0
  jump = JUMP(SLJIT_JUMP);
9792
0
  }
9793
0
else
9794
0
  {
9795
0
  if (ref)
9796
0
    {
9797
0
    if (!common->unset_backref)
9798
0
      add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9799
0
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9800
0
    }
9801
0
  else
9802
0
    {
9803
0
    compile_dnref_search(common, ccbegin, &backtrack->own_backtracks);
9804
0
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9805
0
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
9806
0
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9807
0
    }
9808
0
  }
9809
/* One more repetition starts at this label; with an upper limit the counter
in STACK(1) is checked first. */
9810
0
BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
9811
0
if (max > 0)
9812
0
  add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
9813
9814
0
if (!ref)
9815
0
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
9816
0
compile_ref_matchingpath(common, ccbegin, &backtrack->own_backtracks, TRUE, TRUE);
9817
0
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9818
9819
0
if (min > 1)
9820
0
  {
9821
0
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9822
0
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
9823
0
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9824
0
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath);
9825
0
  }
9826
0
else if (max > 0)
9827
0
  OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
9828
9829
0
if (jump != NULL)
9830
0
  JUMPHERE(jump);
9831
0
JUMPHERE(zerolength);
9832
9833
0
count_match(common);
9834
0
return cc;
9835
0
}
9836
9837
/* Emits the matching path of a recursion item. The called group starts at
common->start + GET(cc, 1). When get_framesize() reports no_stack for that
group its body is compiled inline; otherwise a fast call to a (possibly not
yet emitted) shared entry is generated and the entry is registered in
common->entries. Returns the code pointer following the item, or NULL when
the compiler reports an error. */
static SLJIT_INLINE PCRE2_SPTR compile_recurse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
recurse_entry *found;
recurse_entry *last = NULL;
sljit_sw target_offset = GET(cc, 1);
PCRE2_SPTR target_cc = common->start + target_offset;
PCRE2_SPTR next_cc = cc + 1 + LINK_SIZE;
BOOL needs_control_head;

PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);

/* A group which needs no stack is compiled in place of the call. */
if (get_framesize(common, target_cc, NULL, TRUE, &needs_control_head) == no_stack)
  {
  compile_matchingpath(common, next_opcode(common, target_cc), bracketend(target_cc) - (1 + LINK_SIZE), backtrack);
  BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
  return next_cc;
  }

/* Look for an already registered entry; 'last' tracks the list tail. */
for (found = common->entries; found != NULL; found = found->next)
  {
  if (found->start == target_offset)
    break;
  last = found;
  }

if (found == NULL)
  {
  /* First reference to this group: append a fresh entry. */
  found = sljit_alloc_memory(compiler, sizeof(recurse_entry));
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;

  found->start = target_offset;
  found->next = NULL;
  found->entry_label = NULL;
  found->backtrack_label = NULL;
  found->entry_calls = NULL;
  found->backtrack_calls = NULL;

  if (last == NULL)
    common->entries = found;
  else
    last->next = found;
  }

BACKTRACK_AS(recurse_backtrack)->entry = found;

/* Call the entry directly if its label exists, otherwise queue the call. */
if (found->entry_label != NULL)
  JUMPTO(SLJIT_FAST_CALL, found->entry_label);
else
  add_jump(compiler, &found->entry_calls, JUMP(SLJIT_FAST_CALL));

/* TMP1 == 0 after the call means the recursion did not match. */
add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
BACKTRACK_AS(recurse_backtrack)->matchingpath = LABEL();
return next_cc;
}
9895
9896
/* Runtime helper called from JIT code to run the user callout.

The generated code passes a partially filled callout block: capture_top
holds the number of ovector pairs and offset_vector temporarily holds the
current subject pointer. This function completes the block, converts the
JIT ovector (subject pointers) into offsets stored right after the block,
and invokes arguments->callout. Returns 0 when no callout function is set,
otherwise the value returned by the callout. */
static sljit_s32 SLJIT_FUNC do_callout_jit(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector)
{
PCRE2_SPTR subject_start;
PCRE2_SIZE *offsets;
sljit_u32 pairs_left, pair;

if (arguments->callout == NULL)
  return 0;

SLJIT_COMPILE_ASSERT(sizeof (PCRE2_SIZE) <= sizeof (sljit_sw), pcre2_size_must_be_lower_than_sljit_sw_size);

subject_start = arguments->begin;
/* The converted offset vector lives directly behind the callout block. */
offsets = (PCRE2_SIZE*)(callout_block + 1);
pairs_left = callout_block->capture_top;

SLJIT_ASSERT(pairs_left >= 1);

callout_block->version = 2;
callout_block->callout_flags = 0;

/* Subject related fields. offset_vector still holds the current subject
pointer here, so it must be read before it is overwritten below. */
callout_block->subject = subject_start;
callout_block->subject_length = arguments->end - arguments->begin;
callout_block->start_match = jit_ovector[0] - subject_start;
callout_block->current_position = (PCRE2_SPTR)callout_block->offset_vector - subject_start;

callout_block->capture_top = 1;
callout_block->offset_vector = offsets;

/* Pair 0 is always reported as unset. */
offsets[0] = PCRE2_UNSET;
offsets[1] = PCRE2_UNSET;

/* Convert the remaining pairs from pointers to offsets; capture_top ends up
one above the highest pair whose start is set. */
pair = 1;
for (pairs_left--; pairs_left != 0; pairs_left--)
  {
  offsets[2 * pair] = (PCRE2_SIZE)(jit_ovector[2 * pair] - subject_start);
  offsets[2 * pair + 1] = (PCRE2_SIZE)(jit_ovector[2 * pair + 1] - subject_start);

  if (offsets[2 * pair] != PCRE2_UNSET)
    callout_block->capture_top = pair + 1;

  pair++;
  }

return (arguments->callout)(callout_block, arguments->callout_data);
}
9949
9950
#define CALLOUT_ARG_OFFSET(arg) \
9951
    SLJIT_OFFSETOF(pcre2_callout_block, arg)
9952
9953
static SLJIT_INLINE PCRE2_SPTR compile_callout_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9954
0
{
9955
0
DEFINE_COMPILER;
9956
0
backtrack_common *backtrack;
9957
0
sljit_s32 mov_opcode;
9958
0
unsigned int callout_length = (*cc == OP_CALLOUT)
9959
0
    ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2 * LINK_SIZE);
9960
0
sljit_sw value1;
9961
0
sljit_sw value2;
9962
0
sljit_sw value3;
9963
0
sljit_uw callout_arg_size = (common->re->top_bracket + 1) * 2 * SSIZE_OF(sw);
9964
9965
0
PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
9966
9967
0
callout_arg_size = (sizeof(pcre2_callout_block) + callout_arg_size + sizeof(sljit_sw) - 1) / sizeof(sljit_sw);
9968
9969
0
allocate_stack(common, callout_arg_size);
9970
9971
0
SLJIT_ASSERT(common->capture_last_ptr != 0);
9972
0
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
9973
0
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
9974
0
value1 = (*cc == OP_CALLOUT) ? cc[1 + 2 * LINK_SIZE] : 0;
9975
0
OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, value1);
9976
0
OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
9977
0
OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_top), SLJIT_IMM, common->re->top_bracket + 1);
9978
9979
/* These pointer sized fields temporarly stores internal variables. */
9980
0
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
9981
9982
0
if (common->mark_ptr != 0)
9983
0
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
9984
0
mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
9985
0
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 1));
9986
0
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 1 + LINK_SIZE));
9987
9988
0
if (*cc == OP_CALLOUT)
9989
0
  {
9990
0
  value1 = 0;
9991
0
  value2 = 0;
9992
0
  value3 = 0;
9993
0
  }
9994
0
else
9995
0
  {
9996
0
  value1 = (sljit_sw) (cc + (1 + 4*LINK_SIZE) + 1);
9997
0
  value2 = (callout_length - (1 + 4*LINK_SIZE + 2));
9998
0
  value3 = (sljit_sw) (GET(cc, 1 + 3*LINK_SIZE));
9999
0
  }
10000
10001
0
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string), SLJIT_IMM, value1);
10002
0
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_length), SLJIT_IMM, value2);
10003
0
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_offset), SLJIT_IMM, value3);
10004
0
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
10005
10006
0
SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
10007
10008
/* Needed to save important temporary registers. */
10009
0
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0);
10010
/* SLJIT_R0 = arguments */
10011
0
OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0);
10012
0
GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
10013
0
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(32, W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_callout_jit));
10014
0
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
10015
0
free_stack(common, callout_arg_size);
10016
10017
/* Check return value. */
10018
0
OP2U(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
10019
0
add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_SIG_GREATER));
10020
0
if (common->abort_label == NULL)
10021
0
  add_jump(compiler, &common->abort, JUMP(SLJIT_NOT_EQUAL) /* SIG_LESS */);
10022
0
else
10023
0
  JUMPTO(SLJIT_NOT_EQUAL /* SIG_LESS */, common->abort_label);
10024
0
return cc + callout_length;
10025
0
}
10026
10027
#undef CALLOUT_ARG_SIZE
10028
#undef CALLOUT_ARG_OFFSET
10029
10030
/* Emits the backward step at the start of a lookbehind branch.

OP_REVERSE moves back a fixed number of characters and reports failure
through the parent's backtrack list. OP_VREVERSE has a minimum and a
maximum; it gets its own backtrack record, and the positions after the
minimum and after the (clamped) maximum step are stored in STACK(3) and
STACK(2). Returns the code pointer following the opcode, or NULL if the
backtrack record cannot be allocated. */
static PCRE2_SPTR compile_reverse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack = NULL;
jump_list **fail_list;
unsigned int min_back, max_back;
#ifdef SUPPORT_UNICODE
struct sljit_jump *at_start;
struct sljit_label *loop;
#endif

SLJIT_ASSERT(parent->top == NULL);

if (*cc != OP_REVERSE)
  {
  SLJIT_ASSERT(*cc == OP_VREVERSE);
  PUSH_BACKTRACK(sizeof(vreverse_backtrack), cc, NULL);

  fail_list = &backtrack->own_backtracks;
  min_back = GET2(cc, 1);
  max_back = GET2(cc, 1 + IMM2_SIZE);
  cc += 1 + 2 * IMM2_SIZE;

  SLJIT_ASSERT(min_back < max_back);
  }
else
  {
  fail_list = &parent->own_backtracks;
  min_back = GET2(cc, 1);
  max_back = min_back;
  cc += 1 + IMM2_SIZE;

  SLJIT_ASSERT(min_back > 0);
  }

/* TMP2 = start of the subject. */
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
  }
else
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));

#ifdef SUPPORT_UNICODE
if (common->utf)
  {
  /* Characters have variable width: step back one character at a time. */
  if (min_back > 0)
    {
    OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, min_back);
    loop = LABEL();
    add_jump(compiler, fail_list, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0));
    move_back(common, fail_list, FALSE);
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, loop);
    }

  if (min_back < max_back)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);

    /* Optional part: reaching the subject start ends the loop, it is not
    a failure. */
    OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, max_back - min_back);
    loop = LABEL();
    at_start = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
    move_back(common, fail_list, FALSE);
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, loop);

    JUMPHERE(at_start);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);
    }
  }
else
#endif
  {
  /* Fixed width code units: plain pointer arithmetic. */
  if (min_back > 0)
    {
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(min_back));
    add_jump(compiler, fail_list, CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0));
    }

  if (min_back < max_back)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);

    /* Step back by the optional amount and clamp to the subject start. */
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max_back - min_back));
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, STR_PTR, 0, TMP2, 0);
    SELECT(SLJIT_LESS, STR_PTR, TMP2, 0, STR_PTR);

    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);
    }
  }

check_start_used_ptr(common);

/* Only the variable length form owns a backtrack record. */
if (min_back < max_back)
  BACKTRACK_AS(vreverse_backtrack)->matchingpath = LABEL();

return cc;
}
10129
10130
/* Scans the opcodes starting at cc, skipping callouts, word boundary and
line anchor checks and OP_ALT. Returns FALSE if OP_KET is reached having
seen nothing else, and TRUE as soon as any other opcode is found. */
static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(PCRE2_SPTR cc)
{
for (;;)
  {
  switch (*cc)
    {
    case OP_KET:
    return FALSE;

    /* String callouts carry their own length. */
    case OP_CALLOUT_STR:
    cc += GET(cc, 1 + 2*LINK_SIZE);
    continue;

    /* Fixed length items which are skipped. */
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    case OP_WORD_BOUNDARY:
    case OP_NOT_WORD_BOUNDARY:
    case OP_UCP_WORD_BOUNDARY:
    case OP_NOT_UCP_WORD_BOUNDARY:
    case OP_CALLOUT:
    case OP_ALT:
    cc += PRIV(OP_lengths)[*cc];
    continue;

    default:
    return TRUE;
    }
  }
}
10161
10162
/* Emits the matching path of an assertion whose opcode lies in the
OP_ASSERT .. OP_ASSERTBACK_NOT range, optionally preceded by OP_BRAZERO or
OP_BRAMINZERO (never when 'conditional' is set).

common       compiler state; the accept / quit / then_trap related fields
             are saved on entry and restored on every exit path
cc           points to the assertion (or to the preceding BRAZERO item)
backtrack    assertion backtrack record; framesize and private_data_ptr
             are stored into it
conditional  when TRUE, failures are collected in backtrack->condfailed
             instead of backtrack->common.own_backtracks

Returns the code pointer after the closing ket of the assertion, or NULL
when the compiler reports an error. */
static PCRE2_SPTR compile_assert_matchingpath(compiler_common *common, PCRE2_SPTR cc, assert_backtrack *backtrack, BOOL conditional)
{
DEFINE_COMPILER;
int framesize;
int extrasize;
BOOL local_quit_available = FALSE;
BOOL needs_control_head;
/* Number of extra stack slots reserved when a lookbehind contains a
variable length reverse (0 or 3). This is a count which takes part in
stack offset arithmetic, so it must be an integer, not a BOOL. */
int end_block_size = 0;
BOOL has_vreverse;
int private_data_ptr;
backtrack_common altbacktrack;
PCRE2_SPTR ccbegin;
PCRE2_UCHAR opcode;
PCRE2_UCHAR bra = OP_BRA;
jump_list *tmp = NULL;
jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.own_backtracks;
jump_list **found;
/* Saving previous accept variables. */
BOOL save_local_quit_available = common->local_quit_available;
BOOL save_in_positive_assertion = common->in_positive_assertion;
then_trap_backtrack *save_then_trap = common->then_trap;
struct sljit_label *save_quit_label = common->quit_label;
struct sljit_label *save_accept_label = common->accept_label;
jump_list *save_quit = common->quit;
jump_list *save_positive_assertion_quit = common->positive_assertion_quit;
jump_list *save_accept = common->accept;
struct sljit_jump *jump;
struct sljit_jump *brajump = NULL;

/* Assert captures then. */
common->then_trap = NULL;

if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  SLJIT_ASSERT(!conditional);
  bra = *cc;
  cc++;
  }

private_data_ptr = PRIVATE_DATA(cc);
SLJIT_ASSERT(private_data_ptr != 0);
framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
backtrack->framesize = framesize;
backtrack->private_data_ptr = private_data_ptr;
opcode = *cc;
SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* A matching alternative of a positive assertion jumps to the local 'tmp'
list (success); for negative assertions a match is a failure and goes
straight to 'target'. */
found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
ccbegin = cc;
cc += GET(cc, 1);

if (bra == OP_BRAMINZERO)
  {
  /* This is a braminzero backtrack path. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

if ((opcode == OP_ASSERTBACK || opcode == OP_ASSERTBACK_NOT) && find_vreverse(ccbegin))
  end_block_size = 3;

if (framesize < 0)
  {
  extrasize = 1;
  if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
    extrasize = 0;

  extrasize += end_block_size;

  if (needs_control_head)
    extrasize++;

  if (framesize == no_frame)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);

  if (extrasize > 0)
    allocate_stack(common, extrasize);

  if (needs_control_head)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);

  if (extrasize > 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

  if (needs_control_head)
    {
    SLJIT_ASSERT(extrasize == end_block_size + 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP1, 0);
    }
  }
else
  {
  extrasize = (needs_control_head ? 3 : 2) + end_block_size;

  OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);
  allocate_stack(common, framesize + extrasize);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

  if (needs_control_head)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 2), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
    }
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP1, 0);

  init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize);
  }

if (end_block_size > 0)
  {
  /* Save STR_END in STACK(1) and make the current position the new end. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_END, 0);
  OP1(SLJIT_MOV, STR_END, 0, STR_PTR, 0);
  }

memset(&altbacktrack, 0, sizeof(backtrack_common));
if (conditional || (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT))
  {
  /* Control verbs cannot escape from these asserts. */
  local_quit_available = TRUE;
  common->local_quit_available = TRUE;
  common->quit_label = NULL;
  common->quit = NULL;
  }

common->in_positive_assertion = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK);
common->positive_assertion_quit = NULL;

/* One iteration per alternative of the assertion. */
while (1)
  {
  common->accept_label = NULL;
  common->accept = NULL;
  altbacktrack.top = NULL;
  altbacktrack.own_backtracks = NULL;

  if (*ccbegin == OP_ALT && extrasize > 0)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  altbacktrack.cc = ccbegin;
  ccbegin += 1 + LINK_SIZE;

  has_vreverse = (*ccbegin == OP_VREVERSE);
  if (*ccbegin == OP_REVERSE || has_vreverse)
    ccbegin = compile_reverse_matchingpath(common, ccbegin, &altbacktrack);

  compile_matchingpath(common, ccbegin, cc, &altbacktrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    /* Restore the saved state before reporting the error. */
    if (local_quit_available)
      {
      common->local_quit_available = save_local_quit_available;
      common->quit_label = save_quit_label;
      common->quit = save_quit;
      }
    common->in_positive_assertion = save_in_positive_assertion;
    common->then_trap = save_then_trap;
    common->accept_label = save_accept_label;
    common->positive_assertion_quit = save_positive_assertion_quit;
    common->accept = save_accept;
    return NULL;
    }

  if (has_vreverse)
    {
    SLJIT_ASSERT(altbacktrack.top != NULL);
    add_jump(compiler, &altbacktrack.top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
    }

  common->accept_label = LABEL();
  if (common->accept != NULL)
    set_jumps(common->accept, common->accept_label);

  /* Reset stack. */
  if (framesize < 0)
    {
    if (framesize == no_frame)
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    else if (extrasize > 0)
      free_stack(common, extrasize);

    if (end_block_size > 0)
      OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));

    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
    }
  else
    {
    if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));

      if (end_block_size > 0)
        OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 2));

      if (needs_control_head)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
      }
    else
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);

      if (end_block_size > 0)
        OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize + 1));

      if (needs_control_head)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 2));
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
      }
    }

  if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
    {
    /* We know that STR_PTR was stored on the top of the stack. */
    if (conditional)
      {
      if (extrasize > 0)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-end_block_size - (needs_control_head ? 2 : 1)));
      }
    else if (bra == OP_BRAZERO)
      {
      if (framesize < 0)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
      else
        {
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize));
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
        }
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (framesize >= 0)
      {
      /* For OP_BRA and OP_BRAMINZERO. */
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
      }
    }
  add_jump(compiler, found, JUMP(SLJIT_JUMP));

  compile_backtrackingpath(common, altbacktrack.top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    /* Restore the saved state before reporting the error. */
    if (local_quit_available)
      {
      common->local_quit_available = save_local_quit_available;
      common->quit_label = save_quit_label;
      common->quit = save_quit;
      }
    common->in_positive_assertion = save_in_positive_assertion;
    common->then_trap = save_then_trap;
    common->accept_label = save_accept_label;
    common->positive_assertion_quit = save_positive_assertion_quit;
    common->accept = save_accept;
    return NULL;
    }
  set_jumps(altbacktrack.own_backtracks, LABEL());

  if (*cc != OP_ALT)
    break;

  ccbegin = cc;
  cc += GET(cc, 1);
  }

if (local_quit_available)
  {
  SLJIT_ASSERT(common->positive_assertion_quit == NULL);
  /* Makes the check less complicated below. */
  common->positive_assertion_quit = common->quit;
  }

/* None of them matched. */
if (common->positive_assertion_quit != NULL)
  {
  jump = JUMP(SLJIT_JUMP);
  set_jumps(common->positive_assertion_quit, LABEL());
  SLJIT_ASSERT(framesize != no_stack);
  if (framesize < 0)
    OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
  else
    {
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (extrasize + 1) * sizeof(sljit_sw));
    }
  JUMPHERE(jump);
  }

if (end_block_size > 0)
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(1));

if (needs_control_head)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1));

if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
  {
  /* Assert is failed. */
  if ((conditional && extrasize > 0) || bra == OP_BRAZERO)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  if (framesize < 0)
    {
    /* The topmost item should be 0. */
    if (bra == OP_BRAZERO)
      {
      if (extrasize >= 2)
        free_stack(common, extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (extrasize > 0)
      free_stack(common, extrasize);
    }
  else
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
    /* The topmost item should be 0. */
    if (bra == OP_BRAZERO)
      {
      free_stack(common, framesize + extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, framesize + extrasize);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  jump = JUMP(SLJIT_JUMP);
  if (bra != OP_BRAZERO)
    add_jump(compiler, target, jump);

  /* Assert is successful. */
  set_jumps(tmp, LABEL());
  if (framesize < 0)
    {
    /* We know that STR_PTR was stored on the top of the stack. */
    if (extrasize > 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));

    /* Keep the STR_PTR on the top of the stack. */
    if (bra == OP_BRAZERO)
      {
      /* This allocation is always successful. */
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      if (extrasize >= 2)
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }
    else if (bra == OP_BRAMINZERO)
      {
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    }
  else
    {
    if (bra == OP_BRA)
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));
      }
    else
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + end_block_size + 2) * sizeof(sljit_sw));

      if (extrasize == 2 + end_block_size)
        {
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
        if (bra == OP_BRAMINZERO)
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
        }
      else
        {
        SLJIT_ASSERT(extrasize == 3 + end_block_size);
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
        }
      }
    }

  if (bra == OP_BRAZERO)
    {
    backtrack->matchingpath = LABEL();
    SET_LABEL(jump, backtrack->matchingpath);
    }
  else if (bra == OP_BRAMINZERO)
    {
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
    JUMPHERE(brajump);
    if (framesize >= 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
      }
    set_jumps(backtrack->common.own_backtracks, LABEL());
    }
  }
else
  {
  /* AssertNot is successful. */
  if (framesize < 0)
    {
    if (extrasize > 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

    if (bra != OP_BRA)
      {
      if (extrasize >= 2)
        free_stack(common, extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (extrasize > 0)
      free_stack(common, extrasize);
    }
  else
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
    /* The topmost item should be 0. */
    if (bra != OP_BRA)
      {
      free_stack(common, framesize + extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, framesize + extrasize);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }

  if (bra == OP_BRAZERO)
    backtrack->matchingpath = LABEL();
  else if (bra == OP_BRAMINZERO)
    {
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
    JUMPHERE(brajump);
    }

  if (bra != OP_BRA)
    {
    SLJIT_ASSERT(found == &backtrack->common.own_backtracks);
    set_jumps(backtrack->common.own_backtracks, LABEL());
    backtrack->common.own_backtracks = NULL;
    }
  }

/* Restore the state saved on entry. */
if (local_quit_available)
  {
  common->local_quit_available = save_local_quit_available;
  common->quit_label = save_quit_label;
  common->quit = save_quit;
  }
common->in_positive_assertion = save_in_positive_assertion;
common->then_trap = save_then_trap;
common->accept_label = save_accept_label;
common->positive_assertion_quit = save_positive_assertion_quit;
common->accept = save_accept;
return cc + 1 + LINK_SIZE;
}
10632
10633
/* Emits the stack cleanup executed when a once-type bracket has matched:
STACK_TOP is repositioned according to framesize, the saved control head is
restored when needs_control_head is set, and for repeating kets the stored
STR_PTR is loaded into TMP2 (OP_KETRMAX) or copied into the private data
slot (OP_KETRMIN, only when framesize < 0). */
static SLJIT_INLINE void match_once_common(compiler_common *common, PCRE2_UCHAR ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
{
DEFINE_COMPILER;
/* TRUE when the bracket repeats or has alternatives: one more stack item
is accounted for in that case. */
BOOL has_extra_item = (ket != OP_KET || has_alternatives);
int stacksize;

if (framesize >= 0)
  {
  stacksize = has_extra_item ? 2 : 1;
  OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));

  /* TMP2 which is set here used by OP_KETRMAX below. */
  if (ket == OP_KETRMAX)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  }
else
  {
  if (framesize != no_frame)
    {
    stacksize = (needs_control_head ? 1 : 0) + (has_extra_item ? 1 : 0);

    if (stacksize > 0)
      free_stack(common, stacksize);
    }
  else
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);

  if (needs_control_head)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), has_extra_item ? STACK(-2) : STACK(-1));

  switch (ket)
    {
    case OP_KETRMAX:
    /* TMP2 which is set here used by OP_KETRMAX below. */
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
    break;

    case OP_KETRMIN:
    /* Move the STR_PTR to the private_data_ptr. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
    break;

    default:
    break;
    }
  }

/* TMP1 was loaded with the saved control head in either branch above. */
if (needs_control_head)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
}
10680
10681
/* Emits the code which saves the previous capture state on the stack and
records the new values of a capturing group.

  stacksize        - index of the first free stack slot
  offset           - OVECTOR offset of the group (offset >> 1 is used as the
                     capture index)
  private_data_ptr - private data slot of the group; its content is stored
                     in OVECTOR(offset)

Returns the index of the next free stack slot. */
static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
{
DEFINE_COMPILER;
const int capture_index = offset >> 1;
int next_slot = stacksize;

if (common->capture_last_ptr != 0)
  {
  /* Push the previous "last capture" value and store the current index. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, capture_index);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(next_slot), TMP1, 0);
  next_slot++;
  }

/* Nothing more is emitted here for optimized capturing brackets. */
if (common->optimized_cbracket[capture_index] != 0)
  return next_slot;

/* Push the previous OVECTOR pair, then store the new pair: the start comes
from the private data slot, the end is the current STR_PTR. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(next_slot), TMP1, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(next_slot + 1), TMP2, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);

return next_slot + 2;
}
10705
10706
/* Helper called from the generated code: runs the script run check on the
[ptr, endptr) range with the utf argument set to FALSE. Returns endptr when
the check succeeds, NULL otherwise. */
static PCRE2_SPTR SLJIT_FUNC do_script_run(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
{
  return PRIV(script_run)(ptr, endptr, FALSE) ? endptr : NULL;
}
10712
10713
#ifdef SUPPORT_UNICODE
10714
10715
/* Helper called from the generated code: runs the script run check on the
[ptr, endptr) range with the utf argument set to TRUE. Returns endptr when
the check succeeds, NULL otherwise. */
static PCRE2_SPTR SLJIT_FUNC do_script_run_utf(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
{
  return PRIV(script_run)(ptr, endptr, TRUE) ? endptr : NULL;
}
10721
10722
#endif /* SUPPORT_UNICODE */
10723
10724
/* Emits the call which validates a script run group. The start position is
read from private_data_ptr and passed in TMP1, the end position is the
current STR_PTR. STR_PTR receives the return value of the helper, and a zero
(NULL) return value backtracks through the lists of the parent. */
static void match_script_run_common(compiler_common *common, int private_data_ptr, backtrack_common *parent)
{
DEFINE_COMPILER;
struct jump_list **fail_list;
struct sljit_jump *check_failed;
sljit_sw helper_addr;

/* The arguments of the helper are passed in these registers. */
SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);

fail_list = (parent->top != NULL) ? &parent->top->simple_backtracks : &parent->own_backtracks;

#ifdef SUPPORT_UNICODE
helper_addr = common->utf ? SLJIT_FUNC_ADDR(do_script_run_utf) : SLJIT_FUNC_ADDR(do_script_run);
#else
helper_addr = SLJIT_FUNC_ADDR(do_script_run);
#endif

OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, helper_addr);

OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
check_failed = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
add_jump(compiler, fail_list, check_failed);
}
10741
10742
/*
10743
  Handling bracketed expressions is probably the most complex part.
10744
10745
  Stack layout naming characters:
10746
    S - Push the current STR_PTR
10747
    0 - Push a 0 (NULL)
10748
    A - Push the current STR_PTR. Needed for restoring the STR_PTR
10749
        before the next alternative. Not pushed if there are no alternatives.
10750
    M - Any values pushed by the current alternative. Can be empty, or anything.
10751
    C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
10752
    L - Push the previous local (pointed by localptr) to the stack
10753
   () - optional values stored on the stack
10754
  ()* - optional, can be stored multiple times
10755
10756
  The following list shows the regular expression templates, their PCRE byte codes
10757
  and stack layout supported by pcre-sljit.
10758
10759
  (?:)                     OP_BRA     | OP_KET                A M
10760
  ()                       OP_CBRA    | OP_KET                C M
10761
  (?:)+                    OP_BRA     | OP_KETRMAX        0   A M S   ( A M S )*
10762
                           OP_SBRA    | OP_KETRMAX        0   L M S   ( L M S )*
10763
  (?:)+?                   OP_BRA     | OP_KETRMIN        0   A M S   ( A M S )*
10764
                           OP_SBRA    | OP_KETRMIN        0   L M S   ( L M S )*
10765
  ()+                      OP_CBRA    | OP_KETRMAX        0   C M S   ( C M S )*
10766
                           OP_SCBRA   | OP_KETRMAX        0   C M S   ( C M S )*
10767
  ()+?                     OP_CBRA    | OP_KETRMIN        0   C M S   ( C M S )*
10768
                           OP_SCBRA   | OP_KETRMIN        0   C M S   ( C M S )*
10769
  (?:)?    OP_BRAZERO    | OP_BRA     | OP_KET            S ( A M 0 )
10770
  (?:)??   OP_BRAMINZERO | OP_BRA     | OP_KET            S ( A M 0 )
10771
  ()?      OP_BRAZERO    | OP_CBRA    | OP_KET            S ( C M 0 )
10772
  ()??     OP_BRAMINZERO | OP_CBRA    | OP_KET            S ( C M 0 )
10773
  (?:)*    OP_BRAZERO    | OP_BRA     | OP_KETRMAX      S 0 ( A M S )*
10774
           OP_BRAZERO    | OP_SBRA    | OP_KETRMAX      S 0 ( L M S )*
10775
  (?:)*?   OP_BRAMINZERO | OP_BRA     | OP_KETRMIN      S 0 ( A M S )*
10776
           OP_BRAMINZERO | OP_SBRA    | OP_KETRMIN      S 0 ( L M S )*
10777
  ()*      OP_BRAZERO    | OP_CBRA    | OP_KETRMAX      S 0 ( C M S )*
10778
           OP_BRAZERO    | OP_SCBRA   | OP_KETRMAX      S 0 ( C M S )*
10779
  ()*?     OP_BRAMINZERO | OP_CBRA    | OP_KETRMIN      S 0 ( C M S )*
10780
           OP_BRAMINZERO | OP_SCBRA   | OP_KETRMIN      S 0 ( C M S )*
10781
10782
10783
  Stack layout naming characters:
10784
    A - Push the alternative index (starting from 0) on the stack.
10785
        Not pushed if there are no alternatives.
10786
    M - Any values pushed by the current alternative. Can be empty, or anything.
10787
10788
  The next list shows the possible content of a bracket:
10789
  (|)     OP_*BRA    | OP_ALT ...         M A
10790
  (?()|)  OP_*COND   | OP_ALT             M A
10791
  (?>|)   OP_ONCE    | OP_ALT ...         [stack trace] M A
10792
                                          Or nothing, if trace is unnecessary
10793
*/
10794
10795
/* Generates the matching path of a bracketed group: plain, capturing, atomic
(OP_ONCE), conditional, script run and non-atomic assertion brackets, with an
optional leading OP_BRAZERO / OP_BRAMINZERO.

  common - state of the JIT compilation
  cc     - points to the first opcode of the group (the zero-repeat prefix
           when there is one)
  parent - backtrack structure of the enclosing construct; when bra is
           OP_BRAMINZERO it is accessed as a braminzero_backtrack

Returns the address which follows the closing ket plus repeat_length, or NULL
when the sljit compiler reports an error.

NOTE(review): PUSH_BACKTRACK presumably allocates the bracket_backtrack which
is accessed through 'backtrack' and BACKTRACK_AS() below - confirm against the
macro definition. */
static PCRE2_SPTR compile_bracket_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
10796
0
{
10797
0
DEFINE_COMPILER;
10798
0
backtrack_common *backtrack;
10799
0
PCRE2_UCHAR opcode;
10800
0
int private_data_ptr = 0;
10801
0
int offset = 0;
10802
0
int i, stacksize;
10803
0
int repeat_ptr = 0, repeat_length = 0;
10804
0
int repeat_type = 0, repeat_count = 0;
10805
0
PCRE2_SPTR ccbegin;
10806
0
PCRE2_SPTR matchingpath;
10807
0
PCRE2_SPTR slot;
10808
0
PCRE2_UCHAR bra = OP_BRA;
10809
0
PCRE2_UCHAR ket;
10810
0
assert_backtrack *assert;
10811
0
BOOL has_alternatives;
10812
0
BOOL needs_control_head = FALSE;
10813
0
BOOL has_vreverse = FALSE;
10814
0
struct sljit_jump *jump;
10815
0
struct sljit_jump *skip;
10816
0
struct sljit_label *rmax_label = NULL;
10817
0
struct sljit_jump *braminzero = NULL;
10818
10819
0
PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
10820
10821
0
/* An optional zero-repeat prefix opcode is remembered in bra and skipped. */
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
10822
0
  {
10823
0
  bra = *cc;
10824
0
  cc++;
10825
0
  opcode = *cc;
10826
0
  }
10827
10828
0
opcode = *cc;
10829
0
ccbegin = cc;
10830
0
/* matchingpath is used temporarily to read the closing ket opcode. */
matchingpath = bracketend(cc) - 1 - LINK_SIZE;
10831
0
ket = *matchingpath;
10832
0
/* A plain OP_KET with private data has repeat information attached. OP_UPTO
and OP_MINUPTO repeats go through the OP_KETRMAX / OP_KETRMIN code paths. */
if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
10833
0
  {
10834
0
  repeat_ptr = PRIVATE_DATA(matchingpath);
10835
0
  repeat_length = PRIVATE_DATA(matchingpath + 1);
10836
0
  repeat_type = PRIVATE_DATA(matchingpath + 2);
10837
0
  repeat_count = PRIVATE_DATA(matchingpath + 3);
10838
0
  SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
10839
0
  if (repeat_type == OP_UPTO)
10840
0
    ket = OP_KETRMAX;
10841
0
  if (repeat_type == OP_MINUPTO)
10842
0
    ket = OP_KETRMIN;
10843
0
  }
10844
10845
0
/* From here matchingpath points after the opening opcode and its link. */
matchingpath = ccbegin + 1 + LINK_SIZE;
10846
0
SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
10847
0
SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
10848
0
/* Follow the link: cc now points to the opcode which ends the first
alternative (OP_ALT or a ket opcode). */
cc += GET(cc, 1);
10849
10850
0
has_alternatives = *cc == OP_ALT;
10851
0
if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
10852
0
  {
10853
0
  SLJIT_COMPILE_ASSERT(OP_DNRREF == OP_RREF + 1 && OP_FALSE == OP_RREF + 2 && OP_TRUE == OP_RREF + 3,
10854
0
    compile_time_checks_must_be_grouped_together);
10855
0
  has_alternatives = ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL) ? FALSE : TRUE;
10856
0
  }
10857
10858
0
if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
10859
0
  opcode = OP_SCOND;
10860
10861
0
if (opcode == OP_CBRA || opcode == OP_SCBRA)
10862
0
  {
10863
  /* Capturing brackets has a pre-allocated space. */
10864
0
  offset = GET2(ccbegin, 1 + LINK_SIZE);
10865
0
  if (common->optimized_cbracket[offset] == 0)
10866
0
    {
10867
0
    private_data_ptr = OVECTOR_PRIV(offset);
10868
0
    offset <<= 1;
10869
0
    }
10870
0
  else
10871
0
    {
10872
0
    offset <<= 1;
10873
0
    private_data_ptr = OVECTOR(offset);
10874
0
    }
10875
0
  BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
10876
0
  matchingpath += IMM2_SIZE;
10877
0
  }
10878
0
else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_ONCE || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
10879
0
  {
10880
  /* Other brackets simply allocate the next entry. */
10881
0
  private_data_ptr = PRIVATE_DATA(ccbegin);
10882
0
  SLJIT_ASSERT(private_data_ptr != 0);
10883
0
  BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
10884
0
  if (opcode == OP_ONCE)
10885
0
    BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
10886
0
  }
10887
10888
/* Instructions before the first alternative. */
10889
0
/* The slots are counted and allocated first, then filled in the same order. */
stacksize = 0;
10890
0
if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
10891
0
  stacksize++;
10892
0
if (bra == OP_BRAZERO)
10893
0
  stacksize++;
10894
10895
0
if (stacksize > 0)
10896
0
  allocate_stack(common, stacksize);
10897
10898
0
stacksize = 0;
10899
0
if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
10900
0
  {
10901
0
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10902
0
  stacksize++;
10903
0
  }
10904
10905
0
if (bra == OP_BRAZERO)
10906
0
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10907
10908
0
if (bra == OP_BRAMINZERO)
10909
0
  {
10910
  /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
10911
0
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10912
0
  if (ket != OP_KETRMIN)
10913
0
    {
10914
0
    free_stack(common, 1);
10915
0
    braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10916
0
    }
10917
0
  else if (opcode == OP_ONCE || opcode >= OP_SBRA)
10918
0
    {
10919
0
    jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10920
0
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10921
    /* Nothing stored during the first run. */
10922
0
    skip = JUMP(SLJIT_JUMP);
10923
0
    JUMPHERE(jump);
10924
    /* Checking zero-length iteration. */
10925
0
    if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
10926
0
      {
10927
      /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
10928
0
      braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10929
0
      }
10930
0
    else
10931
0
      {
10932
      /* Except when the whole stack frame must be saved. */
10933
0
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10934
0
      braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-BACKTRACK_AS(bracket_backtrack)->u.framesize - 2));
10935
0
      }
10936
0
    JUMPHERE(skip);
10937
0
    }
10938
0
  else
10939
0
    {
10940
0
    jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10941
0
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10942
0
    JUMPHERE(jump);
10943
0
    }
10944
0
  }
10945
10946
0
/* Bounded repeats keep their counter in the repeat_ptr private data slot. */
if (repeat_type != 0)
10947
0
  {
10948
0
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
10949
0
  if (repeat_type == OP_EXACT)
10950
0
    rmax_label = LABEL();
10951
0
  }
10952
10953
0
if (ket == OP_KETRMIN)
10954
0
  BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
10955
10956
0
if (ket == OP_KETRMAX)
10957
0
  {
10958
0
  rmax_label = LABEL();
10959
0
  if (has_alternatives && opcode >= OP_BRA && opcode < OP_SBRA && repeat_type == 0)
10960
0
    BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
10961
0
  }
10962
10963
/* Handling capturing brackets and alternatives. */
10964
0
if (opcode == OP_ONCE)
10965
0
  {
10966
0
  stacksize = 0;
10967
0
  if (needs_control_head)
10968
0
    {
10969
0
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10970
0
    stacksize++;
10971
0
    }
10972
10973
0
  if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
10974
0
    {
10975
    /* Neither capturing brackets nor recursions are found in the block. */
10976
0
    if (ket == OP_KETRMIN)
10977
0
      {
10978
0
      stacksize += 2;
10979
0
      if (!needs_control_head)
10980
0
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10981
0
      }
10982
0
    else
10983
0
      {
10984
0
      if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
10985
0
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
10986
0
      if (ket == OP_KETRMAX || has_alternatives)
10987
0
        stacksize++;
10988
0
      }
10989
10990
0
    if (stacksize > 0)
10991
0
      allocate_stack(common, stacksize);
10992
10993
0
    stacksize = 0;
10994
0
    if (needs_control_head)
10995
0
      {
10996
0
      stacksize++;
10997
0
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10998
0
      }
10999
11000
0
    if (ket == OP_KETRMIN)
11001
0
      {
11002
0
      if (needs_control_head)
11003
0
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11004
0
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
11005
0
      if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
11006
0
        OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
11007
0
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
11008
0
      }
11009
0
    else if (ket == OP_KETRMAX || has_alternatives)
11010
0
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
11011
0
    }
11012
0
  else
11013
0
    {
11014
0
    if (ket != OP_KET || has_alternatives)
11015
0
      stacksize++;
11016
11017
0
    stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
11018
0
    allocate_stack(common, stacksize);
11019
11020
0
    if (needs_control_head)
11021
0
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
11022
11023
0
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11024
0
    OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
11025
11026
0
    stacksize = needs_control_head ? 1 : 0;
11027
0
    if (ket != OP_KET || has_alternatives)
11028
0
      {
11029
0
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
11030
0
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
11031
0
      stacksize++;
11032
0
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
11033
0
      }
11034
0
    else
11035
0
      {
11036
0
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
11037
0
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
11038
0
      }
11039
0
    init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1);
11040
0
    }
11041
0
  }
11042
0
else if (opcode == OP_CBRA || opcode == OP_SCBRA)
11043
0
  {
11044
  /* Saving the previous values. */
11045
0
  if (common->optimized_cbracket[offset >> 1] != 0)
11046
0
    {
11047
0
    SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
11048
0
    allocate_stack(common, 2);
11049
0
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11050
0
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
11051
0
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
11052
0
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
11053
0
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
11054
0
    }
11055
0
  else
11056
0
    {
11057
0
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11058
0
    allocate_stack(common, 1);
11059
0
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
11060
0
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
11061
0
    }
11062
0
  }
11063
0
else if (opcode == OP_ASSERTBACK_NA && PRIVATE_DATA(ccbegin + 1))
11064
0
  {
11065
0
  /* Both private data words are saved; STR_END is stored in the second word
  and is then set to the current STR_PTR while the group is matched. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11066
0
  allocate_stack(common, 4);
11067
0
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
11068
0
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
11069
0
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), STR_END, 0);
11070
0
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
11071
0
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
11072
0
  OP1(SLJIT_MOV, STR_END, 0, STR_PTR, 0);
11073
11074
0
  has_vreverse = (*matchingpath == OP_VREVERSE);
11075
0
  if (*matchingpath == OP_REVERSE || has_vreverse)
11076
0
    matchingpath = compile_reverse_matchingpath(common, matchingpath, backtrack);
11077
0
  }
11078
0
else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
11079
0
  {
11080
  /* Saving the previous value. */
11081
0
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11082
0
  allocate_stack(common, 1);
11083
0
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
11084
0
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
11085
11086
0
  if (*matchingpath == OP_REVERSE)
11087
0
    matchingpath = compile_reverse_matchingpath(common, matchingpath, backtrack);
11088
0
  }
11089
0
else if (has_alternatives)
11090
0
  {
11091
  /* Pushing the starting string pointer. */
11092
0
  allocate_stack(common, 1);
11093
0
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11094
0
  }
11095
11096
/* Generating code for the first alternative. */
11097
0
if (opcode == OP_COND || opcode == OP_SCOND)
11098
0
  {
11099
0
  if (*matchingpath == OP_CREF)
11100
0
    {
11101
0
    SLJIT_ASSERT(has_alternatives);
11102
0
    add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
11103
0
      CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
11104
0
    matchingpath += 1 + IMM2_SIZE;
11105
0
    }
11106
0
  else if (*matchingpath == OP_DNCREF)
11107
0
    {
11108
0
    SLJIT_ASSERT(has_alternatives);
11109
11110
0
    i = GET2(matchingpath, 1 + IMM2_SIZE);
11111
0
    slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
11112
0
    /* STR_PTR is used as a scratch register below, so it is kept in TMP3. */
    OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
11113
0
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
11114
0
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
11115
0
    slot += common->name_entry_size;
11116
0
    i--;
11117
0
    while (i-- > 0)
11118
0
      {
11119
0
      OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
11120
0
      OP2(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, STR_PTR, 0);
11121
0
      slot += common->name_entry_size;
11122
0
      }
11123
0
    OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
11124
0
    add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO));
11125
0
    matchingpath += 1 + 2 * IMM2_SIZE;
11126
0
    }
11127
0
  else if ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL)
11128
0
    {
11129
    /* Never has other case. */
11130
0
    BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
11131
0
    SLJIT_ASSERT(!has_alternatives);
11132
11133
0
    /* These conditions are decided at compile time. In this branch stacksize
    is reused as the result: non-zero selects the first alternative. */
    if (*matchingpath == OP_TRUE)
11134
0
      {
11135
0
      stacksize = 1;
11136
0
      matchingpath++;
11137
0
      }
11138
0
    else if (*matchingpath == OP_FALSE || *matchingpath == OP_FAIL)
11139
0
      stacksize = 0;
11140
0
    else if (*matchingpath == OP_RREF)
11141
0
      {
11142
0
      stacksize = GET2(matchingpath, 1);
11143
0
      if (common->currententry == NULL)
11144
0
        stacksize = 0;
11145
0
      else if (stacksize == RREF_ANY)
11146
0
        stacksize = 1;
11147
0
      else if (common->currententry->start == 0)
11148
0
        stacksize = stacksize == 0;
11149
0
      else
11150
0
        stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
11151
11152
0
      if (stacksize != 0)
11153
0
        matchingpath += 1 + IMM2_SIZE;
11154
0
      }
11155
0
    else
11156
0
      {
11157
0
      if (common->currententry == NULL || common->currententry->start == 0)
11158
0
        stacksize = 0;
11159
0
      else
11160
0
        {
11161
0
        stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
11162
0
        slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
11163
0
        i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
11164
0
        while (stacksize > 0)
11165
0
          {
11166
0
          if ((int)GET2(slot, 0) == i)
11167
0
            break;
11168
0
          slot += common->name_entry_size;
11169
0
          stacksize--;
11170
0
          }
11171
0
        }
11172
11173
0
      if (stacksize != 0)
11174
0
        matchingpath += 1 + 2 * IMM2_SIZE;
11175
0
      }
11176
11177
      /* The stacksize == 0 is a common "else" case. */
11178
0
      if (stacksize == 0)
11179
0
        {
11180
0
        if (*cc == OP_ALT)
11181
0
          {
11182
0
          matchingpath = cc + 1 + LINK_SIZE;
11183
0
          cc += GET(cc, 1);
11184
0
          }
11185
0
        else
11186
0
          matchingpath = cc;
11187
0
        }
11188
0
    }
11189
0
  else
11190
0
    {
11191
0
    SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
11192
    /* Similar code as PUSH_BACKTRACK macro. */
11193
0
    assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
11194
0
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11195
0
      return NULL;
11196
0
    memset(assert, 0, sizeof(assert_backtrack));
11197
0
    assert->common.cc = matchingpath;
11198
0
    BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
11199
0
    matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
11200
0
    }
11201
0
  }
11202
11203
0
compile_matchingpath(common, matchingpath, cc, backtrack);
11204
0
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11205
0
  return NULL;
11206
11207
0
/* Opcode specific code which follows the first alternative. */
switch (opcode)
11208
0
  {
11209
0
  case OP_ASSERTBACK_NA:
11210
0
    if (has_vreverse)
11211
0
      {
11212
0
      SLJIT_ASSERT(backtrack->top != NULL && PRIVATE_DATA(ccbegin + 1));
11213
0
      add_jump(compiler, &backtrack->top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
11214
0
      }
11215
11216
0
    if (PRIVATE_DATA(ccbegin + 1))
11217
0
      OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
11218
0
    break;
11219
0
  case OP_ASSERT_NA:
11220
0
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11221
0
    break;
11222
0
  case OP_ONCE:
11223
0
    match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
11224
0
    break;
11225
0
  case OP_SCRIPT_RUN:
11226
0
    match_script_run_common(common, private_data_ptr, backtrack);
11227
0
    break;
11228
0
  }
11229
11230
0
/* The stack slots needed after the match are counted and allocated first,
then they are filled in the same order. */
stacksize = 0;
11231
0
if (repeat_type == OP_MINUPTO)
11232
0
  {
11233
  /* We need to preserve the counter. TMP2 will be used below. */
11234
0
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
11235
0
  stacksize++;
11236
0
  }
11237
0
if (ket != OP_KET || bra != OP_BRA)
11238
0
  stacksize++;
11239
0
if (offset != 0)
11240
0
  {
11241
0
  if (common->capture_last_ptr != 0)
11242
0
    stacksize++;
11243
0
  if (common->optimized_cbracket[offset >> 1] == 0)
11244
0
    stacksize += 2;
11245
0
  }
11246
0
if (has_alternatives && opcode != OP_ONCE)
11247
0
  stacksize++;
11248
11249
0
if (stacksize > 0)
11250
0
  allocate_stack(common, stacksize);
11251
11252
0
stacksize = 0;
11253
0
if (repeat_type == OP_MINUPTO)
11254
0
  {
11255
  /* TMP2 was set above. */
11256
0
  OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
11257
0
  stacksize++;
11258
0
  }
11259
11260
0
if (ket != OP_KET || bra != OP_BRA)
11261
0
  {
11262
0
  if (ket != OP_KET)
11263
0
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
11264
0
  else
11265
0
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
11266
0
  stacksize++;
11267
0
  }
11268
11269
0
if (offset != 0)
11270
0
  stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
11271
11272
/* Skip and count the other alternatives. */
11273
0
i = 1;
11274
0
while (*cc == OP_ALT)
11275
0
  {
11276
0
  cc += GET(cc, 1);
11277
0
  i++;
11278
0
  }
11279
11280
0
if (has_alternatives)
11281
0
  {
11282
0
  if (opcode != OP_ONCE)
11283
0
    {
11284
0
    /* With at most three alternatives a constant 0 is stored; otherwise an
    address is stored through sljit_emit_mov_addr and recorded in
    u.matching_mov_addr. */
    if (i <= 3)
11285
0
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
11286
0
    else
11287
0
      BACKTRACK_AS(bracket_backtrack)->u.matching_mov_addr = sljit_emit_mov_addr(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
11288
0
    }
11289
0
  if (ket != OP_KETRMAX)
11290
0
    BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
11291
0
  }
11292
11293
/* Must be after the matchingpath label. */
11294
0
if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
11295
0
  {
11296
0
  SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
11297
0
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
11298
0
  }
11299
11300
0
if (ket == OP_KETRMAX)
11301
0
  {
11302
0
  if (repeat_type != 0)
11303
0
    {
11304
0
    if (has_alternatives)
11305
0
      BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
11306
0
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
11307
0
    JUMPTO(SLJIT_NOT_ZERO, rmax_label);
11308
    /* Drop STR_PTR for greedy plus quantifier. */
11309
0
    if (opcode != OP_ONCE)
11310
0
      free_stack(common, 1);
11311
0
    }
11312
0
  else if (opcode < OP_BRA || opcode >= OP_SBRA)
11313
0
    {
11314
0
    if (has_alternatives)
11315
0
      BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
11316
11317
    /* Checking zero-length iteration. */
11318
0
    if (opcode != OP_ONCE)
11319
0
      {
11320
      /* This case includes opcodes such as OP_SCRIPT_RUN. */
11321
0
      CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
11322
      /* Drop STR_PTR for greedy plus quantifier. */
11323
0
      if (bra != OP_BRAZERO)
11324
0
        free_stack(common, 1);
11325
0
      }
11326
0
    else
11327
      /* TMP2 must contain the starting STR_PTR. */
11328
0
      CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
11329
0
    }
11330
0
  else
11331
0
    JUMPTO(SLJIT_JUMP, rmax_label);
11332
0
  BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
11333
0
  }
11334
11335
0
if (repeat_type == OP_EXACT)
11336
0
  {
11337
0
  count_match(common);
11338
0
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
11339
0
  JUMPTO(SLJIT_NOT_ZERO, rmax_label);
11340
0
  }
11341
0
else if (repeat_type == OP_UPTO)
11342
0
  {
11343
  /* We need to preserve the counter. */
11344
0
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
11345
0
  allocate_stack(common, 1);
11346
0
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
11347
0
  }
11348
11349
0
if (bra == OP_BRAZERO)
11350
0
  BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
11351
11352
0
if (bra == OP_BRAMINZERO)
11353
0
  {
11354
  /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
11355
0
  JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
11356
0
  if (braminzero != NULL)
11357
0
    {
11358
0
    JUMPHERE(braminzero);
11359
    /* We need to release the end pointer to perform the
11360
    backtrack for the zero-length iteration. When
11361
    framesize is < 0, OP_ONCE will do the release itself. */
11362
0
    if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
11363
0
      {
11364
0
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11365
0
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
11366
0
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
11367
0
      }
11368
0
    else if (ket == OP_KETRMIN && opcode != OP_ONCE)
11369
0
      free_stack(common, 1);
11370
0
    }
11371
  /* Continue to the normal backtrack. */
11372
0
  }
11373
11374
0
if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO || (has_alternatives && repeat_type != OP_EXACT))
11375
0
  count_match(common);
11376
11377
0
/* Step over the opcode cc points to and its link; after the loop above it
is no longer an OP_ALT. */
cc += 1 + LINK_SIZE;
11378
11379
0
if (opcode == OP_ONCE)
11380
0
  {
11381
0
  int data;
11382
0
  int framesize = BACKTRACK_AS(bracket_backtrack)->u.framesize;
11383
11384
0
  SLJIT_ASSERT(SHRT_MIN <= framesize && framesize < SHRT_MAX/2);
11385
  /* We temporarily encode the needs_control_head in the lowest bit.
11386
     The real value should be short enough for this operation to work
11387
     without triggering Undefined Behaviour. */
11388
0
  data = (int)((short)((unsigned short)framesize << 1) | (needs_control_head ? 1 : 0));
11389
0
  BACKTRACK_AS(bracket_backtrack)->u.framesize = data;
11390
0
  }
11391
0
return cc + repeat_length;
11392
0
}
11393
11394
/* Emits the matching path for a possessive repeated bracket. The opcode at cc
must be OP_BRAPOS, OP_SBRAPOS, OP_CBRAPOS or OP_SCBRAPOS, optionally preceded
by OP_BRAPOSZERO (any other opcode reaches SLJIT_UNREACHABLE).

Arguments:
  common      the JIT compiler state
  cc          points to the first opcode of the bracket
  parent      passed to PUSH_BACKTRACK implicitly; PUSH_BACKTRACK is a macro
              defined outside this excerpt - presumably it allocates the
              bracketpos_backtrack record and assigns `backtrack` (confirm
              against its definition)

Returns:      cc + 1 + LINK_SIZE taken at the closing OP_KETRPOS, or NULL when
              sljit_get_compiler_error() reports an error after compiling an
              alternative.

The emitted code saves the state needed to undo an iteration on the machine
stack, then loops over the alternatives. Each alternative that matches records
the new position (and capture pair for the capturing variants) and jumps back
to `loop`; when no alternative matches, STR_PTR is restored from the last
saved value. */
static PCRE2_SPTR compile_bracketpos_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11395
0
{
11396
0
DEFINE_COMPILER;
11397
0
backtrack_common *backtrack;
11398
0
PCRE2_UCHAR opcode;
11399
0
int private_data_ptr;
11400
0
int cbraprivptr = 0;
11401
0
BOOL needs_control_head;
11402
0
int framesize;
11403
0
int stacksize;
11404
0
int offset = 0;
11405
0
BOOL zero = FALSE;
11406
0
PCRE2_SPTR ccbegin = NULL;
11407
0
int stack; /* Also contains the offset of control head. */
11408
0
struct sljit_label *loop = NULL;
11409
0
struct jump_list *emptymatch = NULL;
11410
11411
0
PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
11412
0
/* A leading OP_BRAPOSZERO is only remembered in `zero` and skipped; when it
is absent, a flag word is kept on the stack and checked after the loop. */
if (*cc == OP_BRAPOSZERO)
11413
0
  {
11414
0
  zero = TRUE;
11415
0
  cc++;
11416
0
  }
11417
11418
0
opcode = *cc;
11419
0
private_data_ptr = PRIVATE_DATA(cc);
11420
0
SLJIT_ASSERT(private_data_ptr != 0);
11421
0
BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
11422
0
switch(opcode)
11423
0
  {
11424
0
  case OP_BRAPOS:
11425
0
  case OP_SBRAPOS:
11426
0
  ccbegin = cc + 1 + LINK_SIZE;
11427
0
  break;
11428
11429
0
  case OP_CBRAPOS:
11430
0
  case OP_SCBRAPOS:
11431
0
  offset = GET2(cc, 1 + LINK_SIZE);
11432
  /* This case cannot be optimized in the same way as
11433
  normal capturing brackets. */
11434
0
  SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
11435
0
  cbraprivptr = OVECTOR_PRIV(offset);
11436
0
  /* From here on offset addresses the pair OVECTOR(offset) and
  OVECTOR(offset + 1); offset != 0 also marks the capturing variants. */
  offset <<= 1;
11437
0
  ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
11438
0
  break;
11439
11440
0
  default:
11441
0
  SLJIT_UNREACHABLE();
11442
0
  break;
11443
0
  }
11444
11445
0
framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
11446
0
BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
11447
0
/* Negative framesize: no frame is built. The reserved slots hold the saved
capture pair (plus capture_last when it is tracked) or STR_PTR, then the
control head when needed, then the flag word unless `zero` is set. */
if (framesize < 0)
11448
0
  {
11449
0
  if (offset != 0)
11450
0
    {
11451
0
    stacksize = 2;
11452
0
    if (common->capture_last_ptr != 0)
11453
0
      stacksize++;
11454
0
    }
11455
0
  else
11456
0
    stacksize = 1;
11457
11458
0
  if (needs_control_head)
11459
0
    stacksize++;
11460
0
  if (!zero)
11461
0
    stacksize++;
11462
11463
0
  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
11464
0
  allocate_stack(common, stacksize);
11465
0
  if (framesize == no_frame)
11466
0
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
11467
11468
0
  stack = 0;
11469
0
  if (offset != 0)
11470
0
    {
11471
0
    stack = 2;
11472
0
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
11473
0
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
11474
0
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
11475
0
    if (common->capture_last_ptr != 0)
11476
0
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
11477
0
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
11478
0
    if (needs_control_head)
11479
0
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
11480
0
    if (common->capture_last_ptr != 0)
11481
0
      {
11482
0
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
11483
0
      stack = 3;
11484
0
      }
11485
0
    }
11486
0
  else
11487
0
    {
11488
0
    if (needs_control_head)
11489
0
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
11490
0
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11491
0
    stack = 1;
11492
0
    }
11493
11494
0
  /* The flag word goes one slot above the control head slot, so stack is
  bumped for the flag store and stepped back for the control head store. */
  if (needs_control_head)
11495
0
    stack++;
11496
0
  if (!zero)
11497
0
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
11498
0
  if (needs_control_head)
11499
0
    {
11500
0
    stack--;
11501
0
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
11502
0
    }
11503
0
  }
11504
0
else
11505
0
  {
11506
0
  /* A frame is built: the slots are the flag word (unless `zero`), the
  control head (when needed), STR_PTR (non-capturing variants only), the
  previous private data value, and the frame written by init_frame. */
  stacksize = framesize + 1;
11507
0
  if (!zero)
11508
0
    stacksize++;
11509
0
  if (needs_control_head)
11510
0
    stacksize++;
11511
0
  if (offset == 0)
11512
0
    stacksize++;
11513
0
  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
11514
11515
0
  allocate_stack(common, stacksize);
11516
0
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11517
0
  if (needs_control_head)
11518
0
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
11519
0
  OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
11520
11521
0
  stack = 0;
11522
0
  if (!zero)
11523
0
    {
11524
0
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
11525
0
    stack = 1;
11526
0
    }
11527
0
  if (needs_control_head)
11528
0
    {
11529
0
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
11530
0
    stack++;
11531
0
    }
11532
0
  if (offset == 0)
11533
0
    {
11534
0
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
11535
0
    stack++;
11536
0
    }
11537
0
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
11538
0
  init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize);
11539
0
  /* Step back to the slot index where the control head was stored; the
  value is only read later when needs_control_head is set. */
  stack -= 1 + (offset == 0);
11540
0
  }
11541
11542
0
if (offset != 0)
11543
0
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
11544
11545
0
loop = LABEL();
11546
0
while (*cc != OP_KETRPOS)
11547
0
  {
11548
0
  backtrack->top = NULL;
11549
0
  backtrack->own_backtracks = NULL;
11550
0
  /* Advance cc by the link value stored after the opcode, then compile the
  code between ccbegin and the new cc as one alternative. */
  cc += GET(cc, 1);
11551
11552
0
  compile_matchingpath(common, ccbegin, cc, backtrack);
11553
0
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11554
0
    return NULL;
11555
11556
0
  /* Code emitted here runs after the alternative matched: record the new
  position, reset the control head, leave through emptymatch when the
  position did not move (S variants), clear the flag word and loop. */
  if (framesize < 0)
11557
0
    {
11558
0
    if (framesize == no_frame)
11559
0
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11560
11561
0
    if (offset != 0)
11562
0
      {
11563
0
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
11564
0
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
11565
0
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
11566
0
      if (common->capture_last_ptr != 0)
11567
0
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
11568
0
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
11569
0
      }
11570
0
    else
11571
0
      {
11572
0
      if (opcode == OP_SBRAPOS)
11573
0
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
11574
0
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11575
0
      }
11576
11577
    /* Even if the match is empty, we need to reset the control head. */
11578
0
    if (needs_control_head)
11579
0
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
11580
11581
0
    /* TMP1 holds the position saved before this iteration (loaded above for
    both S variants); equality with STR_PTR means an empty iteration. */
    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
11582
0
      add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
11583
11584
0
    if (!zero)
11585
0
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
11586
0
    }
11587
0
  else
11588
0
    {
11589
0
    if (offset != 0)
11590
0
      {
11591
0
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
11592
0
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
11593
0
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
11594
0
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
11595
0
      if (common->capture_last_ptr != 0)
11596
0
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
11597
0
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
11598
0
      }
11599
0
    else
11600
0
      {
11601
0
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11602
0
      OP2(SLJIT_SUB, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
11603
0
      if (opcode == OP_SBRAPOS)
11604
0
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
11605
0
      OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(-framesize - 2), STR_PTR, 0);
11606
0
      }
11607
11608
    /* Even if the match is empty, we need to reset the control head. */
11609
0
    if (needs_control_head)
11610
0
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
11611
11612
0
    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
11613
0
      add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
11614
11615
0
    if (!zero)
11616
0
      {
11617
0
      /* NOTE(review): this is the framesize >= 0 branch of the enclosing
      if, so the framesize < 0 arm below cannot be taken here. */
      if (framesize < 0)
11618
0
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
11619
0
      else
11620
0
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
11621
0
      }
11622
0
    }
11623
11624
0
  /* A successful iteration jumps back to attempt another one. */
  JUMPTO(SLJIT_JUMP, loop);
11625
0
  flush_stubs(common);
11626
11627
0
  compile_backtrackingpath(common, backtrack->top);
11628
0
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11629
0
    return NULL;
11630
0
  set_jumps(backtrack->own_backtracks, LABEL());
11631
11632
0
  /* The alternative failed: reload STR_PTR from where the last successful
  position was stored. */
  if (framesize < 0)
11633
0
    {
11634
0
    if (offset != 0)
11635
0
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
11636
0
    else
11637
0
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
11638
0
    }
11639
0
  else
11640
0
    {
11641
0
    if (offset != 0)
11642
0
      {
11643
      /* Last alternative. */
11644
0
      if (*cc == OP_KETRPOS)
11645
0
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11646
0
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
11647
0
      }
11648
0
    else
11649
0
      {
11650
0
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11651
0
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
11652
0
      }
11653
0
    }
11654
11655
0
  if (*cc == OP_KETRPOS)
11656
0
    break;
11657
0
  ccbegin = cc + 1 + LINK_SIZE;
11658
0
  }
11659
11660
/* We don't have to restore the control head in case of a failed match. */
11661
11662
0
backtrack->own_backtracks = NULL;
11663
0
/* Without OP_BRAPOSZERO the flag word must have been cleared by an
iteration; if it is still non-zero, take the backtrack path. */
if (!zero)
11664
0
  {
11665
0
  if (framesize < 0)
11666
0
    add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
11667
0
  else /* TMP2 is set to [private_data_ptr] above. */
11668
0
    add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0));
11669
0
  }
11670
11671
/* None of them matched. */
11672
0
set_jumps(emptymatch, LABEL());
11673
0
count_match(common);
11674
0
return cc + 1 + LINK_SIZE;
11675
0
}
11676
11677
/* Decodes the single-item iterator starting at cc into a normalized form.

Arguments:
  common      the JIT compiler state (used for next_opcode and the utf flag)
  cc          points to the iterator opcode
  opcode      out: the repeat kind rebased onto the OP_STAR ... OP_POSUPTO
              family, or OP_EXACT; "plus" forms are reported as the
              corresponding "star" form with *exact set to 1
  type        out: the repeated item: OP_CHAR, OP_CHARI, OP_NOT, OP_NOTI, the
              character type opcode read from the code, or the class opcode
  max         out: written only for the UPTO forms and for class ranges; it is
              left untouched otherwise, so the caller must initialize it
  exact       out: number of mandatory repetitions (0 when there are none)
  end         out: points just past the whole iterator

Returns:      pointer to the data of the repeated item (the character, the
              character type, or the first code unit after the class opcode). */
static SLJIT_INLINE PCRE2_SPTR get_iterator_parameters(compiler_common *common, PCRE2_SPTR cc, PCRE2_UCHAR *opcode, PCRE2_UCHAR *type, sljit_u32 *max, sljit_u32 *exact, PCRE2_SPTR *end)
11678
0
{
11679
0
int class_len;
11680
11681
0
*opcode = *cc;
11682
0
*exact = 0;
11683
11684
0
/* Each opcode family is rebased by subtracting the distance between its
first member and OP_STAR, so later code only handles one family. */
if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
11685
0
  {
11686
0
  cc++;
11687
0
  *type = OP_CHAR;
11688
0
  }
11689
0
else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
11690
0
  {
11691
0
  cc++;
11692
0
  *type = OP_CHARI;
11693
0
  *opcode -= OP_STARI - OP_STAR;
11694
0
  }
11695
0
else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
11696
0
  {
11697
0
  cc++;
11698
0
  *type = OP_NOT;
11699
0
  *opcode -= OP_NOTSTAR - OP_STAR;
11700
0
  }
11701
0
else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
11702
0
  {
11703
0
  cc++;
11704
0
  *type = OP_NOTI;
11705
0
  *opcode -= OP_NOTSTARI - OP_STAR;
11706
0
  }
11707
0
else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
11708
0
  {
11709
0
  cc++;
11710
0
  *opcode -= OP_TYPESTAR - OP_STAR;
11711
0
  /* OP_END is a placeholder; the real type is read from the code below,
  after any repeat count has been skipped. */
  *type = OP_END;
11712
0
  }
11713
0
else
11714
0
  {
11715
0
  /* Character classes: the repeat opcode follows the class data, and this
  branch returns on its own without reaching the switch below. */
  SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
11716
0
  *type = *opcode;
11717
0
  cc++;
11718
0
  /* class_len is a fixed value for OP_CLASS and OP_NCLASS and is read from
  the code for OP_XCLASS; the repeat opcode is at cc[class_len - 1]. */
  class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(PCRE2_UCHAR))) : GET(cc, 0);
11719
0
  *opcode = cc[class_len - 1];
11720
11721
0
  if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
11722
0
    {
11723
0
    *opcode -= OP_CRSTAR - OP_STAR;
11724
0
    *end = cc + class_len;
11725
11726
0
    if (*opcode == OP_PLUS || *opcode == OP_MINPLUS)
11727
0
      {
11728
0
      *exact = 1;
11729
0
      *opcode -= OP_PLUS - OP_STAR;
11730
0
      }
11731
0
    }
11732
0
  else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
11733
0
    {
11734
0
    *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
11735
0
    *end = cc + class_len;
11736
11737
0
    if (*opcode == OP_POSPLUS)
11738
0
      {
11739
0
      *exact = 1;
11740
0
      *opcode = OP_POSSTAR;
11741
0
      }
11742
0
    }
11743
0
  else
11744
0
    {
11745
0
    SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
11746
0
    /* Two counts follow the range opcode: the first is stored in *exact,
    the second in *max. */
    *max = GET2(cc, (class_len + IMM2_SIZE));
11747
0
    *exact = GET2(cc, class_len);
11748
11749
0
    /* A stored max of 0 is mapped to the star forms; otherwise the range
    is reduced to EXACT, QUERY or UPTO after the exact part is removed. */
    if (*max == 0)
11750
0
      {
11751
0
      if (*opcode == OP_CRPOSRANGE)
11752
0
        *opcode = OP_POSSTAR;
11753
0
      else
11754
0
        *opcode -= OP_CRRANGE - OP_STAR;
11755
0
      }
11756
0
    else
11757
0
      {
11758
0
      /* *max becomes the number of optional repetitions beyond *exact. */
      *max -= *exact;
11759
0
      if (*max == 0)
11760
0
        *opcode = OP_EXACT;
11761
0
      else if (*max == 1)
11762
0
        {
11763
0
        if (*opcode == OP_CRPOSRANGE)
11764
0
          *opcode = OP_POSQUERY;
11765
0
        else
11766
0
          *opcode -= OP_CRRANGE - OP_QUERY;
11767
0
        }
11768
0
      else
11769
0
        {
11770
0
        if (*opcode == OP_CRPOSRANGE)
11771
0
          *opcode = OP_POSUPTO;
11772
0
        else
11773
0
          *opcode -= OP_CRRANGE - OP_UPTO;
11774
0
        }
11775
0
      }
11776
0
    *end = cc + class_len + 2 * IMM2_SIZE;
11777
0
    }
11778
0
  return cc;
11779
0
  }
11780
11781
0
/* Non-class iterators: read the repeat count where the opcode carries one
and fold the "plus" forms into exact = 1 followed by the "star" form. */
switch(*opcode)
11782
0
  {
11783
0
  case OP_EXACT:
11784
0
  *exact = GET2(cc, 0);
11785
0
  cc += IMM2_SIZE;
11786
0
  break;
11787
11788
0
  case OP_PLUS:
11789
0
  case OP_MINPLUS:
11790
0
  *exact = 1;
11791
0
  *opcode -= OP_PLUS - OP_STAR;
11792
0
  break;
11793
11794
0
  case OP_POSPLUS:
11795
0
  *exact = 1;
11796
0
  *opcode = OP_POSSTAR;
11797
0
  break;
11798
11799
0
  case OP_UPTO:
11800
0
  case OP_MINUPTO:
11801
0
  case OP_POSUPTO:
11802
0
  *max = GET2(cc, 0);
11803
0
  cc += IMM2_SIZE;
11804
0
  break;
11805
0
  }
11806
11807
0
/* Character type iterators: the type opcode is the code unit at cc, and the
end of the item is found with next_opcode. */
if (*type == OP_END)
11808
0
  {
11809
0
  *type = *cc;
11810
0
  *end = next_opcode(common, cc);
11811
0
  cc++;
11812
0
  return cc;
11813
0
  }
11814
11815
0
/* Single character: one code unit, plus the extra units reported by
GET_EXTRALEN in UTF mode. */
*end = cc + 1;
11816
0
#ifdef SUPPORT_UNICODE
11817
0
if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
11818
0
#endif
11819
0
return cc;
11820
0
}
11821
11822
static PCRE2_SPTR compile_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11823
0
{
11824
0
DEFINE_COMPILER;
11825
0
backtrack_common *backtrack;
11826
0
PCRE2_UCHAR opcode;
11827
0
PCRE2_UCHAR type;
11828
0
sljit_u32 max = 0, exact;
11829
0
sljit_s32 early_fail_ptr = PRIVATE_DATA(cc + 1);
11830
0
sljit_s32 early_fail_type;
11831
0
BOOL charpos_enabled;
11832
0
PCRE2_UCHAR charpos_char;
11833
0
unsigned int charpos_othercasebit;
11834
0
PCRE2_SPTR end;
11835
0
jump_list *no_match = NULL;
11836
0
jump_list *no_char1_match = NULL;
11837
0
struct sljit_jump *jump = NULL;
11838
0
struct sljit_label *label;
11839
0
int private_data_ptr = PRIVATE_DATA(cc);
11840
0
int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
11841
0
int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
11842
0
int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + SSIZE_OF(sw);
11843
0
int tmp_base, tmp_offset;
11844
0
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11845
0
BOOL use_tmp;
11846
0
#endif
11847
11848
0
PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL);
11849
11850
0
early_fail_type = (early_fail_ptr & 0x7);
11851
0
early_fail_ptr >>= 3;
11852
11853
/* During recursion, these optimizations are disabled. */
11854
0
if (common->early_fail_start_ptr == 0 && common->fast_forward_bc_ptr == NULL)
11855
0
  {
11856
0
  early_fail_ptr = 0;
11857
0
  early_fail_type = type_skip;
11858
0
  }
11859
11860
0
SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || early_fail_ptr == 0
11861
0
  || (early_fail_ptr >= common->early_fail_start_ptr && early_fail_ptr <= common->early_fail_end_ptr));
11862
11863
0
if (early_fail_type == type_fail)
11864
0
  add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr));
11865
11866
0
cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
11867
11868
0
if (type != OP_EXTUNI)
11869
0
  {
11870
0
  tmp_base = TMP3;
11871
0
  tmp_offset = 0;
11872
0
  }
11873
0
else
11874
0
  {
11875
0
  tmp_base = SLJIT_MEM1(SLJIT_SP);
11876
0
  tmp_offset = POSSESSIVE0;
11877
0
  }
11878
11879
/* Handle fixed part first. */
11880
0
if (exact > 1)
11881
0
  {
11882
0
  SLJIT_ASSERT(early_fail_ptr == 0);
11883
11884
0
  if (common->mode == PCRE2_JIT_COMPLETE
11885
0
#ifdef SUPPORT_UNICODE
11886
0
      && !common->utf
11887
0
#endif
11888
0
      && type != OP_ANYNL && type != OP_EXTUNI)
11889
0
    {
11890
0
    OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
11891
0
    add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0));
11892
0
    OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
11893
0
    label = LABEL();
11894
0
    compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, FALSE);
11895
0
    OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11896
0
    JUMPTO(SLJIT_NOT_ZERO, label);
11897
0
    }
11898
0
  else
11899
0
    {
11900
0
    OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
11901
0
    label = LABEL();
11902
0
    compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE);
11903
0
    OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11904
0
    JUMPTO(SLJIT_NOT_ZERO, label);
11905
0
    }
11906
0
  }
11907
0
else if (exact == 1)
11908
0
  compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE);
11909
11910
0
if (early_fail_type == type_fail_range)
11911
0
  {
11912
  /* Range end first, followed by range start. */
11913
0
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr);
11914
0
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + SSIZE_OF(sw));
11915
0
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, TMP2, 0);
11916
0
  OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, TMP2, 0);
11917
0
  add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_LESS_EQUAL, TMP2, 0, TMP1, 0));
11918
11919
0
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11920
0
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + SSIZE_OF(sw), STR_PTR, 0);
11921
0
  }
11922
11923
0
switch(opcode)
11924
0
  {
11925
0
  case OP_STAR:
11926
0
  case OP_UPTO:
11927
0
  SLJIT_ASSERT(early_fail_ptr == 0 || opcode == OP_STAR);
11928
11929
0
  if (type == OP_ANYNL || type == OP_EXTUNI)
11930
0
    {
11931
0
    SLJIT_ASSERT(private_data_ptr == 0);
11932
0
    SLJIT_ASSERT(early_fail_ptr == 0);
11933
11934
0
    allocate_stack(common, 2);
11935
0
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11936
0
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
11937
11938
0
    if (opcode == OP_UPTO)
11939
0
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, max);
11940
11941
0
    label = LABEL();
11942
0
    compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
11943
0
    if (opcode == OP_UPTO)
11944
0
      {
11945
0
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
11946
0
      OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
11947
0
      jump = JUMP(SLJIT_ZERO);
11948
0
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
11949
0
      }
11950
11951
    /* We cannot use TMP3 because of allocate_stack. */
11952
0
    allocate_stack(common, 1);
11953
0
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11954
0
    JUMPTO(SLJIT_JUMP, label);
11955
0
    if (jump != NULL)
11956
0
      JUMPHERE(jump);
11957
0
    BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11958
0
    break;
11959
0
    }
11960
0
#ifdef SUPPORT_UNICODE
11961
0
  else if (type == OP_ALLANY && !common->invalid_utf)
11962
#else
11963
  else if (type == OP_ALLANY)
11964
#endif
11965
0
    {
11966
0
    if (opcode == OP_STAR)
11967
0
      {
11968
0
      if (private_data_ptr == 0)
11969
0
        allocate_stack(common, 2);
11970
11971
0
      OP1(SLJIT_MOV, base, offset0, STR_END, 0);
11972
0
      OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11973
11974
0
      OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
11975
0
      process_partial_match(common);
11976
11977
0
      if (early_fail_ptr != 0)
11978
0
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
11979
0
      BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11980
0
      break;
11981
0
      }
11982
0
#ifdef SUPPORT_UNICODE
11983
0
    else if (!common->utf)
11984
#else
11985
    else
11986
#endif
11987
0
      {
11988
0
      if (private_data_ptr == 0)
11989
0
        allocate_stack(common, 2);
11990
11991
0
      OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11992
0
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));
11993
11994
0
      if (common->mode == PCRE2_JIT_COMPLETE)
11995
0
        {
11996
0
        OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
11997
0
        SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
11998
0
        }
11999
0
      else
12000
0
        {
12001
0
        jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
12002
0
        process_partial_match(common);
12003
0
        JUMPHERE(jump);
12004
0
        }
12005
12006
0
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12007
12008
0
      if (early_fail_ptr != 0)
12009
0
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
12010
0
      BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
12011
0
      break;
12012
0
      }
12013
0
    }
12014
12015
0
  charpos_enabled = FALSE;
12016
0
  charpos_char = 0;
12017
0
  charpos_othercasebit = 0;
12018
12019
0
  if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI))
12020
0
    {
12021
0
#ifdef SUPPORT_UNICODE
12022
0
    charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]);
12023
#else
12024
    charpos_enabled = TRUE;
12025
#endif
12026
0
    if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1))
12027
0
      {
12028
0
      charpos_othercasebit = char_get_othercase_bit(common, end + 1);
12029
0
      if (charpos_othercasebit == 0)
12030
0
        charpos_enabled = FALSE;
12031
0
      }
12032
12033
0
    if (charpos_enabled)
12034
0
      {
12035
0
      charpos_char = end[1];
12036
      /* Consume the OP_CHAR opcode. */
12037
0
      end += 2;
12038
0
#if PCRE2_CODE_UNIT_WIDTH == 8
12039
0
      SLJIT_ASSERT((charpos_othercasebit >> 8) == 0);
12040
#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
12041
      SLJIT_ASSERT((charpos_othercasebit >> 9) == 0);
12042
      if ((charpos_othercasebit & 0x100) != 0)
12043
        charpos_othercasebit = (charpos_othercasebit & 0xff) << 8;
12044
#endif
12045
0
      if (charpos_othercasebit != 0)
12046
0
        charpos_char |= charpos_othercasebit;
12047
12048
0
      BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE;
12049
0
      BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = charpos_char;
12050
0
      BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = charpos_othercasebit;
12051
0
      }
12052
0
    }
12053
12054
0
  if (charpos_enabled)
12055
0
    {
12056
0
    if (opcode == OP_UPTO)
12057
0
      OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max + 1);
12058
12059
    /* Search the first instance of charpos_char. */
12060
0
    jump = JUMP(SLJIT_JUMP);
12061
0
    label = LABEL();
12062
0
    if (opcode == OP_UPTO)
12063
0
      {
12064
0
      OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
12065
0
      add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_ZERO));
12066
0
      }
12067
0
    compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, FALSE);
12068
0
    if (early_fail_ptr != 0)
12069
0
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
12070
0
    JUMPHERE(jump);
12071
12072
0
    detect_partial_match(common, &backtrack->own_backtracks);
12073
0
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
12074
0
    if (charpos_othercasebit != 0)
12075
0
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
12076
0
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
12077
12078
0
    if (private_data_ptr == 0)
12079
0
      allocate_stack(common, 2);
12080
0
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12081
0
    OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
12082
12083
0
    if (opcode == OP_UPTO)
12084
0
      {
12085
0
      OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
12086
0
      add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
12087
0
      }
12088
12089
    /* Search the last instance of charpos_char. */
12090
0
    label = LABEL();
12091
0
    compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
12092
0
    if (early_fail_ptr != 0)
12093
0
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
12094
0
    detect_partial_match(common, &no_match);
12095
0
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
12096
0
    if (charpos_othercasebit != 0)
12097
0
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
12098
12099
0
    if (opcode == OP_STAR)
12100
0
      {
12101
0
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
12102
0
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12103
0
      JUMPTO(SLJIT_JUMP, label);
12104
0
      }
12105
0
    else
12106
0
      {
12107
0
      jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char);
12108
0
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12109
0
      JUMPHERE(jump);
12110
0
      OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
12111
0
      JUMPTO(SLJIT_NOT_ZERO, label);
12112
0
      }
12113
12114
0
    set_jumps(no_match, LABEL());
12115
0
    OP2(SLJIT_ADD, STR_PTR, 0, base, offset0, SLJIT_IMM, IN_UCHARS(1));
12116
0
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12117
0
    }
12118
0
  else
12119
0
    {
12120
0
    if (private_data_ptr == 0)
12121
0
      allocate_stack(common, 2);
12122
12123
0
    OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
12124
0
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
12125
0
    use_tmp = (!HAS_VIRTUAL_REGISTERS && opcode == OP_STAR);
12126
0
    SLJIT_ASSERT(!use_tmp || tmp_base == TMP3);
12127
12128
0
    if (common->utf)
12129
0
      OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0);
12130
0
#endif
12131
0
    if (opcode == OP_UPTO)
12132
0
      OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
12133
12134
0
    detect_partial_match(common, &no_match);
12135
0
    label = LABEL();
12136
0
    compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
12137
0
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
12138
0
    if (common->utf)
12139
0
      OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0);
12140
0
#endif
12141
12142
0
    if (opcode == OP_UPTO)
12143
0
      {
12144
0
      OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
12145
0
      add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
12146
0
      }
12147
12148
0
    detect_partial_match_to(common, label);
12149
0
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
12150
12151
0
    set_jumps(no_char1_match, LABEL());
12152
0
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
12153
0
    if (common->utf)
12154
0
      {
12155
0
      set_jumps(no_match, LABEL());
12156
0
      if (use_tmp)
12157
0
        {
12158
0
        OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
12159
0
        OP1(SLJIT_MOV, base, offset0, TMP3, 0);
12160
0
        }
12161
0
      else
12162
0
        OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12163
0
      }
12164
0
    else
12165
0
#endif
12166
0
      {
12167
0
      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
12168
0
      set_jumps(no_match, LABEL());
12169
0
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12170
0
      }
12171
12172
0
    if (early_fail_ptr != 0)
12173
0
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
12174
0
    }
12175
12176
0
  BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
12177
0
  break;
12178
12179
0
  case OP_MINSTAR:
12180
0
  if (private_data_ptr == 0)
12181
0
    allocate_stack(common, 1);
12182
0
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12183
0
  BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
12184
0
  if (early_fail_ptr != 0)
12185
0
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
12186
0
  break;
12187
12188
0
  case OP_MINUPTO:
12189
0
  SLJIT_ASSERT(early_fail_ptr == 0);
12190
0
  if (private_data_ptr == 0)
12191
0
    allocate_stack(common, 2);
12192
0
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12193
0
  OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1);
12194
0
  BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
12195
0
  break;
12196
12197
0
  case OP_QUERY:
12198
0
  case OP_MINQUERY:
12199
0
  SLJIT_ASSERT(early_fail_ptr == 0);
12200
0
  if (private_data_ptr == 0)
12201
0
    allocate_stack(common, 1);
12202
0
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12203
0
  if (opcode == OP_QUERY)
12204
0
    compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
12205
0
  BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
12206
0
  break;
12207
12208
0
  case OP_EXACT:
12209
0
  break;
12210
12211
0
  case OP_POSSTAR:
12212
0
#if defined SUPPORT_UNICODE
12213
0
  if (type == OP_ALLANY && !common->invalid_utf)
12214
#else
12215
  if (type == OP_ALLANY)
12216
#endif
12217
0
    {
12218
0
    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
12219
0
    process_partial_match(common);
12220
0
    if (early_fail_ptr != 0)
12221
0
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
12222
0
    break;
12223
0
    }
12224
12225
0
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
12226
0
  if (type == OP_EXTUNI || common->utf)
12227
0
    {
12228
0
    OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
12229
0
    detect_partial_match(common, &no_match);
12230
0
    label = LABEL();
12231
0
    compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
12232
0
    OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
12233
0
    detect_partial_match_to(common, label);
12234
12235
0
    set_jumps(no_match, LABEL());
12236
0
    OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
12237
0
    if (early_fail_ptr != 0)
12238
0
      {
12239
0
      if (!HAS_VIRTUAL_REGISTERS && tmp_base == TMP3)
12240
0
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, TMP3, 0);
12241
0
      else
12242
0
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
12243
0
      }
12244
0
    break;
12245
0
    }
12246
0
#endif
12247
12248
0
  detect_partial_match(common, &no_match);
12249
0
  label = LABEL();
12250
0
  compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
12251
0
  detect_partial_match_to(common, label);
12252
0
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
12253
12254
0
  set_jumps(no_char1_match, LABEL());
12255
0
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
12256
0
  set_jumps(no_match, LABEL());
12257
0
  if (early_fail_ptr != 0)
12258
0
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
12259
0
  break;
12260
12261
0
  case OP_POSUPTO:
12262
0
  SLJIT_ASSERT(early_fail_ptr == 0);
12263
0
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
12264
0
  if (common->utf)
12265
0
    {
12266
0
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
12267
0
    OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
12268
12269
0
    detect_partial_match(common, &no_match);
12270
0
    label = LABEL();
12271
0
    compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
12272
0
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
12273
0
    OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
12274
0
    add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
12275
0
    detect_partial_match_to(common, label);
12276
12277
0
    set_jumps(no_match, LABEL());
12278
0
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
12279
0
    break;
12280
0
    }
12281
0
#endif
12282
12283
0
  if (type == OP_ALLANY)
12284
0
    {
12285
0
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));
12286
12287
0
    if (common->mode == PCRE2_JIT_COMPLETE)
12288
0
      {
12289
0
      OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
12290
0
      SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
12291
0
      }
12292
0
    else
12293
0
      {
12294
0
      jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
12295
0
      process_partial_match(common);
12296
0
      JUMPHERE(jump);
12297
0
      }
12298
0
    break;
12299
0
    }
12300
12301
0
  OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
12302
12303
0
  detect_partial_match(common, &no_match);
12304
0
  label = LABEL();
12305
0
  compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
12306
0
  OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
12307
0
  add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
12308
0
  detect_partial_match_to(common, label);
12309
0
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
12310
12311
0
  set_jumps(no_char1_match, LABEL());
12312
0
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
12313
0
  set_jumps(no_match, LABEL());
12314
0
  break;
12315
12316
0
  case OP_POSQUERY:
12317
0
  SLJIT_ASSERT(early_fail_ptr == 0);
12318
0
  OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
12319
0
  compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
12320
0
  OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
12321
0
  set_jumps(no_match, LABEL());
12322
0
  OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
12323
0
  break;
12324
12325
0
  default:
12326
0
  SLJIT_UNREACHABLE();
12327
0
  break;
12328
0
  }
12329
12330
0
count_match(common);
12331
0
return end;
12332
0
}
12333
12334
/* Emits the matching path for OP_FAIL, OP_ACCEPT and OP_ASSERT_ACCEPT (the
three opcodes compile_matchingpath dispatches here).

OP_FAIL adds an unconditional jump to this item's own backtrack list. The
accept opcodes jump to the accept label (or are queued on common->accept when
the label does not exist yet). When none of the shortcut conditions hold, the
PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART bits of jit_arguments->options are
tested at run time before accepting.

Returns cc + 1 on every path. */
static SLJIT_INLINE PCRE2_SPTR compile_fail_accept_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
12335
0
{
12336
0
DEFINE_COMPILER;
12337
0
backtrack_common *backtrack;
12338
12339
0
PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
12340
12341
0
/* OP_FAIL: always take the backtrack path of this item. */
if (*cc == OP_FAIL)
12342
0
  {
12343
0
  add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_JUMP));
12344
0
  return cc + 1;
12345
0
  }
12346
12347
0
/* OP_ACCEPT with currententry == NULL under PCRE2_ENDANCHORED: branch to
restart_match unless STR_PTR equals STR_END. */
if (*cc == OP_ACCEPT && common->currententry == NULL && (common->re->overall_options & PCRE2_ENDANCHORED) != 0)
12348
0
  add_jump(compiler, &common->restart_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
12349
12350
0
if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
12351
0
  {
12352
  /* No need to check notempty conditions. */
12353
0
  if (common->accept_label == NULL)
12354
0
    add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
12355
0
  else
12356
0
    JUMPTO(SLJIT_JUMP, common->accept_label);
12357
0
  return cc + 1;
12358
0
  }
12359
12360
0
/* Accept right away when STR_PTR differs from the value stored in the
OVECTOR(0) slot (presumably the match start, i.e. a non-empty match). */
if (common->accept_label == NULL)
12361
0
  add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));
12362
0
else
12363
0
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
12364
12365
0
/* Load jit_arguments->options into TMP2. With virtual registers the
ARGUMENTS value is copied to TMP1 first and TMP1 is used as the base. */
if (HAS_VIRTUAL_REGISTERS)
12366
0
  {
12367
0
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
12368
0
  OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
12369
0
  }
12370
0
else
12371
0
  OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options));
12372
12373
0
/* PCRE2_NOTEMPTY set: backtrack. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
12374
0
add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_NOT_ZERO));
12375
0
/* PCRE2_NOTEMPTY_ATSTART clear: accept. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
12376
0
if (common->accept_label == NULL)
12377
0
  add_jump(compiler, &common->accept, JUMP(SLJIT_ZERO));
12378
0
else
12379
0
  JUMPTO(SLJIT_ZERO, common->accept_label);
12380
12381
0
/* PCRE2_NOTEMPTY_ATSTART set: accept only when STR_PTR differs from
jit_arguments->str, otherwise backtrack. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
12382
0
if (common->accept_label == NULL)
12383
0
  add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
12384
0
else
12385
0
  CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
12386
0
add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_JUMP));
12387
0
return cc + 1;
12388
0
}
12389
12390
/* Emits the matching path for OP_CLOSE. The value read with GET2(cc, 1) is
used as the bracket index: STR_PTR is stored into OVECTOR(2 * index + 1) and,
unless the bracket is flagged in common->optimized_cbracket, the value held in
OVECTOR_PRIV(index) is copied into OVECTOR(2 * index).

No code is emitted when common->currententry is set. Returns the address
after the opcode and its operand (cc + 1 + IMM2_SIZE). */
static SLJIT_INLINE PCRE2_SPTR compile_close_matchingpath(compiler_common *common, PCRE2_SPTR cc)
12391
0
{
12392
0
DEFINE_COMPILER;
12393
0
int offset = GET2(cc, 1);
12394
0
BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
12395
12396
/* Data will be discarded anyway... */
12397
0
if (common->currententry != NULL)
12398
0
  return cc + 1 + IMM2_SIZE;
12399
12400
0
/* Fetch the private start value into TMP1 before offset is doubled. */
if (!optimized_cbracket)
12401
0
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset));
12402
0
/* From here on offset indexes the (start, end) pair in the ovector. */
offset <<= 1;
12403
0
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
12404
0
if (!optimized_cbracket)
12405
0
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
12406
0
return cc + 1 + IMM2_SIZE;
12407
0
}
12408
12409
/* Emits the matching path for the backtracking control verbs that
compile_matchingpath dispatches here (OP_PRUNE, OP_SKIP, OP_THEN, OP_COMMIT
and their _ARG forms). A backtrack record is pushed for every verb.

OP_SKIP saves STR_PTR in a newly allocated stack slot. OP_COMMIT_ARG,
OP_PRUNE_ARG and OP_THEN_ARG store the address cc + 2 into the mark_ptr
private slot and into jit_arguments->mark_ptr. The remaining opcodes emit no
code here beyond the backtrack record.

Returns the address of the next opcode. */
static SLJIT_INLINE PCRE2_SPTR compile_control_verb_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
12410
0
{
12411
0
DEFINE_COMPILER;
12412
0
backtrack_common *backtrack;
12413
0
PCRE2_UCHAR opcode = *cc;
12414
0
PCRE2_SPTR ccend = cc + 1;
12415
12416
0
/* The _ARG forms are followed by 2 + cc[1] more code units (cc[1] is
presumably the length of the verb's name -- confirm against the opcode
layout). */
if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG ||
12417
0
    opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
12418
0
  ccend += 2 + cc[1];
12419
12420
0
PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
12421
12422
0
/* OP_SKIP: remember the current subject position on the backtrack stack. */
if (opcode == OP_SKIP)
12423
0
  {
12424
0
  allocate_stack(common, 1);
12425
0
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
12426
0
  return ccend;
12427
0
  }
12428
12429
0
/* Publish cc + 2 as the current mark, both in the private mark_ptr slot and
in jit_arguments. OP_SKIP_ARG is intentionally not part of this list. */
if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
12430
0
  {
12431
0
  if (HAS_VIRTUAL_REGISTERS)
12432
0
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
12433
0
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
12434
0
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
12435
0
  OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
12436
0
  }
12437
12438
0
return ccend;
12439
0
}
12440
12441
/* One-element opcode buffer holding OP_THEN_TRAP. then_trap backtrack records
set their common.cc to point at it. */
static PCRE2_UCHAR then_trap_opcode[1] = { OP_THEN_TRAP };
12442
12443
/* Pushes a then_trap_backtrack record for the code range cc..ccend and emits
the code that places a "then trap" entry on the backtrack stack.

The record becomes common->then_trap; its cc points at then_trap_opcode, its
start is the offset of cc from common->start, and its framesize comes from
get_framesize(). The emitted code allocates 3 + max(framesize, 0) stack
slots, updates the control head private slot, and fills the three highest
slots with the start offset, type_then_trap and the previous control head
value. When framesize is non-negative, init_frame() fills the remaining
slots. */
static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
12444
0
{
12445
0
DEFINE_COMPILER;
12446
0
backtrack_common *backtrack;
12447
0
BOOL needs_control_head;
12448
0
int size;
12449
12450
0
PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
12451
0
common->then_trap = BACKTRACK_AS(then_trap_backtrack);
12452
0
BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
12453
0
BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
12454
0
BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);
12455
12456
0
/* Three slots for the trap itself plus the frame, if there is one. */
size = BACKTRACK_AS(then_trap_backtrack)->framesize;
12457
0
size = 3 + (size < 0 ? 0 : size);
12458
12459
0
/* Keep the old control head in TMP2; it is stored in the trap entry below. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
12460
0
allocate_stack(common, size);
12461
0
/* The new control head is STACK_TOP advanced by the (size - 3) frame words,
or STACK_TOP itself when no frame words were allocated. */
if (size > 3)
12462
0
  OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
12463
0
else
12464
0
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
12465
0
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
12466
0
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
12467
0
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
12468
12469
0
size = BACKTRACK_AS(then_trap_backtrack)->framesize;
12470
0
if (size >= 0)
12471
0
  init_frame(common, cc, ccend, size - 1, 0);
12472
0
}
12473
12474
/* Emits the matching path for the opcode sequence cc..ccend by dispatching
each opcode to its compile_*_matchingpath helper. Every helper returns the
address of the next opcode to process.

Helpers that take a jump list receive parent->top->simple_backtracks when
parent->top is set and parent->own_backtracks otherwise.

If common->has_then is set and then_offsets marks this position, the sequence
is wrapped in a "then trap": a tail backtrack item is pushed before the loop
and a head item after it, and common->then_trap is restored at the end.

The function returns early, without the final bookkeeping, when a helper
returns NULL or an unknown opcode is met. */
static void compile_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
12475
0
{
12476
0
DEFINE_COMPILER;
12477
0
backtrack_common *backtrack;
12478
0
BOOL has_then_trap = FALSE;
12479
0
then_trap_backtrack *save_then_trap = NULL;
12480
12481
0
SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));
12482
12483
0
if (common->has_then && common->then_offsets[cc - common->start] != 0)
12484
0
  {
12485
0
  SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
12486
0
  has_then_trap = TRUE;
12487
0
  save_then_trap = common->then_trap;
12488
  /* Tail item on backtrack. */
12489
0
  compile_then_trap_matchingpath(common, cc, ccend, parent);
12490
0
  }
12491
12492
0
while (cc < ccend)
12493
0
  {
12494
0
  switch(*cc)
12495
0
    {
12496
0
    /* Opcodes handled by compile_simple_assertion_matchingpath. */
    case OP_SOD:
12497
0
    case OP_SOM:
12498
0
    case OP_NOT_WORD_BOUNDARY:
12499
0
    case OP_WORD_BOUNDARY:
12500
0
    case OP_EODN:
12501
0
    case OP_EOD:
12502
0
    case OP_DOLL:
12503
0
    case OP_DOLLM:
12504
0
    case OP_CIRC:
12505
0
    case OP_CIRCM:
12506
0
    case OP_NOT_UCP_WORD_BOUNDARY:
12507
0
    case OP_UCP_WORD_BOUNDARY:
12508
0
    cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks);
12509
0
    break;
12510
12511
0
    /* Opcodes handled directly by compile_char1_matchingpath. */
    case OP_NOT_DIGIT:
12512
0
    case OP_DIGIT:
12513
0
    case OP_NOT_WHITESPACE:
12514
0
    case OP_WHITESPACE:
12515
0
    case OP_NOT_WORDCHAR:
12516
0
    case OP_WORDCHAR:
12517
0
    case OP_ANY:
12518
0
    case OP_ALLANY:
12519
0
    case OP_ANYBYTE:
12520
0
    case OP_NOTPROP:
12521
0
    case OP_PROP:
12522
0
    case OP_ANYNL:
12523
0
    case OP_NOT_HSPACE:
12524
0
    case OP_HSPACE:
12525
0
    case OP_NOT_VSPACE:
12526
0
    case OP_VSPACE:
12527
0
    case OP_EXTUNI:
12528
0
    case OP_NOT:
12529
0
    case OP_NOTI:
12530
0
    cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);
12531
0
    break;
12532
12533
0
    /* Save the current OVECTOR(0) value on the backtrack stack and replace
    it with STR_PTR. */
    case OP_SET_SOM:
12534
0
    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
12535
0
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
12536
0
    allocate_stack(common, 1);
12537
0
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
12538
0
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
12539
0
    cc++;
12540
0
    break;
12541
12542
0
    /* compile_charn_matchingpath is used only in PCRE2_JIT_COMPLETE mode;
    the other modes go through compile_char1_matchingpath. */
    case OP_CHAR:
12543
0
    case OP_CHARI:
12544
0
    if (common->mode == PCRE2_JIT_COMPLETE)
12545
0
      cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks);
12546
0
    else
12547
0
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);
12548
0
    break;
12549
12550
0
    /* All repeat opcodes below are handled by compile_iterator_matchingpath. */
    case OP_STAR:
12551
0
    case OP_MINSTAR:
12552
0
    case OP_PLUS:
12553
0
    case OP_MINPLUS:
12554
0
    case OP_QUERY:
12555
0
    case OP_MINQUERY:
12556
0
    case OP_UPTO:
12557
0
    case OP_MINUPTO:
12558
0
    case OP_EXACT:
12559
0
    case OP_POSSTAR:
12560
0
    case OP_POSPLUS:
12561
0
    case OP_POSQUERY:
12562
0
    case OP_POSUPTO:
12563
0
    case OP_STARI:
12564
0
    case OP_MINSTARI:
12565
0
    case OP_PLUSI:
12566
0
    case OP_MINPLUSI:
12567
0
    case OP_QUERYI:
12568
0
    case OP_MINQUERYI:
12569
0
    case OP_UPTOI:
12570
0
    case OP_MINUPTOI:
12571
0
    case OP_EXACTI:
12572
0
    case OP_POSSTARI:
12573
0
    case OP_POSPLUSI:
12574
0
    case OP_POSQUERYI:
12575
0
    case OP_POSUPTOI:
12576
0
    case OP_NOTSTAR:
12577
0
    case OP_NOTMINSTAR:
12578
0
    case OP_NOTPLUS:
12579
0
    case OP_NOTMINPLUS:
12580
0
    case OP_NOTQUERY:
12581
0
    case OP_NOTMINQUERY:
12582
0
    case OP_NOTUPTO:
12583
0
    case OP_NOTMINUPTO:
12584
0
    case OP_NOTEXACT:
12585
0
    case OP_NOTPOSSTAR:
12586
0
    case OP_NOTPOSPLUS:
12587
0
    case OP_NOTPOSQUERY:
12588
0
    case OP_NOTPOSUPTO:
12589
0
    case OP_NOTSTARI:
12590
0
    case OP_NOTMINSTARI:
12591
0
    case OP_NOTPLUSI:
12592
0
    case OP_NOTMINPLUSI:
12593
0
    case OP_NOTQUERYI:
12594
0
    case OP_NOTMINQUERYI:
12595
0
    case OP_NOTUPTOI:
12596
0
    case OP_NOTMINUPTOI:
12597
0
    case OP_NOTEXACTI:
12598
0
    case OP_NOTPOSSTARI:
12599
0
    case OP_NOTPOSPLUSI:
12600
0
    case OP_NOTPOSQUERYI:
12601
0
    case OP_NOTPOSUPTOI:
12602
0
    case OP_TYPESTAR:
12603
0
    case OP_TYPEMINSTAR:
12604
0
    case OP_TYPEPLUS:
12605
0
    case OP_TYPEMINPLUS:
12606
0
    case OP_TYPEQUERY:
12607
0
    case OP_TYPEMINQUERY:
12608
0
    case OP_TYPEUPTO:
12609
0
    case OP_TYPEMINUPTO:
12610
0
    case OP_TYPEEXACT:
12611
0
    case OP_TYPEPOSSTAR:
12612
0
    case OP_TYPEPOSPLUS:
12613
0
    case OP_TYPEPOSQUERY:
12614
0
    case OP_TYPEPOSUPTO:
12615
0
    cc = compile_iterator_matchingpath(common, cc, parent);
12616
0
    break;
12617
12618
0
    /* A class followed by a repeat opcode (OP_CRSTAR..OP_CRPOSRANGE) is
    compiled as an iterator, otherwise as a single character match. The
    repeat opcode is looked up 32 bytes after the opcode (presumably just
    past the class bitmap). */
    case OP_CLASS:
12619
0
    case OP_NCLASS:
12620
0
    if (cc[1 + (32 / sizeof(PCRE2_UCHAR))] >= OP_CRSTAR && cc[1 + (32 / sizeof(PCRE2_UCHAR))] <= OP_CRPOSRANGE)
12621
0
      cc = compile_iterator_matchingpath(common, cc, parent);
12622
0
    else
12623
0
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);
12624
0
    break;
12625
12626
0
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
12627
0
    /* Same split as OP_CLASS; the opcode following the item is found via
    the length stored at GET(cc, 1). */
    case OP_XCLASS:
12628
0
    if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE)
12629
0
      cc = compile_iterator_matchingpath(common, cc, parent);
12630
0
    else
12631
0
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);
12632
0
    break;
12633
0
#endif
12634
12635
0
    /* Back references: repeated ones use the ref iterator, single ones
    are matched once and the opcode plus its operand is skipped here. */
    case OP_REF:
12636
0
    case OP_REFI:
12637
0
    if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE)
12638
0
      cc = compile_ref_iterator_matchingpath(common, cc, parent);
12639
0
    else
12640
0
      {
12641
0
      compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE, FALSE);
12642
0
      cc += 1 + IMM2_SIZE;
12643
0
      }
12644
0
    break;
12645
12646
0
    /* DNREF variants carry two operands; the non-repeated form runs
    compile_dnref_search before matching the reference. */
    case OP_DNREF:
12647
0
    case OP_DNREFI:
12648
0
    if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE)
12649
0
      cc = compile_ref_iterator_matchingpath(common, cc, parent);
12650
0
    else
12651
0
      {
12652
0
      compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks);
12653
0
      compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE, FALSE);
12654
0
      cc += 1 + 2 * IMM2_SIZE;
12655
0
      }
12656
0
    break;
12657
12658
0
    case OP_RECURSE:
12659
0
    cc = compile_recurse_matchingpath(common, cc, parent);
12660
0
    break;
12661
12662
0
    case OP_CALLOUT:
12663
0
    case OP_CALLOUT_STR:
12664
0
    cc = compile_callout_matchingpath(common, cc, parent);
12665
0
    break;
12666
12667
0
    /* Assertions get an assert_backtrack record, which is handed to
    compile_assert_matchingpath. */
    case OP_ASSERT:
12668
0
    case OP_ASSERT_NOT:
12669
0
    case OP_ASSERTBACK:
12670
0
    case OP_ASSERTBACK_NOT:
12671
0
    PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
12672
0
    cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
12673
0
    break;
12674
12675
0
    /* OP_BRAMINZERO: the bracket itself is skipped on the matching path
    (cc moves to bracketend). STR_PTR is saved on the stack; when the
    bracket ends with OP_KETRMIN an extra zero slot is stored as well. */
    case OP_BRAMINZERO:
12676
0
    PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
12677
0
    cc = bracketend(cc + 1);
12678
0
    if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
12679
0
      {
12680
0
      allocate_stack(common, 1);
12681
0
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
12682
0
      }
12683
0
    else
12684
0
      {
12685
0
      allocate_stack(common, 2);
12686
0
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
12687
0
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
12688
0
      }
12689
0
    BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
12690
0
    count_match(common);
12691
0
    break;
12692
12693
0
    /* Bracket-like opcodes handled by compile_bracket_matchingpath. */
    case OP_ASSERT_NA:
12694
0
    case OP_ASSERTBACK_NA:
12695
0
    case OP_ONCE:
12696
0
    case OP_SCRIPT_RUN:
12697
0
    case OP_BRA:
12698
0
    case OP_CBRA:
12699
0
    case OP_COND:
12700
0
    case OP_SBRA:
12701
0
    case OP_SCBRA:
12702
0
    case OP_SCOND:
12703
0
    cc = compile_bracket_matchingpath(common, cc, parent);
12704
0
    break;
12705
12706
0
    /* OP_BRAZERO: the following opcode decides between the bracket and
    the assertion compiler (opcodes above OP_ASSERTBACK_NOT are brackets). */
    case OP_BRAZERO:
12707
0
    if (cc[1] > OP_ASSERTBACK_NOT)
12708
0
      cc = compile_bracket_matchingpath(common, cc, parent);
12709
0
    else
12710
0
      {
12711
0
      PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
12712
0
      cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
12713
0
      }
12714
0
    break;
12715
12716
0
    case OP_BRAPOS:
12717
0
    case OP_CBRAPOS:
12718
0
    case OP_SBRAPOS:
12719
0
    case OP_SCBRAPOS:
12720
0
    case OP_BRAPOSZERO:
12721
0
    cc = compile_bracketpos_matchingpath(common, cc, parent);
12722
0
    break;
12723
12724
0
    /* OP_MARK: the previous mark pointer is saved on the stack and cc + 2
    becomes the new mark (private slot and jit_arguments->mark_ptr). With
    common->has_skip_arg a five-slot entry is built instead, and the
    control head is pointed at it: old control head, type_mark, cc + 2,
    STR_PTR and the previous mark. */
    case OP_MARK:
12725
0
    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
12726
0
    SLJIT_ASSERT(common->mark_ptr != 0);
12727
0
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
12728
0
    allocate_stack(common, common->has_skip_arg ? 5 : 1);
12729
0
    if (HAS_VIRTUAL_REGISTERS)
12730
0
      OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
12731
0
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
12732
0
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
12733
0
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
12734
0
    OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
12735
0
    if (common->has_skip_arg)
12736
0
      {
12737
0
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
12738
0
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
12739
0
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
12740
0
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
12741
0
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
12742
0
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
12743
0
      }
12744
0
    cc += 1 + 2 + cc[1];
12745
0
    break;
12746
12747
0
    case OP_PRUNE:
12748
0
    case OP_PRUNE_ARG:
12749
0
    case OP_SKIP:
12750
0
    case OP_SKIP_ARG:
12751
0
    case OP_THEN:
12752
0
    case OP_THEN_ARG:
12753
0
    case OP_COMMIT:
12754
0
    case OP_COMMIT_ARG:
12755
0
    cc = compile_control_verb_matchingpath(common, cc, parent);
12756
0
    break;
12757
12758
0
    case OP_FAIL:
12759
0
    case OP_ACCEPT:
12760
0
    case OP_ASSERT_ACCEPT:
12761
0
    cc = compile_fail_accept_matchingpath(common, cc, parent);
12762
0
    break;
12763
12764
0
    case OP_CLOSE:
12765
0
    cc = compile_close_matchingpath(common, cc);
12766
0
    break;
12767
12768
0
    /* OP_SKIPZERO: no code is emitted; the following group is skipped. */
    case OP_SKIPZERO:
12769
0
    cc = bracketend(cc + 1);
12770
0
    break;
12771
12772
0
    default:
12773
0
    SLJIT_UNREACHABLE();
12774
0
    return;
12775
0
    }
12776
0
  /* A NULL result from a helper aborts the compilation of this sequence. */
  if (cc == NULL)
12777
0
    return;
12778
0
  }
12779
12780
0
if (has_then_trap)
12781
0
  {
12782
  /* Head item on backtrack. */
12783
0
  PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
12784
0
  BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
12785
0
  BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
12786
0
  common->then_trap = save_then_trap;
12787
0
  }
12788
0
SLJIT_ASSERT(cc == ccend);
12789
0
}
12790
12791
/* The backtrack-record helper macros used by the matching path compilers are
removed here; the backtracking path code below uses the macros defined next. */
#undef PUSH_BACKTRACK
12792
#undef PUSH_BACKTRACK_NOVALUE
12793
#undef BACKTRACK_AS
12794
12795
/* Calls compile_backtrackingpath() for 'current' and returns from the
enclosing function (which must return void) when the sljit compiler reports
an error. Expects 'common' and 'compiler' to be in scope. */
#define COMPILE_BACKTRACKINGPATH(current) \
12796
0
  do \
12797
0
    { \
12798
0
    compile_backtrackingpath(common, (current)); \
12799
0
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
12800
0
      return; \
12801
0
    } \
12802
0
  while (0)
12803
12804
0
/* Casts the variable named 'current' in the enclosing scope to a pointer to
the given backtrack structure type. */
#define CURRENT_AS(type) ((type *)current)
12805
12806
/* Emits the backtracking path for a single character iterator. The opcode,
character type and limits are decoded again with get_iterator_parameters().

The iterator state lives in up to two words (offset0 and offset1). They are
private data slots addressed from SLJIT_SP when PRIVATE_DATA(cc) is non-zero,
otherwise the two topmost backtrack stack slots, which are freed once the
iterator has no more alternatives to offer.

Each case either jumps back to the stored matchingpath label to retry, or
falls through to the end, where current->own_backtracks is bound. */
static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12807
0
{
12808
0
DEFINE_COMPILER;
12809
0
PCRE2_SPTR cc = current->cc;
12810
0
PCRE2_UCHAR opcode;
12811
0
PCRE2_UCHAR type;
12812
0
sljit_u32 max = 0, exact;
12813
0
struct sljit_label *label = NULL;
12814
0
struct sljit_jump *jump = NULL;
12815
0
jump_list *jumplist = NULL;
12816
0
PCRE2_SPTR end;
12817
0
int private_data_ptr = PRIVATE_DATA(cc);
12818
0
int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
12819
0
int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
12820
0
int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + SSIZE_OF(sw);
12821
12822
0
cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
12823
12824
0
switch(opcode)
12825
0
  {
12826
0
  case OP_STAR:
12827
0
  case OP_UPTO:
12828
0
  if (type == OP_ANYNL || type == OP_EXTUNI)
12829
0
    {
12830
0
    /* Saved positions are popped one by one from the stack; a non-zero
    value is a position to retry from, zero ends the iteration. */
    SLJIT_ASSERT(private_data_ptr == 0);
12831
0
    set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
12832
0
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12833
0
    free_stack(common, 1);
12834
0
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12835
0
    }
12836
0
  else
12837
0
    {
12838
0
    if (CURRENT_AS(char_iterator_backtrack)->u.charpos.enabled)
12839
0
      {
12840
0
      /* Walk backwards from the saved position (offset0) looking for a
      place where the preceding code unit, after OR-ing othercasebit when
      it is non-zero, equals u.charpos.chr. The walk stops once STR_PTR is
      no longer above the limit stored at offset1. */
      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12841
0
      OP1(SLJIT_MOV, TMP2, 0, base, offset1);
12842
0
      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
12843
12844
0
      jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
12845
0
      label = LABEL();
12846
0
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
12847
0
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12848
0
      if (CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit != 0)
12849
0
        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit);
12850
0
      CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.chr, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12851
0
      move_back(common, NULL, TRUE);
12852
0
      CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP2, 0, label);
12853
0
      }
12854
0
    else
12855
0
      {
12856
0
      /* Give back one character: while the saved position is above the
      limit at offset1, move back, store the new position and retry. */
      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12857
0
      jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
12858
0
      move_back(common, NULL, TRUE);
12859
0
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12860
0
      JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12861
0
      }
12862
0
    JUMPHERE(jump);
12863
0
    if (private_data_ptr == 0)
12864
0
      free_stack(common, 2);
12865
0
    }
12866
0
  break;
12867
12868
0
  /* Minimizing star: try to match one more character from the saved
  position; on success store the new position and retry the matching path,
  on failure release the state. */
  case OP_MINSTAR:
12869
0
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12870
0
  compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
12871
0
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12872
0
  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12873
0
  set_jumps(jumplist, LABEL());
12874
0
  if (private_data_ptr == 0)
12875
0
    free_stack(common, 1);
12876
0
  break;
12877
12878
0
  /* Like OP_MINSTAR, but offset1 holds a counter that is decremented
  first; reaching zero ends the iteration without another attempt. */
  case OP_MINUPTO:
12879
0
  OP1(SLJIT_MOV, TMP1, 0, base, offset1);
12880
0
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12881
0
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
12882
0
  add_jump(compiler, &jumplist, JUMP(SLJIT_ZERO));
12883
12884
0
  OP1(SLJIT_MOV, base, offset1, TMP1, 0);
12885
0
  compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
12886
0
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12887
0
  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12888
12889
0
  set_jumps(jumplist, LABEL());
12890
0
  if (private_data_ptr == 0)
12891
0
    free_stack(common, 2);
12892
0
  break;
12893
12894
0
  /* The saved position is read and the slot zeroed, so the state can be
  consumed only once: a non-zero value resumes the matching path. The jumps
  collected in u.backtracks enter at the second read-and-clear sequence. */
  case OP_QUERY:
12895
0
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12896
0
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
12897
0
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12898
0
  jump = JUMP(SLJIT_JUMP);
12899
0
  set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
12900
0
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12901
0
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
12902
0
  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12903
0
  JUMPHERE(jump);
12904
0
  if (private_data_ptr == 0)
12905
0
    free_stack(common, 1);
12906
0
  break;
12907
12908
0
  /* The saved position is read and the slot zeroed; zero means the state
  was already used. Otherwise one character is matched and the matching
  path is resumed. */
  case OP_MINQUERY:
12909
0
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12910
0
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
12911
0
  jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
12912
0
  compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
12913
0
  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12914
0
  set_jumps(jumplist, LABEL());
12915
0
  JUMPHERE(jump);
12916
0
  if (private_data_ptr == 0)
12917
0
    free_stack(common, 1);
12918
0
  break;
12919
12920
0
  /* No backtracking code is emitted for these opcodes. */
  case OP_EXACT:
12921
0
  case OP_POSSTAR:
12922
0
  case OP_POSQUERY:
12923
0
  case OP_POSUPTO:
12924
0
  break;
12925
12926
0
  default:
12927
0
  SLJIT_UNREACHABLE();
12928
0
  break;
12929
0
  }
12930
12931
0
set_jumps(current->own_backtracks, LABEL());
12932
0
}
12933
12934
/* Emits the backtracking path for a repeated back reference. The repeat
opcode ('type') is read from the position after the reference operands:
one IMM2 operand for OP_REF/OP_REFI, two for the other opcodes handled here.

Even type values are treated as the maximizing case and odd values as the
other case, as selected by the low bit test below. */
static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12935
0
{
12936
0
DEFINE_COMPILER;
12937
0
PCRE2_SPTR cc = current->cc;
12938
0
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
12939
0
PCRE2_UCHAR type;
12940
12941
0
type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
12942
12943
0
if ((type & 0x1) == 0)
12944
0
  {
12945
  /* Maximize case. */
12946
0
  /* Pop one saved position; a non-zero value resumes the matching path. */
  set_jumps(current->own_backtracks, LABEL());
12947
0
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12948
0
  free_stack(common, 1);
12949
0
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
12950
0
  return;
12951
0
  }
12952
12953
0
/* Other case: the slot at STACK(0) is read without being popped; a non-zero
value resumes the matching path. Otherwise the state is released: two slots
for OP_REF/OP_REFI, three for the two-operand forms. */
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12954
0
CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
12955
0
set_jumps(current->own_backtracks, LABEL());
12956
0
free_stack(common, ref ? 2 : 3);
12957
0
}
12958
12959
/* Emits the backtracking path for a recursion item.

When the pattern was not inlined, a fast call is made to the entry's
backtrack label (or queued on entry->backtrack_calls if the label does not
exist yet), and the matching path is resumed when TMP1 is non-zero after the
call. When it was inlined, the backtracking path of current->top is compiled
in place. In both cases current->own_backtracks is bound at the end. */
static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12960
0
{
12961
0
DEFINE_COMPILER;
12962
0
recurse_entry *entry;
12963
12964
0
if (!CURRENT_AS(recurse_backtrack)->inlined_pattern)
12965
0
  {
12966
0
  entry = CURRENT_AS(recurse_backtrack)->entry;
12967
0
  if (entry->backtrack_label == NULL)
12968
0
    add_jump(compiler, &entry->backtrack_calls, JUMP(SLJIT_FAST_CALL));
12969
0
  else
12970
0
    JUMPTO(SLJIT_FAST_CALL, entry->backtrack_label);
12971
0
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(recurse_backtrack)->matchingpath);
12972
0
  }
12973
0
else
12974
0
  compile_backtrackingpath(common, current->top);
12975
12976
0
set_jumps(current->own_backtracks, LABEL());
12977
0
}
12978
12979
/* Emits the backtracking path for an assertion, optionally preceded by
OP_BRAZERO (OP_BRAMINZERO is asserted not to occur here).

With OP_BRAZERO one stack slot holds a saved STR_PTR value: non-zero means
the assertion was entered and the matching path can be resumed once with the
slot cleared; zero means that alternative has already been used. When the
assertion has a frame (framesize >= 0) and is a positive assertion, the frame
is reverted before own_backtracks is bound. */
static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12980
0
{
12981
0
DEFINE_COMPILER;
12982
0
PCRE2_SPTR cc = current->cc;
12983
0
PCRE2_UCHAR bra = OP_BRA;
12984
0
struct sljit_jump *brajump = NULL;
12985
12986
0
SLJIT_ASSERT(*cc != OP_BRAMINZERO);
12987
0
if (*cc == OP_BRAZERO)
12988
0
  {
12989
0
  bra = *cc;
12990
0
  cc++;
12991
0
  }
12992
12993
0
/* Reload the STR_PTR value saved in the slot at STACK(0). */
if (bra == OP_BRAZERO)
12994
0
  {
12995
0
  SLJIT_ASSERT(current->own_backtracks == NULL);
12996
0
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12997
0
  }
12998
12999
0
/* No frame to revert: bind the backtrack jumps; for OP_BRAZERO clear the
slot and resume the matching path when the reloaded STR_PTR is non-zero,
otherwise drop the slot. */
if (CURRENT_AS(assert_backtrack)->framesize < 0)
13000
0
  {
13001
0
  set_jumps(current->own_backtracks, LABEL());
13002
13003
0
  if (bra == OP_BRAZERO)
13004
0
    {
13005
0
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
13006
0
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
13007
0
    free_stack(common, 1);
13008
0
    }
13009
0
  return;
13010
0
  }
13011
13012
0
if (bra == OP_BRAZERO)
13013
0
  {
13014
0
  /* Negative assertions get the same clear-and-resume handling and are
  finished here. */
  if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
13015
0
    {
13016
0
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
13017
0
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
13018
0
    free_stack(common, 1);
13019
0
    return;
13020
0
    }
13021
0
  /* Positive assertions: drop the slot; brajump skips the rest of the
  emitted code when the reloaded STR_PTR is zero. */
  free_stack(common, 1);
13022
0
  brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
13023
0
  }
13024
13025
0
if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
13026
0
  {
13027
0
  /* Reload STACK_TOP from the private data slot, call the revertframes
  routine, then advance STACK_TOP by framesize - 1 words and store the value
  read from STACK(-2) back into the private data slot. */
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr);
13028
0
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13029
0
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
13030
0
  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(assert_backtrack)->framesize - 1) * sizeof(sljit_sw));
13031
0
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr, TMP1, 0);
13032
13033
0
  set_jumps(current->own_backtracks, LABEL());
13034
0
  }
13035
0
else
13036
0
  set_jumps(current->own_backtracks, LABEL());
13037
13038
0
if (bra == OP_BRAZERO)
13039
0
  {
13040
  /* We know there is enough place on the stack. */
13041
0
  /* Move STACK_TOP down by one word, store 0 in the slot and resume the
  matching path; brajump (taken when STR_PTR was zero) lands after this. */
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
13042
0
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
13043
0
  JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_backtrack)->matchingpath);
13044
0
  JUMPHERE(brajump);
13045
0
  }
13046
0
}
13047
13048
static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13049
0
{
13050
0
DEFINE_COMPILER;
13051
0
int opcode, stacksize, alt_count, alt_max;
13052
0
int offset = 0;
13053
0
int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
13054
0
int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
13055
0
PCRE2_SPTR cc = current->cc;
13056
0
PCRE2_SPTR ccbegin;
13057
0
PCRE2_SPTR ccprev;
13058
0
PCRE2_UCHAR bra = OP_BRA;
13059
0
PCRE2_UCHAR ket;
13060
0
assert_backtrack *assert;
13061
0
BOOL has_alternatives;
13062
0
BOOL needs_control_head = FALSE;
13063
0
BOOL has_vreverse;
13064
0
struct sljit_jump *brazero = NULL;
13065
0
struct sljit_jump *next_alt = NULL;
13066
0
struct sljit_jump *once = NULL;
13067
0
struct sljit_jump *cond = NULL;
13068
0
struct sljit_label *rmin_label = NULL;
13069
0
struct sljit_label *exact_label = NULL;
13070
0
struct sljit_jump *mov_addr = NULL;
13071
13072
0
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
13073
0
  {
13074
0
  bra = *cc;
13075
0
  cc++;
13076
0
  }
13077
13078
0
opcode = *cc;
13079
0
ccbegin = bracketend(cc) - 1 - LINK_SIZE;
13080
0
ket = *ccbegin;
13081
0
if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
13082
0
  {
13083
0
  repeat_ptr = PRIVATE_DATA(ccbegin);
13084
0
  repeat_type = PRIVATE_DATA(ccbegin + 2);
13085
0
  repeat_count = PRIVATE_DATA(ccbegin + 3);
13086
0
  SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
13087
0
  if (repeat_type == OP_UPTO)
13088
0
    ket = OP_KETRMAX;
13089
0
  if (repeat_type == OP_MINUPTO)
13090
0
    ket = OP_KETRMIN;
13091
0
  }
13092
0
ccbegin = cc;
13093
0
cc += GET(cc, 1);
13094
0
has_alternatives = *cc == OP_ALT;
13095
0
if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
13096
0
  has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
13097
0
if (opcode == OP_CBRA || opcode == OP_SCBRA)
13098
0
  offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
13099
0
if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
13100
0
  opcode = OP_SCOND;
13101
13102
0
alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;
13103
13104
/* Decoding the needs_control_head in framesize. */
13105
0
if (opcode == OP_ONCE)
13106
0
  {
13107
0
  needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
13108
0
  CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
13109
0
  }
13110
13111
0
if (ket != OP_KET && repeat_type != 0)
13112
0
  {
13113
  /* TMP1 is used in OP_KETRMIN below. */
13114
0
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13115
0
  free_stack(common, 1);
13116
0
  if (repeat_type == OP_UPTO)
13117
0
    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
13118
0
  else
13119
0
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
13120
0
  }
13121
13122
0
if (ket == OP_KETRMAX)
13123
0
  {
13124
0
  if (bra == OP_BRAZERO)
13125
0
    {
13126
0
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13127
0
    free_stack(common, 1);
13128
0
    brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
13129
0
    }
13130
0
  }
13131
0
else if (ket == OP_KETRMIN)
13132
0
  {
13133
0
  if (bra != OP_BRAMINZERO)
13134
0
    {
13135
0
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13136
0
    if (repeat_type != 0)
13137
0
      {
13138
      /* TMP1 was set a few lines above. */
13139
0
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
13140
      /* Drop STR_PTR for non-greedy plus quantifier. */
13141
0
      if (opcode != OP_ONCE)
13142
0
        free_stack(common, 1);
13143
0
      }
13144
0
    else if (opcode >= OP_SBRA || opcode == OP_ONCE)
13145
0
      {
13146
      /* Checking zero-length iteration. */
13147
0
      if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
13148
0
        CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
13149
0
      else
13150
0
        {
13151
0
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
13152
0
        CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 2), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
13153
0
        }
13154
      /* Drop STR_PTR for non-greedy plus quantifier. */
13155
0
      if (opcode != OP_ONCE)
13156
0
        free_stack(common, 1);
13157
0
      }
13158
0
    else
13159
0
      JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
13160
0
    }
13161
0
  rmin_label = LABEL();
13162
0
  if (repeat_type != 0)
13163
0
    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
13164
0
  }
13165
0
else if (bra == OP_BRAZERO)
13166
0
  {
13167
0
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13168
0
  free_stack(common, 1);
13169
0
  brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
13170
0
  }
13171
0
else if (repeat_type == OP_EXACT)
13172
0
  {
13173
0
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
13174
0
  exact_label = LABEL();
13175
0
  }
13176
13177
0
if (offset != 0)
13178
0
  {
13179
0
  if (common->capture_last_ptr != 0)
13180
0
    {
13181
0
    SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
13182
0
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13183
0
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13184
0
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
13185
0
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
13186
0
    free_stack(common, 3);
13187
0
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
13188
0
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
13189
0
    }
13190
0
  else if (common->optimized_cbracket[offset >> 1] == 0)
13191
0
    {
13192
0
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13193
0
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13194
0
    free_stack(common, 2);
13195
0
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
13196
0
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
13197
0
    }
13198
0
  }
13199
13200
0
if (SLJIT_UNLIKELY(opcode == OP_ONCE))
13201
0
  {
13202
0
  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
13203
0
    {
13204
0
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
13205
0
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13206
0
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
13207
0
    }
13208
0
  once = JUMP(SLJIT_JUMP);
13209
0
  }
13210
0
else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
13211
0
  {
13212
0
  if (has_alternatives)
13213
0
    {
13214
    /* Always exactly one alternative. */
13215
0
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13216
0
    free_stack(common, 1);
13217
13218
0
    alt_max = 2;
13219
0
    next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
13220
0
    }
13221
0
  }
13222
0
else if (has_alternatives)
13223
0
  {
13224
0
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13225
0
  free_stack(common, 1);
13226
13227
0
  if (alt_max > 3)
13228
0
    {
13229
0
    sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
13230
13231
0
    SLJIT_ASSERT(CURRENT_AS(bracket_backtrack)->u.matching_mov_addr);
13232
0
    sljit_set_label(CURRENT_AS(bracket_backtrack)->u.matching_mov_addr, LABEL());
13233
0
    sljit_emit_op0(compiler, SLJIT_ENDBR);
13234
0
    }
13235
0
  else
13236
0
    next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
13237
0
  }
13238
13239
0
COMPILE_BACKTRACKINGPATH(current->top);
13240
0
if (current->own_backtracks)
13241
0
  set_jumps(current->own_backtracks, LABEL());
13242
13243
0
if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
13244
0
  {
13245
  /* Conditional block always has at most one alternative. */
13246
0
  if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
13247
0
    {
13248
0
    SLJIT_ASSERT(has_alternatives);
13249
0
    assert = CURRENT_AS(bracket_backtrack)->u.assert;
13250
0
    if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
13251
0
      {
13252
0
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
13253
0
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13254
0
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
13255
0
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
13256
0
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
13257
0
      }
13258
0
    cond = JUMP(SLJIT_JUMP);
13259
0
    set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
13260
0
    }
13261
0
  else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
13262
0
    {
13263
0
    SLJIT_ASSERT(has_alternatives);
13264
0
    cond = JUMP(SLJIT_JUMP);
13265
0
    set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
13266
0
    }
13267
0
  else
13268
0
    SLJIT_ASSERT(!has_alternatives);
13269
0
  }
13270
13271
0
if (has_alternatives)
13272
0
  {
13273
0
  alt_count = 1;
13274
0
  do
13275
0
    {
13276
0
    current->top = NULL;
13277
0
    current->own_backtracks = NULL;
13278
0
    current->simple_backtracks = NULL;
13279
    /* Conditional blocks always have an additional alternative, even if it is empty. */
13280
0
    if (*cc == OP_ALT)
13281
0
      {
13282
0
      ccprev = cc + 1 + LINK_SIZE;
13283
0
      cc += GET(cc, 1);
13284
13285
0
      has_vreverse = FALSE;
13286
0
      if (opcode == OP_ASSERTBACK || opcode == OP_ASSERTBACK_NA)
13287
0
        {
13288
0
        SLJIT_ASSERT(private_data_ptr != 0);
13289
0
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
13290
13291
0
        has_vreverse = (*ccprev == OP_VREVERSE);
13292
0
        if (*ccprev == OP_REVERSE || has_vreverse)
13293
0
          ccprev = compile_reverse_matchingpath(common, ccprev, current);
13294
0
        }
13295
0
      else if (opcode != OP_COND && opcode != OP_SCOND)
13296
0
        {
13297
0
        if (opcode != OP_ONCE)
13298
0
          {
13299
0
          if (private_data_ptr != 0)
13300
0
            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
13301
0
          else
13302
0
            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13303
0
          }
13304
0
        else
13305
0
          OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
13306
0
        }
13307
13308
0
      compile_matchingpath(common, ccprev, cc, current);
13309
0
      if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13310
0
        return;
13311
13312
0
      switch (opcode)
13313
0
        {
13314
0
        case OP_ASSERTBACK_NA:
13315
0
          if (has_vreverse)
13316
0
            {
13317
0
            SLJIT_ASSERT(current->top != NULL && PRIVATE_DATA(ccbegin + 1));
13318
0
            add_jump(compiler, &current->top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
13319
0
            }
13320
13321
0
          if (PRIVATE_DATA(ccbegin + 1))
13322
0
            OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
13323
0
          break;
13324
0
        case OP_ASSERT_NA:
13325
0
          OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
13326
0
          break;
13327
0
        case OP_SCRIPT_RUN:
13328
0
          match_script_run_common(common, private_data_ptr, current);
13329
0
          break;
13330
0
        }
13331
0
      }
13332
13333
    /* Instructions after the current alternative is successfully matched. */
13334
    /* There is a similar code in compile_bracket_matchingpath. */
13335
0
    if (opcode == OP_ONCE)
13336
0
      match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
13337
13338
0
    stacksize = 0;
13339
0
    if (repeat_type == OP_MINUPTO)
13340
0
      {
13341
      /* We need to preserve the counter. TMP2 will be used below. */
13342
0
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
13343
0
      stacksize++;
13344
0
      }
13345
0
    if (ket != OP_KET || bra != OP_BRA)
13346
0
      stacksize++;
13347
0
    if (offset != 0)
13348
0
      {
13349
0
      if (common->capture_last_ptr != 0)
13350
0
        stacksize++;
13351
0
      if (common->optimized_cbracket[offset >> 1] == 0)
13352
0
        stacksize += 2;
13353
0
      }
13354
0
    if (opcode != OP_ONCE)
13355
0
      stacksize++;
13356
13357
0
    if (stacksize > 0)
13358
0
      allocate_stack(common, stacksize);
13359
13360
0
    stacksize = 0;
13361
0
    if (repeat_type == OP_MINUPTO)
13362
0
      {
13363
      /* TMP2 was set above. */
13364
0
      OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
13365
0
      stacksize++;
13366
0
      }
13367
13368
0
    if (ket != OP_KET || bra != OP_BRA)
13369
0
      {
13370
0
      if (ket != OP_KET)
13371
0
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
13372
0
      else
13373
0
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
13374
0
      stacksize++;
13375
0
      }
13376
13377
0
    if (offset != 0)
13378
0
      stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
13379
13380
0
    if (opcode != OP_ONCE)
13381
0
      {
13382
0
      if (alt_max <= 3)
13383
0
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);
13384
0
      else
13385
0
        mov_addr = sljit_emit_mov_addr(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
13386
0
      }
13387
13388
0
    if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
13389
0
      {
13390
      /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
13391
0
      SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
13392
0
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
13393
0
      }
13394
13395
0
    JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);
13396
13397
0
    if (opcode != OP_ONCE)
13398
0
      {
13399
0
      if (alt_max <= 3)
13400
0
        {
13401
0
        JUMPHERE(next_alt);
13402
0
        alt_count++;
13403
0
        if (alt_count < alt_max)
13404
0
          {
13405
0
          SLJIT_ASSERT(alt_count == 2 && alt_max == 3);
13406
0
          next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
13407
0
          }
13408
0
        }
13409
0
      else
13410
0
        {
13411
0
        sljit_set_label(mov_addr, LABEL());
13412
0
        sljit_emit_op0(compiler, SLJIT_ENDBR);
13413
0
        }
13414
0
      }
13415
13416
0
    COMPILE_BACKTRACKINGPATH(current->top);
13417
0
    if (current->own_backtracks)
13418
0
      set_jumps(current->own_backtracks, LABEL());
13419
0
    SLJIT_ASSERT(!current->simple_backtracks);
13420
0
    }
13421
0
  while (*cc == OP_ALT);
13422
13423
0
  if (cond != NULL)
13424
0
    {
13425
0
    SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
13426
0
    assert = CURRENT_AS(bracket_backtrack)->u.assert;
13427
0
    if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
13428
0
      {
13429
0
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
13430
0
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13431
0
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
13432
0
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
13433
0
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
13434
0
      }
13435
0
    JUMPHERE(cond);
13436
0
    }
13437
13438
  /* Free the STR_PTR. */
13439
0
  if (private_data_ptr == 0)
13440
0
    free_stack(common, 1);
13441
0
  }
13442
13443
0
if (offset != 0)
13444
0
  {
13445
  /* Using both tmp register is better for instruction scheduling. */
13446
0
  if (common->optimized_cbracket[offset >> 1] != 0)
13447
0
    {
13448
0
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13449
0
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13450
0
    free_stack(common, 2);
13451
0
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
13452
0
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
13453
0
    }
13454
0
  else
13455
0
    {
13456
0
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13457
0
    free_stack(common, 1);
13458
0
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
13459
0
    }
13460
0
  }
13461
0
else if (opcode == OP_ASSERTBACK_NA && PRIVATE_DATA(ccbegin + 1))
13462
0
  {
13463
0
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13464
0
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13465
0
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
13466
0
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
13467
0
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), TMP2, 0);
13468
0
  free_stack(common, 4);
13469
0
  }
13470
0
else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
13471
0
  {
13472
0
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
13473
0
  free_stack(common, 1);
13474
0
  }
13475
0
else if (opcode == OP_ONCE)
13476
0
  {
13477
0
  cc = ccbegin + GET(ccbegin, 1);
13478
0
  stacksize = needs_control_head ? 1 : 0;
13479
13480
0
  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
13481
0
    {
13482
    /* Reset head and drop saved frame. */
13483
0
    stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
13484
0
    }
13485
0
  else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
13486
0
    {
13487
    /* The STR_PTR must be released. */
13488
0
    stacksize++;
13489
0
    }
13490
13491
0
  if (stacksize > 0)
13492
0
    free_stack(common, stacksize);
13493
13494
0
  JUMPHERE(once);
13495
  /* Restore previous private_data_ptr */
13496
0
  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
13497
0
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 1));
13498
0
  else if (ket == OP_KETRMIN)
13499
0
    {
13500
0
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13501
    /* See the comment below. */
13502
0
    free_stack(common, 2);
13503
0
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
13504
0
    }
13505
0
  }
13506
13507
0
if (repeat_type == OP_EXACT)
13508
0
  {
13509
0
  OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
13510
0
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
13511
0
  CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
13512
0
  }
13513
0
else if (ket == OP_KETRMAX)
13514
0
  {
13515
0
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13516
0
  if (bra != OP_BRAZERO)
13517
0
    free_stack(common, 1);
13518
13519
0
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
13520
0
  if (bra == OP_BRAZERO)
13521
0
    {
13522
0
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13523
0
    JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
13524
0
    JUMPHERE(brazero);
13525
0
    free_stack(common, 1);
13526
0
    }
13527
0
  }
13528
0
else if (ket == OP_KETRMIN)
13529
0
  {
13530
0
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13531
13532
  /* OP_ONCE removes everything in case of a backtrack, so we don't
13533
  need to explicitly release the STR_PTR. The extra release would
13534
  affect badly the free_stack(2) above. */
13535
0
  if (opcode != OP_ONCE)
13536
0
    free_stack(common, 1);
13537
0
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
13538
0
  if (opcode == OP_ONCE)
13539
0
    free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
13540
0
  else if (bra == OP_BRAMINZERO)
13541
0
    free_stack(common, 1);
13542
0
  }
13543
0
else if (bra == OP_BRAZERO)
13544
0
  {
13545
0
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13546
0
  JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
13547
0
  JUMPHERE(brazero);
13548
0
  }
13549
0
}
13550
13551
static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13552
0
{
13553
0
DEFINE_COMPILER;
13554
0
int offset;
13555
0
struct sljit_jump *jump;
13556
0
PCRE2_SPTR cc;
13557
13558
/* No retry on backtrack, just drop everything. */
13559
0
if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
13560
0
  {
13561
0
  cc = current->cc;
13562
13563
0
  if (*cc == OP_BRAPOSZERO)
13564
0
    cc++;
13565
13566
0
  if (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS)
13567
0
    {
13568
0
    offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
13569
0
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13570
0
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13571
0
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
13572
0
    if (common->capture_last_ptr != 0)
13573
0
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
13574
0
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
13575
0
    if (common->capture_last_ptr != 0)
13576
0
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
13577
0
    }
13578
0
  set_jumps(current->own_backtracks, LABEL());
13579
0
  free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
13580
0
  return;
13581
0
  }
13582
13583
0
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
13584
0
add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13585
0
OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracketpos_backtrack)->framesize - 1) * sizeof(sljit_sw));
13586
13587
0
if (current->own_backtracks)
13588
0
  {
13589
0
  jump = JUMP(SLJIT_JUMP);
13590
0
  set_jumps(current->own_backtracks, LABEL());
13591
  /* Drop the stack frame. */
13592
0
  free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
13593
0
  JUMPHERE(jump);
13594
0
  }
13595
0
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracketpos_backtrack)->framesize - 1));
13596
0
}
13597
13598
static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13599
0
{
13600
0
assert_backtrack backtrack;
13601
13602
0
current->top = NULL;
13603
0
current->own_backtracks = NULL;
13604
0
current->simple_backtracks = NULL;
13605
0
if (current->cc[1] > OP_ASSERTBACK_NOT)
13606
0
  {
13607
  /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
13608
0
  compile_bracket_matchingpath(common, current->cc, current);
13609
0
  compile_bracket_backtrackingpath(common, current->top);
13610
0
  }
13611
0
else
13612
0
  {
13613
0
  memset(&backtrack, 0, sizeof(backtrack));
13614
0
  backtrack.common.cc = current->cc;
13615
0
  backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
13616
  /* Manual call of compile_assert_matchingpath. */
13617
0
  compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
13618
0
  }
13619
0
SLJIT_ASSERT(!current->simple_backtracks && !current->own_backtracks);
13620
0
}
13621
13622
static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13623
0
{
13624
0
DEFINE_COMPILER;
13625
0
PCRE2_UCHAR opcode = *current->cc;
13626
0
struct sljit_label *loop;
13627
0
struct sljit_jump *jump;
13628
13629
0
if (opcode == OP_THEN || opcode == OP_THEN_ARG)
13630
0
  {
13631
0
  if (common->then_trap != NULL)
13632
0
    {
13633
0
    SLJIT_ASSERT(common->control_head_ptr != 0);
13634
13635
0
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
13636
0
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
13637
0
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
13638
0
    jump = JUMP(SLJIT_JUMP);
13639
13640
0
    loop = LABEL();
13641
0
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13642
0
    JUMPHERE(jump);
13643
0
    CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0, loop);
13644
0
    CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0, loop);
13645
0
    add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
13646
0
    return;
13647
0
    }
13648
0
  else if (!common->local_quit_available && common->in_positive_assertion)
13649
0
    {
13650
0
    add_jump(compiler, &common->positive_assertion_quit, JUMP(SLJIT_JUMP));
13651
0
    return;
13652
0
    }
13653
0
  }
13654
13655
0
if (common->local_quit_available)
13656
0
  {
13657
  /* Abort match with a fail. */
13658
0
  if (common->quit_label == NULL)
13659
0
    add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
13660
0
  else
13661
0
    JUMPTO(SLJIT_JUMP, common->quit_label);
13662
0
  return;
13663
0
  }
13664
13665
0
if (opcode == OP_SKIP_ARG)
13666
0
  {
13667
0
  SLJIT_ASSERT(common->control_head_ptr != 0 && TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
13668
0
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
13669
0
  OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
13670
0
  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_search_mark));
13671
13672
0
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0);
13673
0
  add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0));
13674
0
  return;
13675
0
  }
13676
13677
0
if (opcode == OP_SKIP)
13678
0
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13679
0
else
13680
0
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
13681
0
add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
13682
0
}
13683
13684
static SLJIT_INLINE void compile_vreverse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13685
0
{
13686
0
DEFINE_COMPILER;
13687
0
struct sljit_jump *jump;
13688
0
struct sljit_label *label;
13689
13690
0
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
13691
0
jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(3));
13692
0
skip_valid_char(common);
13693
0
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);
13694
0
JUMPTO(SLJIT_JUMP, CURRENT_AS(vreverse_backtrack)->matchingpath);
13695
13696
0
label = LABEL();
13697
0
sljit_set_label(jump, label);
13698
0
set_jumps(current->own_backtracks, label);
13699
0
}
13700
13701
static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13702
0
{
13703
0
DEFINE_COMPILER;
13704
0
struct sljit_jump *jump;
13705
0
int size;
13706
13707
0
if (CURRENT_AS(then_trap_backtrack)->then_trap)
13708
0
  {
13709
0
  common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
13710
0
  return;
13711
0
  }
13712
13713
0
size = CURRENT_AS(then_trap_backtrack)->framesize;
13714
0
size = 3 + (size < 0 ? 0 : size);
13715
13716
0
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
13717
0
free_stack(common, size);
13718
0
jump = JUMP(SLJIT_JUMP);
13719
13720
0
set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
13721
/* STACK_TOP is set by THEN. */
13722
0
if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
13723
0
  {
13724
0
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13725
0
  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(then_trap_backtrack)->framesize - 1) * sizeof(sljit_sw));
13726
0
  }
13727
0
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13728
0
free_stack(common, 3);
13729
13730
0
JUMPHERE(jump);
13731
0
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
13732
0
}
13733
13734
static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13735
0
{
13736
0
DEFINE_COMPILER;
13737
0
then_trap_backtrack *save_then_trap = common->then_trap;
13738
13739
0
while (current)
13740
0
  {
13741
0
  if (current->simple_backtracks != NULL)
13742
0
    set_jumps(current->simple_backtracks, LABEL());
13743
0
  switch(*current->cc)
13744
0
    {
13745
0
    case OP_SET_SOM:
13746
0
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13747
0
    free_stack(common, 1);
13748
0
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
13749
0
    break;
13750
13751
0
    case OP_STAR:
13752
0
    case OP_MINSTAR:
13753
0
    case OP_PLUS:
13754
0
    case OP_MINPLUS:
13755
0
    case OP_QUERY:
13756
0
    case OP_MINQUERY:
13757
0
    case OP_UPTO:
13758
0
    case OP_MINUPTO:
13759
0
    case OP_EXACT:
13760
0
    case OP_POSSTAR:
13761
0
    case OP_POSPLUS:
13762
0
    case OP_POSQUERY:
13763
0
    case OP_POSUPTO:
13764
0
    case OP_STARI:
13765
0
    case OP_MINSTARI:
13766
0
    case OP_PLUSI:
13767
0
    case OP_MINPLUSI:
13768
0
    case OP_QUERYI:
13769
0
    case OP_MINQUERYI:
13770
0
    case OP_UPTOI:
13771
0
    case OP_MINUPTOI:
13772
0
    case OP_EXACTI:
13773
0
    case OP_POSSTARI:
13774
0
    case OP_POSPLUSI:
13775
0
    case OP_POSQUERYI:
13776
0
    case OP_POSUPTOI:
13777
0
    case OP_NOTSTAR:
13778
0
    case OP_NOTMINSTAR:
13779
0
    case OP_NOTPLUS:
13780
0
    case OP_NOTMINPLUS:
13781
0
    case OP_NOTQUERY:
13782
0
    case OP_NOTMINQUERY:
13783
0
    case OP_NOTUPTO:
13784
0
    case OP_NOTMINUPTO:
13785
0
    case OP_NOTEXACT:
13786
0
    case OP_NOTPOSSTAR:
13787
0
    case OP_NOTPOSPLUS:
13788
0
    case OP_NOTPOSQUERY:
13789
0
    case OP_NOTPOSUPTO:
13790
0
    case OP_NOTSTARI:
13791
0
    case OP_NOTMINSTARI:
13792
0
    case OP_NOTPLUSI:
13793
0
    case OP_NOTMINPLUSI:
13794
0
    case OP_NOTQUERYI:
13795
0
    case OP_NOTMINQUERYI:
13796
0
    case OP_NOTUPTOI:
13797
0
    case OP_NOTMINUPTOI:
13798
0
    case OP_NOTEXACTI:
13799
0
    case OP_NOTPOSSTARI:
13800
0
    case OP_NOTPOSPLUSI:
13801
0
    case OP_NOTPOSQUERYI:
13802
0
    case OP_NOTPOSUPTOI:
13803
0
    case OP_TYPESTAR:
13804
0
    case OP_TYPEMINSTAR:
13805
0
    case OP_TYPEPLUS:
13806
0
    case OP_TYPEMINPLUS:
13807
0
    case OP_TYPEQUERY:
13808
0
    case OP_TYPEMINQUERY:
13809
0
    case OP_TYPEUPTO:
13810
0
    case OP_TYPEMINUPTO:
13811
0
    case OP_TYPEEXACT:
13812
0
    case OP_TYPEPOSSTAR:
13813
0
    case OP_TYPEPOSPLUS:
13814
0
    case OP_TYPEPOSQUERY:
13815
0
    case OP_TYPEPOSUPTO:
13816
0
    case OP_CLASS:
13817
0
    case OP_NCLASS:
13818
0
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
13819
0
    case OP_XCLASS:
13820
0
#endif
13821
0
    compile_iterator_backtrackingpath(common, current);
13822
0
    break;
13823
13824
0
    case OP_REF:
13825
0
    case OP_REFI:
13826
0
    case OP_DNREF:
13827
0
    case OP_DNREFI:
13828
0
    compile_ref_iterator_backtrackingpath(common, current);
13829
0
    break;
13830
13831
0
    case OP_RECURSE:
13832
0
    compile_recurse_backtrackingpath(common, current);
13833
0
    break;
13834
13835
0
    case OP_ASSERT:
13836
0
    case OP_ASSERT_NOT:
13837
0
    case OP_ASSERTBACK:
13838
0
    case OP_ASSERTBACK_NOT:
13839
0
    compile_assert_backtrackingpath(common, current);
13840
0
    break;
13841
13842
0
    case OP_ASSERT_NA:
13843
0
    case OP_ASSERTBACK_NA:
13844
0
    case OP_ONCE:
13845
0
    case OP_SCRIPT_RUN:
13846
0
    case OP_BRA:
13847
0
    case OP_CBRA:
13848
0
    case OP_COND:
13849
0
    case OP_SBRA:
13850
0
    case OP_SCBRA:
13851
0
    case OP_SCOND:
13852
0
    compile_bracket_backtrackingpath(common, current);
13853
0
    break;
13854
13855
0
    case OP_BRAZERO:
13856
0
    if (current->cc[1] > OP_ASSERTBACK_NOT)
13857
0
      compile_bracket_backtrackingpath(common, current);
13858
0
    else
13859
0
      compile_assert_backtrackingpath(common, current);
13860
0
    break;
13861
13862
0
    case OP_BRAPOS:
13863
0
    case OP_CBRAPOS:
13864
0
    case OP_SBRAPOS:
13865
0
    case OP_SCBRAPOS:
13866
0
    case OP_BRAPOSZERO:
13867
0
    compile_bracketpos_backtrackingpath(common, current);
13868
0
    break;
13869
13870
0
    case OP_BRAMINZERO:
13871
0
    compile_braminzero_backtrackingpath(common, current);
13872
0
    break;
13873
13874
0
    case OP_MARK:
13875
0
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0));
13876
0
    if (common->has_skip_arg)
13877
0
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13878
0
    free_stack(common, common->has_skip_arg ? 5 : 1);
13879
0
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
13880
0
    if (common->has_skip_arg)
13881
0
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
13882
0
    break;
13883
13884
0
    case OP_THEN:
13885
0
    case OP_THEN_ARG:
13886
0
    case OP_PRUNE:
13887
0
    case OP_PRUNE_ARG:
13888
0
    case OP_SKIP:
13889
0
    case OP_SKIP_ARG:
13890
0
    compile_control_verb_backtrackingpath(common, current);
13891
0
    break;
13892
13893
0
    case OP_COMMIT:
13894
0
    case OP_COMMIT_ARG:
13895
0
    if (!common->local_quit_available)
13896
0
      OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
13897
0
    if (common->quit_label == NULL)
13898
0
      add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
13899
0
    else
13900
0
      JUMPTO(SLJIT_JUMP, common->quit_label);
13901
0
    break;
13902
13903
0
    case OP_CALLOUT:
13904
0
    case OP_CALLOUT_STR:
13905
0
    case OP_FAIL:
13906
0
    case OP_ACCEPT:
13907
0
    case OP_ASSERT_ACCEPT:
13908
0
    set_jumps(current->own_backtracks, LABEL());
13909
0
    break;
13910
13911
0
    case OP_VREVERSE:
13912
0
    compile_vreverse_backtrackingpath(common, current);
13913
0
    break;
13914
13915
0
    case OP_THEN_TRAP:
13916
    /* A virtual opcode for then traps. */
13917
0
    compile_then_trap_backtrackingpath(common, current);
13918
0
    break;
13919
13920
0
    default:
13921
0
    SLJIT_UNREACHABLE();
13922
0
    break;
13923
0
    }
13924
0
  current = current->prev;
13925
0
  }
13926
0
common->then_trap = save_then_trap;
13927
0
}
13928
13929
/* Emits the code for one recursion entry (common->currententry): the matching
   path of every alternative of the recursed bracket, the backtracking entry
   point, and the exit sequences. The generated code is entered with
   SLJIT_FAST_ENTER and left with SLJIT_FAST_RETURN; TMP1 is loaded with 1 on
   the exit reached through the "match" jumps and with 0 on the other exits.
   Returns early, leaving the entry unfinished, when the sljit compiler
   reports an error after a matching or backtracking path was compiled. */
static SLJIT_INLINE void compile_recurse(compiler_common *common)
13930
0
{
13931
0
DEFINE_COMPILER;
13932
0
/* Opening bracket of the group this entry recurses into. */
PCRE2_SPTR cc = common->start + common->currententry->start;
13933
0
/* First code unit after the bracket header; every opcode other than OP_BRA
   carries an additional IMM2_SIZE field. */
PCRE2_SPTR ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
13934
0
/* bracketend() result moved back by 1 + LINK_SIZE code units (presumably the
   closing ket of the group - confirm against bracketend()). */
PCRE2_SPTR ccend = bracketend(cc) - (1 + LINK_SIZE);
13935
0
uint32_t recurse_flags = 0;
13936
0
/* Also fills recurse_flags (control head / accept / quit found bits are
   tested below). */
int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &recurse_flags);
13937
0
int alt_count, alt_max, local_size;
13938
0
backtrack_common altbacktrack;
13939
0
/* Jumps taken after an alternative matched; bound near the end. */
jump_list *match = NULL;
13940
0
struct sljit_jump *next_alt = NULL;
13941
0
struct sljit_jump *accept_exit = NULL;
13942
0
struct sljit_label *quit;
13943
0
struct sljit_jump *mov_addr = NULL;
13944
13945
/* Recurse captures then: start without an active then trap. */
13946
0
common->then_trap = NULL;
13947
13948
0
SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
13949
13950
0
alt_max = no_alternatives(cc);
13951
0
alt_count = 0;
13952
13953
/* Matching path. */
13954
0
SLJIT_ASSERT(common->currententry->entry_label == NULL && common->recursive_head_ptr != 0);
13955
0
common->currententry->entry_label = LABEL();
13956
0
/* Bind the calls that were emitted before this entry had a label. */
set_jumps(common->currententry->entry_calls, common->currententry->entry_label);
13957
13958
0
/* The return address of the fast call is received in TMP2. */
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, TMP2, 0);
13959
0
count_match(common);
13960
13961
0
/* One local slot for the return address, plus one for the saved STR_PTR
   when there is more than one alternative (see the stores below). */
local_size = (alt_max > 1) ? 2 : 1;
13962
13963
/* (Reversed) stack layout:
13964
   [private data][return address][optional: str ptr] ... [optional: alternative index][recursive_head_ptr] */
13965
13966
0
allocate_stack(common, private_data_size + local_size);
13967
/* Save return address. */
13968
0
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP2, 0);
13969
13970
0
copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, local_size, private_data_size + local_size, recurse_flags);
13971
13972
/* This variable is saved and restored every time we enter or exit a recursive context. */
13973
0
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);
13974
13975
0
if (recurse_flags & recurse_flag_control_head_found)
13976
0
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
13977
13978
0
/* Later alternatives restart from the subject position saved here. */
if (alt_max > 1)
13979
0
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
13980
13981
0
memset(&altbacktrack, 0, sizeof(backtrack_common));
13982
0
/* Quit and accept jumps collected while compiling this entry are kept in
   fresh lists and bound inside this function (see below). */
common->quit_label = NULL;
13983
0
common->accept_label = NULL;
13984
0
common->quit = NULL;
13985
0
common->accept = NULL;
13986
0
altbacktrack.cc = ccbegin;
13987
0
/* cc now marks the end of the first alternative. */
cc += GET(cc, 1);
13988
0
/* One iteration per alternative; left when *cc is no longer OP_ALT. */
while (1)
13989
0
  {
13990
0
  altbacktrack.top = NULL;
13991
0
  altbacktrack.own_backtracks = NULL;
13992
13993
0
  /* Every alternative except the first reloads the saved STR_PTR. */
  if (altbacktrack.cc != ccbegin)
13994
0
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13995
13996
0
  compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
13997
0
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13998
0
    return;
13999
14000
0
  /* The alternative matched: push a frame holding the recursive head
     (written at the match label) and, when needed, an alternative id. */
  allocate_stack(common, (alt_max > 1 || (recurse_flags & recurse_flag_accept_found)) ? 2 : 1);
14001
0
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
14002
14003
0
  if (alt_max > 1 || (recurse_flags & recurse_flag_accept_found))
14004
0
    {
14005
0
    /* With more than three alternatives the slot receives a code address
       (its target is set later with sljit_set_label) and the backtracking
       entry dispatches with an indirect jump; otherwise the slot holds the
       alternative index and dispatch is a chain of compares. */
    if (alt_max > 3)
14006
0
      mov_addr = sljit_emit_mov_addr(compiler, SLJIT_MEM1(STACK_TOP), STACK(1));
14007
0
    else
14008
0
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count);
14009
0
    }
14010
14011
0
  add_jump(compiler, &match, JUMP(SLJIT_JUMP));
14012
14013
0
  if (alt_count == 0)
14014
0
    {
14015
    /* Backtracking path entry. */
14016
0
    SLJIT_ASSERT(common->currententry->backtrack_label == NULL);
14017
0
    common->currententry->backtrack_label = LABEL();
14018
0
    set_jumps(common->currententry->backtrack_calls, common->currententry->backtrack_label);
14019
14020
0
    /* Here the return address is received in TMP1. */
    sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, TMP1, 0);
14021
14022
0
    /* -1 in the id slot is stored by the common->accept block near the end
       of this function; such frames leave through accept_exit. */
    if (recurse_flags & recurse_flag_accept_found)
14023
0
      accept_exit = CMP(SLJIT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);
14024
14025
0
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
14026
    /* Save return address. */
14027
0
    OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(local_size - 1), TMP1, 0);
14028
14029
0
    copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, recurse_flags);
14030
14031
0
    if (alt_max > 1)
14032
0
      {
14033
0
      /* TMP1 = alternative id (index or code address) of the frame. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
14034
0
      free_stack(common, 2);
14035
14036
0
      if (alt_max > 3)
14037
0
        {
14038
0
        sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
14039
0
        /* Target address for the first alternative's frame. */
        sljit_set_label(mov_addr, LABEL());
14040
0
        sljit_emit_op0(compiler, SLJIT_ENDBR);
14041
0
        }
14042
0
      else
14043
0
        next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
14044
0
      }
14045
0
    else
14046
0
      free_stack(common, (recurse_flags & recurse_flag_accept_found) ? 2 : 1);
14047
0
    }
14048
0
  else if (alt_max > 3)
14049
0
    {
14050
0
    /* Target address for this alternative's frame. */
    sljit_set_label(mov_addr, LABEL());
14051
0
    sljit_emit_op0(compiler, SLJIT_ENDBR);
14052
0
    }
14053
0
  else
14054
0
    {
14055
0
    /* Compare chain: the previous "not this alternative" jump lands here. */
    JUMPHERE(next_alt);
14056
0
    if (alt_count + 1 < alt_max)
14057
0
      {
14058
0
      SLJIT_ASSERT(alt_count == 1 && alt_max == 3);
14059
0
      next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
14060
0
      }
14061
0
    }
14062
14063
0
  alt_count++;
14064
14065
0
  compile_backtrackingpath(common, altbacktrack.top);
14066
0
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
14067
0
    return;
14068
0
  /* Failures of this alternative continue at the code emitted next. */
  set_jumps(altbacktrack.own_backtracks, LABEL());
14069
14070
0
  if (*cc != OP_ALT)
14071
0
    break;
14072
14073
0
  altbacktrack.cc = cc + 1 + LINK_SIZE;
14074
0
  cc += GET(cc, 1);
14075
0
  }
14076
14077
/* No alternative is matched. */
14078
14079
0
quit = LABEL();
14080
14081
0
copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, local_size, private_data_size + local_size, recurse_flags);
14082
14083
0
/* Reload the saved return address, drop the frame and return with TMP1 = 0. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
14084
0
free_stack(common, private_data_size + local_size);
14085
0
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
14086
0
OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
14087
14088
0
/* Quit jumps collected inside the recursion: reset STACK_TOP from the
   recursive head, copy the shared data and join the quit sequence above. */
if (common->quit != NULL)
14089
0
  {
14090
0
  SLJIT_ASSERT(recurse_flags & recurse_flag_quit_found);
14091
14092
0
  set_jumps(common->quit, LABEL());
14093
0
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
14094
0
  copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, local_size, private_data_size + local_size, recurse_flags);
14095
0
  JUMPTO(SLJIT_JUMP, quit);
14096
0
  }
14097
14098
0
/* Backtracking into a frame whose id slot holds -1 (tested at the
   backtracking entry): release both frames and return with TMP1 = 0. */
if (recurse_flags & recurse_flag_accept_found)
14099
0
  {
14100
0
  JUMPHERE(accept_exit);
14101
0
  free_stack(common, 2);
14102
14103
  /* Save return address. */
14104
0
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP1, 0);
14105
14106
0
  copy_recurse_data(common, ccbegin, ccend, recurse_copy_kept_shared_to_global, local_size, private_data_size + local_size, recurse_flags);
14107
14108
0
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
14109
0
  free_stack(common, private_data_size + local_size);
14110
0
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
14111
0
  OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
14112
0
  }
14113
14114
0
/* Accept jumps collected inside the recursion: rebuild the two-slot frame on
   top of the recursive head, mark it with -1 and fall through to the match
   exit below. */
if (common->accept != NULL)
14115
0
  {
14116
0
  SLJIT_ASSERT(recurse_flags & recurse_flag_accept_found);
14117
14118
0
  set_jumps(common->accept, LABEL());
14119
14120
0
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
14121
0
  OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);
14122
14123
0
  allocate_stack(common, 2);
14124
0
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);
14125
0
  }
14126
14127
0
/* Match exit: TMP2 holds the recursive head on every path that gets here. */
set_jumps(match, LABEL());
14128
14129
0
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
14130
14131
0
copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, recurse_flags);
14132
14133
0
/* Fetch the return address from the frame TMP2 points at; return with TMP1 = 1. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), STACK(local_size - 1));
14134
0
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
14135
0
OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
14136
0
}
14137
14138
#undef COMPILE_BACKTRACKINGPATH
14139
#undef CURRENT_AS
14140
14141
/* Bits of jit_compile()'s mode argument that configure the compilation;
   jit_compile() clears them from mode before comparing it with the
   PCRE2_JIT_COMPLETE / PCRE2_JIT_PARTIAL_* values. */
#define PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS \
14142
0
  (PCRE2_JIT_INVALID_UTF)
14143
14144
static int jit_compile(pcre2_code *code, sljit_u32 mode)
14145
0
{
14146
0
pcre2_real_code *re = (pcre2_real_code *)code;
14147
0
struct sljit_compiler *compiler;
14148
0
backtrack_common rootbacktrack;
14149
0
compiler_common common_data;
14150
0
compiler_common *common = &common_data;
14151
0
const sljit_u8 *tables = re->tables;
14152
0
void *allocator_data = &re->memctl;
14153
0
int private_data_size;
14154
0
PCRE2_SPTR ccend;
14155
0
executable_functions *functions;
14156
0
void *executable_func;
14157
0
sljit_uw executable_size;
14158
0
sljit_uw total_length;
14159
0
struct sljit_label *mainloop_label = NULL;
14160
0
struct sljit_label *continue_match_label;
14161
0
struct sljit_label *empty_match_found_label = NULL;
14162
0
struct sljit_label *empty_match_backtrack_label = NULL;
14163
0
struct sljit_label *reset_match_label;
14164
0
struct sljit_label *quit_label;
14165
0
struct sljit_jump *jump;
14166
0
struct sljit_jump *minlength_check_failed = NULL;
14167
0
struct sljit_jump *empty_match = NULL;
14168
0
struct sljit_jump *end_anchor_failed = NULL;
14169
0
jump_list *reqcu_not_found = NULL;
14170
14171
0
SLJIT_ASSERT(tables);
14172
14173
#if HAS_VIRTUAL_REGISTERS == 1
14174
SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) < 0 && sljit_get_register_index(SLJIT_GP_REGISTER, ARGUMENTS) < 0 && sljit_get_register_index(SLJIT_GP_REGISTER, RETURN_ADDR) < 0);
14175
#elif HAS_VIRTUAL_REGISTERS == 0
14176
0
SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) >= 0 && sljit_get_register_index(SLJIT_GP_REGISTER, ARGUMENTS) >= 0 && sljit_get_register_index(SLJIT_GP_REGISTER, RETURN_ADDR) >= 0);
14177
#else
14178
#error "Invalid value for HAS_VIRTUAL_REGISTERS"
14179
#endif
14180
14181
0
memset(&rootbacktrack, 0, sizeof(backtrack_common));
14182
0
memset(common, 0, sizeof(compiler_common));
14183
0
common->re = re;
14184
0
common->name_table = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
14185
0
rootbacktrack.cc = common->name_table + re->name_count * re->name_entry_size;
14186
14187
0
#ifdef SUPPORT_UNICODE
14188
0
common->invalid_utf = (mode & PCRE2_JIT_INVALID_UTF) != 0;
14189
0
#endif /* SUPPORT_UNICODE */
14190
0
mode &= ~PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS;
14191
14192
0
common->start = rootbacktrack.cc;
14193
0
common->read_only_data_head = NULL;
14194
0
common->fcc = tables + fcc_offset;
14195
0
common->lcc = (sljit_sw)(tables + lcc_offset);
14196
0
common->mode = mode;
14197
0
common->might_be_empty = (re->minlength == 0) || (re->flags & PCRE2_MATCH_EMPTY);
14198
0
common->allow_empty_partial = (re->max_lookbehind > 0) || (re->flags & PCRE2_MATCH_EMPTY);
14199
0
common->nltype = NLTYPE_FIXED;
14200
0
switch(re->newline_convention)
14201
0
  {
14202
0
  case PCRE2_NEWLINE_CR: common->newline = CHAR_CR; break;
14203
0
  case PCRE2_NEWLINE_LF: common->newline = CHAR_NL; break;
14204
0
  case PCRE2_NEWLINE_CRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
14205
0
  case PCRE2_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
14206
0
  case PCRE2_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
14207
0
  case PCRE2_NEWLINE_NUL: common->newline = CHAR_NUL; break;
14208
0
  default: return PCRE2_ERROR_INTERNAL;
14209
0
  }
14210
0
common->nlmax = READ_CHAR_MAX;
14211
0
common->nlmin = 0;
14212
0
if (re->bsr_convention == PCRE2_BSR_UNICODE)
14213
0
  common->bsr_nltype = NLTYPE_ANY;
14214
0
else if (re->bsr_convention == PCRE2_BSR_ANYCRLF)
14215
0
  common->bsr_nltype = NLTYPE_ANYCRLF;
14216
0
else
14217
0
  {
14218
#ifdef BSR_ANYCRLF
14219
  common->bsr_nltype = NLTYPE_ANYCRLF;
14220
#else
14221
0
  common->bsr_nltype = NLTYPE_ANY;
14222
0
#endif
14223
0
  }
14224
0
common->bsr_nlmax = READ_CHAR_MAX;
14225
0
common->bsr_nlmin = 0;
14226
0
common->endonly = (re->overall_options & PCRE2_DOLLAR_ENDONLY) != 0;
14227
0
common->ctypes = (sljit_sw)(tables + ctypes_offset);
14228
0
common->name_count = re->name_count;
14229
0
common->name_entry_size = re->name_entry_size;
14230
0
common->unset_backref = (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) != 0;
14231
0
common->alt_circumflex = (re->overall_options & PCRE2_ALT_CIRCUMFLEX) != 0;
14232
0
#ifdef SUPPORT_UNICODE
14233
/* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
14234
0
common->utf = (re->overall_options & PCRE2_UTF) != 0;
14235
0
common->ucp = (re->overall_options & PCRE2_UCP) != 0;
14236
0
if (common->utf)
14237
0
  {
14238
0
  if (common->nltype == NLTYPE_ANY)
14239
0
    common->nlmax = 0x2029;
14240
0
  else if (common->nltype == NLTYPE_ANYCRLF)
14241
0
    common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
14242
0
  else
14243
0
    {
14244
    /* We only care about the first newline character. */
14245
0
    common->nlmax = common->newline & 0xff;
14246
0
    }
14247
14248
0
  if (common->nltype == NLTYPE_FIXED)
14249
0
    common->nlmin = common->newline & 0xff;
14250
0
  else
14251
0
    common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
14252
14253
0
  if (common->bsr_nltype == NLTYPE_ANY)
14254
0
    common->bsr_nlmax = 0x2029;
14255
0
  else
14256
0
    common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
14257
0
  common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
14258
0
  }
14259
0
else
14260
0
  common->invalid_utf = FALSE;
14261
0
#endif /* SUPPORT_UNICODE */
14262
0
ccend = bracketend(common->start);
14263
14264
/* Calculate the local space size on the stack. */
14265
0
common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
14266
0
common->optimized_cbracket = (sljit_u8 *)SLJIT_MALLOC(re->top_bracket + 1, allocator_data);
14267
0
if (!common->optimized_cbracket)
14268
0
  return PCRE2_ERROR_NOMEMORY;
14269
#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
14270
memset(common->optimized_cbracket, 0, re->top_bracket + 1);
14271
#else
14272
0
memset(common->optimized_cbracket, 1, re->top_bracket + 1);
14273
0
#endif
14274
14275
0
SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
14276
#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
14277
common->capture_last_ptr = common->ovector_start;
14278
common->ovector_start += sizeof(sljit_sw);
14279
#endif
14280
0
if (!check_opcode_types(common, common->start, ccend))
14281
0
  {
14282
0
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
14283
0
  return PCRE2_ERROR_NOMEMORY;
14284
0
  }
14285
14286
/* Checking flags and updating ovector_start. */
14287
0
if (mode == PCRE2_JIT_COMPLETE && (re->flags & PCRE2_LASTSET) != 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
14288
0
  {
14289
0
  common->req_char_ptr = common->ovector_start;
14290
0
  common->ovector_start += sizeof(sljit_sw);
14291
0
  }
14292
0
if (mode != PCRE2_JIT_COMPLETE)
14293
0
  {
14294
0
  common->start_used_ptr = common->ovector_start;
14295
0
  common->ovector_start += sizeof(sljit_sw);
14296
0
  if (mode == PCRE2_JIT_PARTIAL_SOFT)
14297
0
    {
14298
0
    common->hit_start = common->ovector_start;
14299
0
    common->ovector_start += sizeof(sljit_sw);
14300
0
    }
14301
0
  }
14302
0
if ((re->overall_options & (PCRE2_FIRSTLINE | PCRE2_USE_OFFSET_LIMIT)) != 0)
14303
0
  {
14304
0
  common->match_end_ptr = common->ovector_start;
14305
0
  common->ovector_start += sizeof(sljit_sw);
14306
0
  }
14307
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
14308
common->control_head_ptr = 1;
14309
#endif
14310
0
if (common->control_head_ptr != 0)
14311
0
  {
14312
0
  common->control_head_ptr = common->ovector_start;
14313
0
  common->ovector_start += sizeof(sljit_sw);
14314
0
  }
14315
0
if (common->has_set_som)
14316
0
  {
14317
  /* Saving the real start pointer is necessary. */
14318
0
  common->start_ptr = common->ovector_start;
14319
0
  common->ovector_start += sizeof(sljit_sw);
14320
0
  }
14321
14322
/* Aligning ovector to even number of sljit words. */
14323
0
if ((common->ovector_start & sizeof(sljit_sw)) != 0)
14324
0
  common->ovector_start += sizeof(sljit_sw);
14325
14326
0
if (common->start_ptr == 0)
14327
0
  common->start_ptr = OVECTOR(0);
14328
14329
/* Capturing brackets cannot be optimized if callouts are allowed. */
14330
0
if (common->capture_last_ptr != 0)
14331
0
  memset(common->optimized_cbracket, 0, re->top_bracket + 1);
14332
14333
0
SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
14334
0
common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
14335
14336
0
total_length = ccend - common->start;
14337
0
common->private_data_ptrs = (sljit_s32*)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), allocator_data);
14338
0
if (!common->private_data_ptrs)
14339
0
  {
14340
0
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
14341
0
  return PCRE2_ERROR_NOMEMORY;
14342
0
  }
14343
0
memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32));
14344
14345
0
private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
14346
14347
0
if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && !common->has_skip_in_assert_back)
14348
0
  detect_early_fail(common, common->start, &private_data_size, 0, 0);
14349
14350
0
set_private_data_ptrs(common, &private_data_size, ccend);
14351
14352
0
SLJIT_ASSERT(common->early_fail_start_ptr <= common->early_fail_end_ptr);
14353
14354
0
if (private_data_size > 65536)
14355
0
  {
14356
0
  SLJIT_FREE(common->private_data_ptrs, allocator_data);
14357
0
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
14358
0
  return PCRE2_ERROR_NOMEMORY;
14359
0
  }
14360
14361
0
if (common->has_then)
14362
0
  {
14363
0
  common->then_offsets = (sljit_u8 *)(common->private_data_ptrs + total_length);
14364
0
  memset(common->then_offsets, 0, total_length);
14365
0
  set_then_offsets(common, common->start, NULL);
14366
0
  }
14367
14368
0
compiler = sljit_create_compiler(allocator_data);
14369
0
if (!compiler)
14370
0
  {
14371
0
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
14372
0
  SLJIT_FREE(common->private_data_ptrs, allocator_data);
14373
0
  return PCRE2_ERROR_NOMEMORY;
14374
0
  }
14375
0
common->compiler = compiler;
14376
14377
/* Main pcre2_jit_exec entry. */
14378
0
SLJIT_ASSERT((private_data_size & (sizeof(sljit_sw) - 1)) == 0);
14379
0
sljit_emit_enter(compiler, 0, SLJIT_ARGS1(W, W), 5, 5, SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS, 0, private_data_size);
14380
14381
/* Register init. */
14382
0
reset_ovector(common, (re->top_bracket + 1) * 2);
14383
0
if (common->req_char_ptr != 0)
14384
0
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);
14385
14386
0
OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
14387
0
OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
14388
0
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
14389
0
OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
14390
0
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
14391
0
OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
14392
0
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, end));
14393
0
OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, start));
14394
0
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
14395
0
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);
14396
14397
0
if (common->early_fail_start_ptr < common->early_fail_end_ptr)
14398
0
  reset_early_fail(common);
14399
14400
0
if (mode == PCRE2_JIT_PARTIAL_SOFT)
14401
0
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
14402
0
if (common->mark_ptr != 0)
14403
0
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
14404
0
if (common->control_head_ptr != 0)
14405
0
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
14406
14407
/* Main part of the matching */
14408
0
if ((re->overall_options & PCRE2_ANCHORED) == 0)
14409
0
  {
14410
0
  mainloop_label = mainloop_entry(common);
14411
0
  continue_match_label = LABEL();
14412
  /* Forward search if possible. */
14413
0
  if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
14414
0
    {
14415
0
    if (mode == PCRE2_JIT_COMPLETE && fast_forward_first_n_chars(common))
14416
0
      ;
14417
0
    else if ((re->flags & PCRE2_FIRSTSET) != 0)
14418
0
      fast_forward_first_char(common);
14419
0
    else if ((re->flags & PCRE2_STARTLINE) != 0)
14420
0
      fast_forward_newline(common);
14421
0
    else if ((re->flags & PCRE2_FIRSTMAPSET) != 0)
14422
0
      fast_forward_start_bits(common);
14423
0
    }
14424
0
  }
14425
0
else
14426
0
  continue_match_label = LABEL();
14427
14428
0
if (mode == PCRE2_JIT_COMPLETE && re->minlength > 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
14429
0
  {
14430
0
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
14431
0
  OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(re->minlength));
14432
0
  minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
14433
0
  }
14434
0
if (common->req_char_ptr != 0)
14435
0
  reqcu_not_found = search_requested_char(common, (PCRE2_UCHAR)(re->last_codeunit), (re->flags & PCRE2_LASTCASELESS) != 0, (re->flags & PCRE2_FIRSTSET) != 0);
14436
14437
/* Store the current STR_PTR in OVECTOR(0). */
14438
0
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
14439
/* Copy the limit of allowed recursions. */
14440
0
OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
14441
0
if (common->capture_last_ptr != 0)
14442
0
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, 0);
14443
0
if (common->fast_forward_bc_ptr != NULL)
14444
0
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3, STR_PTR, 0);
14445
14446
0
if (common->start_ptr != OVECTOR(0))
14447
0
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);
14448
14449
/* Copy the beginning of the string. */
14450
0
if (mode == PCRE2_JIT_PARTIAL_SOFT)
14451
0
  {
14452
0
  jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
14453
0
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
14454
0
  JUMPHERE(jump);
14455
0
  }
14456
0
else if (mode == PCRE2_JIT_PARTIAL_HARD)
14457
0
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
14458
14459
0
compile_matchingpath(common, common->start, ccend, &rootbacktrack);
14460
0
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
14461
0
  {
14462
0
  sljit_free_compiler(compiler);
14463
0
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
14464
0
  SLJIT_FREE(common->private_data_ptrs, allocator_data);
14465
0
  PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14466
0
  return PCRE2_ERROR_NOMEMORY;
14467
0
  }
14468
14469
0
if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
14470
0
  end_anchor_failed = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0);
14471
14472
0
if (common->might_be_empty)
14473
0
  {
14474
0
  empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
14475
0
  empty_match_found_label = LABEL();
14476
0
  }
14477
14478
0
common->accept_label = LABEL();
14479
0
if (common->accept != NULL)
14480
0
  set_jumps(common->accept, common->accept_label);
14481
14482
/* This means we have a match. Update the ovector. */
14483
0
copy_ovector(common, re->top_bracket + 1);
14484
0
common->quit_label = common->abort_label = LABEL();
14485
0
if (common->quit != NULL)
14486
0
  set_jumps(common->quit, common->quit_label);
14487
0
if (common->abort != NULL)
14488
0
  set_jumps(common->abort, common->abort_label);
14489
0
if (minlength_check_failed != NULL)
14490
0
  SET_LABEL(minlength_check_failed, common->abort_label);
14491
14492
0
sljit_emit_op0(compiler, SLJIT_SKIP_FRAMES_BEFORE_RETURN);
14493
0
sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
14494
14495
0
if (common->failed_match != NULL)
14496
0
  {
14497
0
  SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
14498
0
  set_jumps(common->failed_match, LABEL());
14499
0
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
14500
0
  JUMPTO(SLJIT_JUMP, common->abort_label);
14501
0
  }
14502
14503
0
if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
14504
0
  JUMPHERE(end_anchor_failed);
14505
14506
0
if (mode != PCRE2_JIT_COMPLETE)
14507
0
  {
14508
0
  common->partialmatchlabel = LABEL();
14509
0
  set_jumps(common->partialmatch, common->partialmatchlabel);
14510
0
  return_with_partial_match(common, common->quit_label);
14511
0
  }
14512
14513
0
if (common->might_be_empty)
14514
0
  empty_match_backtrack_label = LABEL();
14515
0
compile_backtrackingpath(common, rootbacktrack.top);
14516
0
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
14517
0
  {
14518
0
  sljit_free_compiler(compiler);
14519
0
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
14520
0
  SLJIT_FREE(common->private_data_ptrs, allocator_data);
14521
0
  PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14522
0
  return PCRE2_ERROR_NOMEMORY;
14523
0
  }
14524
14525
0
SLJIT_ASSERT(rootbacktrack.prev == NULL);
14526
0
reset_match_label = LABEL();
14527
14528
0
if (mode == PCRE2_JIT_PARTIAL_SOFT)
14529
0
  {
14530
  /* Update hit_start only in the first time. */
14531
0
  jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
14532
0
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
14533
0
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
14534
0
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
14535
0
  JUMPHERE(jump);
14536
0
  }
14537
14538
/* Check we have remaining characters. */
14539
0
if ((re->overall_options & PCRE2_ANCHORED) == 0 && common->match_end_ptr != 0)
14540
0
  {
14541
0
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
14542
0
  }
14543
14544
0
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP),
14545
0
    (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3) : common->start_ptr);
14546
14547
0
if ((re->overall_options & PCRE2_ANCHORED) == 0)
14548
0
  {
14549
0
  if (common->ff_newline_shortcut != NULL)
14550
0
    {
14551
    /* There cannot be more newlines if PCRE2_FIRSTLINE is set. */
14552
0
    if ((re->overall_options & PCRE2_FIRSTLINE) == 0)
14553
0
      {
14554
0
      if (common->match_end_ptr != 0)
14555
0
        {
14556
0
        OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
14557
0
        OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
14558
0
        CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, common->ff_newline_shortcut);
14559
0
        OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
14560
0
        }
14561
0
      else
14562
0
        CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
14563
0
      }
14564
0
    }
14565
0
  else
14566
0
    CMPTO(SLJIT_LESS, STR_PTR, 0, (common->match_end_ptr == 0) ? STR_END : TMP1, 0, mainloop_label);
14567
0
  }
14568
14569
/* No more remaining characters. */
14570
0
if (reqcu_not_found != NULL)
14571
0
  set_jumps(reqcu_not_found, LABEL());
14572
14573
0
if (mode == PCRE2_JIT_PARTIAL_SOFT)
14574
0
  CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
14575
14576
0
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
14577
0
JUMPTO(SLJIT_JUMP, common->quit_label);
14578
14579
0
flush_stubs(common);
14580
14581
0
if (common->might_be_empty)
14582
0
  {
14583
0
  JUMPHERE(empty_match);
14584
0
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
14585
0
  OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
14586
0
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
14587
0
  JUMPTO(SLJIT_NOT_ZERO, empty_match_backtrack_label);
14588
0
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
14589
0
  JUMPTO(SLJIT_ZERO, empty_match_found_label);
14590
0
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
14591
0
  CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
14592
0
  JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
14593
0
  }
14594
14595
0
common->fast_forward_bc_ptr = NULL;
14596
0
common->early_fail_start_ptr = 0;
14597
0
common->early_fail_end_ptr = 0;
14598
0
common->currententry = common->entries;
14599
0
common->local_quit_available = TRUE;
14600
0
quit_label = common->quit_label;
14601
0
if (common->currententry != NULL)
14602
0
  {
14603
  /* A free bit for each private data. */
14604
0
  common->recurse_bitset_size = ((private_data_size / SSIZE_OF(sw)) + 7) >> 3;
14605
0
  SLJIT_ASSERT(common->recurse_bitset_size > 0);
14606
0
  common->recurse_bitset = (sljit_u8*)SLJIT_MALLOC(common->recurse_bitset_size, allocator_data);;
14607
14608
0
  if (common->recurse_bitset != NULL)
14609
0
    {
14610
0
    do
14611
0
      {
14612
      /* Might add new entries. */
14613
0
      compile_recurse(common);
14614
0
      if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
14615
0
        break;
14616
0
      flush_stubs(common);
14617
0
      common->currententry = common->currententry->next;
14618
0
      }
14619
0
    while (common->currententry != NULL);
14620
14621
0
    SLJIT_FREE(common->recurse_bitset, allocator_data);
14622
0
    }
14623
14624
0
  if (common->currententry != NULL)
14625
0
    {
14626
    /* The common->recurse_bitset has been freed. */
14627
0
    SLJIT_ASSERT(sljit_get_compiler_error(compiler) || common->recurse_bitset == NULL);
14628
14629
0
    sljit_free_compiler(compiler);
14630
0
    SLJIT_FREE(common->optimized_cbracket, allocator_data);
14631
0
    SLJIT_FREE(common->private_data_ptrs, allocator_data);
14632
0
    PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14633
0
    return PCRE2_ERROR_NOMEMORY;
14634
0
    }
14635
0
  }
14636
0
common->local_quit_available = FALSE;
14637
0
common->quit_label = quit_label;
14638
14639
/* Allocating stack, returns with PCRE2_ERROR_JIT_STACKLIMIT if it fails. */
14640
/* This is a (really) rare case. */
14641
0
set_jumps(common->stackalloc, LABEL());
14642
/* RETURN_ADDR is not a saved register. */
14643
0
sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0);
14644
14645
0
SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
14646
14647
0
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
14648
0
OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
14649
0
OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_LIMIT, 0, SLJIT_IMM, STACK_GROWTH_RATE);
14650
0
OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, stack));
14651
0
OP1(SLJIT_MOV, STACK_LIMIT, 0, TMP2, 0);
14652
14653
0
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(sljit_stack_resize));
14654
14655
0
jump = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
14656
0
OP1(SLJIT_MOV, TMP2, 0, STACK_LIMIT, 0);
14657
0
OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_RETURN_REG, 0);
14658
0
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
14659
0
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
14660
0
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
14661
14662
/* Allocation failed. */
14663
0
JUMPHERE(jump);
14664
/* We break the return address cache here, but this is a really rare case. */
14665
0
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_JIT_STACKLIMIT);
14666
0
JUMPTO(SLJIT_JUMP, common->quit_label);
14667
14668
/* Call limit reached. */
14669
0
set_jumps(common->calllimit, LABEL());
14670
0
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_MATCHLIMIT);
14671
0
JUMPTO(SLJIT_JUMP, common->quit_label);
14672
14673
0
if (common->revertframes != NULL)
14674
0
  {
14675
0
  set_jumps(common->revertframes, LABEL());
14676
0
  do_revertframes(common);
14677
0
  }
14678
0
if (common->wordboundary != NULL)
14679
0
  {
14680
0
  set_jumps(common->wordboundary, LABEL());
14681
0
  check_wordboundary(common, FALSE);
14682
0
  }
14683
0
if (common->ucp_wordboundary != NULL)
14684
0
  {
14685
0
  set_jumps(common->ucp_wordboundary, LABEL());
14686
0
  check_wordboundary(common, TRUE);
14687
0
  }
14688
0
if (common->anynewline != NULL)
14689
0
  {
14690
0
  set_jumps(common->anynewline, LABEL());
14691
0
  check_anynewline(common);
14692
0
  }
14693
0
if (common->hspace != NULL)
14694
0
  {
14695
0
  set_jumps(common->hspace, LABEL());
14696
0
  check_hspace(common);
14697
0
  }
14698
0
if (common->vspace != NULL)
14699
0
  {
14700
0
  set_jumps(common->vspace, LABEL());
14701
0
  check_vspace(common);
14702
0
  }
14703
0
if (common->casefulcmp != NULL)
14704
0
  {
14705
0
  set_jumps(common->casefulcmp, LABEL());
14706
0
  do_casefulcmp(common);
14707
0
  }
14708
0
if (common->caselesscmp != NULL)
14709
0
  {
14710
0
  set_jumps(common->caselesscmp, LABEL());
14711
0
  do_caselesscmp(common);
14712
0
  }
14713
0
if (common->reset_match != NULL || common->restart_match != NULL)
14714
0
  {
14715
0
  if (common->restart_match != NULL)
14716
0
    {
14717
0
    set_jumps(common->restart_match, LABEL());
14718
0
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
14719
0
    }
14720
14721
0
  set_jumps(common->reset_match, LABEL());
14722
0
  do_reset_match(common, (re->top_bracket + 1) * 2);
14723
  /* The value of restart_match is in TMP1. */
14724
0
  CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
14725
0
  OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
14726
0
  JUMPTO(SLJIT_JUMP, reset_match_label);
14727
0
  }
14728
0
#ifdef SUPPORT_UNICODE
14729
0
#if PCRE2_CODE_UNIT_WIDTH == 8
14730
0
if (common->utfreadchar != NULL)
14731
0
  {
14732
0
  set_jumps(common->utfreadchar, LABEL());
14733
0
  do_utfreadchar(common);
14734
0
  }
14735
0
if (common->utfreadtype8 != NULL)
14736
0
  {
14737
0
  set_jumps(common->utfreadtype8, LABEL());
14738
0
  do_utfreadtype8(common);
14739
0
  }
14740
0
if (common->utfpeakcharback != NULL)
14741
0
  {
14742
0
  set_jumps(common->utfpeakcharback, LABEL());
14743
0
  do_utfpeakcharback(common);
14744
0
  }
14745
0
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
14746
0
#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
14747
0
if (common->utfreadchar_invalid != NULL)
14748
0
  {
14749
0
  set_jumps(common->utfreadchar_invalid, LABEL());
14750
0
  do_utfreadchar_invalid(common);
14751
0
  }
14752
0
if (common->utfreadnewline_invalid != NULL)
14753
0
  {
14754
0
  set_jumps(common->utfreadnewline_invalid, LABEL());
14755
0
  do_utfreadnewline_invalid(common);
14756
0
  }
14757
0
if (common->utfmoveback_invalid)
14758
0
  {
14759
0
  set_jumps(common->utfmoveback_invalid, LABEL());
14760
0
  do_utfmoveback_invalid(common);
14761
0
  }
14762
0
if (common->utfpeakcharback_invalid)
14763
0
  {
14764
0
  set_jumps(common->utfpeakcharback_invalid, LABEL());
14765
0
  do_utfpeakcharback_invalid(common);
14766
0
  }
14767
0
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16 */
14768
0
if (common->getucd != NULL)
14769
0
  {
14770
0
  set_jumps(common->getucd, LABEL());
14771
0
  do_getucd(common);
14772
0
  }
14773
0
if (common->getucdtype != NULL)
14774
0
  {
14775
0
  set_jumps(common->getucdtype, LABEL());
14776
0
  do_getucdtype(common);
14777
0
  }
14778
0
#endif /* SUPPORT_UNICODE */
14779
14780
0
SLJIT_FREE(common->optimized_cbracket, allocator_data);
14781
0
SLJIT_FREE(common->private_data_ptrs, allocator_data);
14782
14783
0
executable_func = sljit_generate_code(compiler, 0, NULL);
14784
0
executable_size = sljit_get_generated_code_size(compiler);
14785
0
sljit_free_compiler(compiler);
14786
14787
0
if (executable_func == NULL)
14788
0
  {
14789
0
  PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14790
0
  return PCRE2_ERROR_NOMEMORY;
14791
0
  }
14792
14793
/* Reuse the function descriptor if possible. */
14794
0
if (re->executable_jit != NULL)
14795
0
  functions = (executable_functions *)re->executable_jit;
14796
0
else
14797
0
  {
14798
0
  functions = SLJIT_MALLOC(sizeof(executable_functions), allocator_data);
14799
0
  if (functions == NULL)
14800
0
    {
14801
    /* This case is highly unlikely since we just recently
14802
    freed a lot of memory. Not impossible though. */
14803
0
    sljit_free_code(executable_func, NULL);
14804
0
    PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14805
0
    return PCRE2_ERROR_NOMEMORY;
14806
0
    }
14807
0
  memset(functions, 0, sizeof(executable_functions));
14808
0
  functions->top_bracket = re->top_bracket + 1;
14809
0
  functions->limit_match = re->limit_match;
14810
0
  re->executable_jit = functions;
14811
0
  }
14812
14813
/* Turn mode into an index. */
14814
0
if (mode == PCRE2_JIT_COMPLETE)
14815
0
  mode = 0;
14816
0
else
14817
0
  mode = (mode == PCRE2_JIT_PARTIAL_SOFT) ? 1 : 2;
14818
14819
0
SLJIT_ASSERT(mode < JIT_NUMBER_OF_COMPILE_MODES);
14820
0
functions->executable_funcs[mode] = executable_func;
14821
0
functions->read_only_data_heads[mode] = common->read_only_data_head;
14822
0
functions->executable_sizes[mode] = executable_size;
14823
0
return 0;
14824
0
}
14825
14826
#endif
14827
14828
/*************************************************
14829
*        JIT compile a Regular Expression        *
14830
*************************************************/
14831
14832
/* This function used JIT to convert a previously-compiled pattern into machine
14833
code.
14834
14835
Arguments:
14836
  code          a compiled pattern
14837
  options       JIT option bits
14838
14839
Returns:        0: success or (*NOJIT) was used
14840
               <0: an error code
14841
*/
14842
14843
#define PUBLIC_JIT_COMPILE_OPTIONS \
14844
0
  (PCRE2_JIT_COMPLETE|PCRE2_JIT_PARTIAL_SOFT|PCRE2_JIT_PARTIAL_HARD|PCRE2_JIT_INVALID_UTF)
14845
14846
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
14847
pcre2_jit_compile(pcre2_code *code, uint32_t options)
14848
0
{
14849
0
pcre2_real_code *re = (pcre2_real_code *)code;
14850
0
#ifdef SUPPORT_JIT
14851
0
executable_functions *functions;
14852
0
static int executable_allocator_is_working = -1;
14853
0
#endif
14854
14855
0
if (code == NULL)
14856
0
  return PCRE2_ERROR_NULL;
14857
14858
0
if ((options & ~PUBLIC_JIT_COMPILE_OPTIONS) != 0)
14859
0
  return PCRE2_ERROR_JIT_BADOPTION;
14860
14861
/* Support for invalid UTF was first introduced in JIT, with the option
14862
PCRE2_JIT_INVALID_UTF. Later, support was added to the interpreter, and the
14863
compile-time option PCRE2_MATCH_INVALID_UTF was created. This is now the
14864
preferred feature, with the earlier option deprecated. However, for backward
14865
compatibility, if the earlier option is set, it forces the new option so that
14866
if JIT matching falls back to the interpreter, there is still support for
14867
invalid UTF. However, if this function has already been successfully called
14868
without PCRE2_JIT_INVALID_UTF and without PCRE2_MATCH_INVALID_UTF (meaning that
14869
non-invalid-supporting JIT code was compiled), give an error.
14870
14871
If in the future support for PCRE2_JIT_INVALID_UTF is withdrawn, the following
14872
actions are needed:
14873
14874
  1. Remove the definition from pcre2.h.in and from the list in
14875
     PUBLIC_JIT_COMPILE_OPTIONS above.
14876
14877
  2. Replace PCRE2_JIT_INVALID_UTF with a local flag in this module.
14878
14879
  3. Replace PCRE2_JIT_INVALID_UTF in pcre2_jit_test.c.
14880
14881
  4. Delete the following short block of code. The setting of "re" and
14882
     "functions" can be moved into the JIT-only block below, but if that is
14883
     done, (void)re and (void)functions will be needed in the non-JIT case, to
14884
     avoid compiler warnings.
14885
*/
14886
14887
0
#ifdef SUPPORT_JIT
14888
0
functions = (executable_functions *)re->executable_jit;
14889
0
#endif
14890
14891
0
if ((options & PCRE2_JIT_INVALID_UTF) != 0)
14892
0
  {
14893
0
  if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) == 0)
14894
0
    {
14895
0
#ifdef SUPPORT_JIT
14896
0
    if (functions != NULL) return PCRE2_ERROR_JIT_BADOPTION;
14897
0
#endif
14898
0
    re->overall_options |= PCRE2_MATCH_INVALID_UTF;
14899
0
    }
14900
0
  }
14901
14902
/* The above tests are run with and without JIT support. This means that
14903
PCRE2_JIT_INVALID_UTF propagates back into the regex options (ensuring
14904
interpreter support) even in the absence of JIT. But now, if there is no JIT
14905
support, give an error return. */
14906
14907
#ifndef SUPPORT_JIT
14908
return PCRE2_ERROR_JIT_BADOPTION;
14909
#else  /* SUPPORT_JIT */
14910
14911
/* There is JIT support. Do the necessary. */
14912
14913
0
if ((re->flags & PCRE2_NOJIT) != 0) return 0;
14914
14915
0
if (executable_allocator_is_working == -1)
14916
0
  {
14917
  /* Checks whether the executable allocator is working. This check
14918
     might run multiple times in multi-threaded environments, but the
14919
     result should not be affected by it. */
14920
0
  void *ptr = SLJIT_MALLOC_EXEC(32, NULL);
14921
0
  if (ptr != NULL)
14922
0
    {
14923
0
    SLJIT_FREE_EXEC(((sljit_u8*)(ptr)) + SLJIT_EXEC_OFFSET(ptr), NULL);
14924
0
    executable_allocator_is_working = 1;
14925
0
    }
14926
0
  else executable_allocator_is_working = 0;
14927
0
  }
14928
14929
0
if (!executable_allocator_is_working)
14930
0
  return PCRE2_ERROR_NOMEMORY;
14931
14932
0
if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0)
14933
0
  options |= PCRE2_JIT_INVALID_UTF;
14934
14935
0
if ((options & PCRE2_JIT_COMPLETE) != 0 && (functions == NULL
14936
0
    || functions->executable_funcs[0] == NULL)) {
14937
0
  uint32_t excluded_options = (PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD);
14938
0
  int result = jit_compile(code, options & ~excluded_options);
14939
0
  if (result != 0)
14940
0
    return result;
14941
0
  }
14942
14943
0
if ((options & PCRE2_JIT_PARTIAL_SOFT) != 0 && (functions == NULL
14944
0
    || functions->executable_funcs[1] == NULL)) {
14945
0
  uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_HARD);
14946
0
  int result = jit_compile(code, options & ~excluded_options);
14947
0
  if (result != 0)
14948
0
    return result;
14949
0
  }
14950
14951
0
if ((options & PCRE2_JIT_PARTIAL_HARD) != 0 && (functions == NULL
14952
0
    || functions->executable_funcs[2] == NULL)) {
14953
0
  uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT);
14954
0
  int result = jit_compile(code, options & ~excluded_options);
14955
0
  if (result != 0)
14956
0
    return result;
14957
0
  }
14958
14959
0
return 0;
14960
14961
0
#endif  /* SUPPORT_JIT */
14962
0
}
14963
14964
/* JIT compiler uses an all-in-one approach. This improves security,
14965
   since the code generator functions are not exported. */
14966
14967
#define INCLUDED_FROM_PCRE2_JIT_COMPILE
14968
14969
#include "pcre2_jit_match.c"
14970
#include "pcre2_jit_misc.c"
14971
14972
/* End of pcre2_jit_compile.c */