Coverage Report

Created: 2025-07-23 08:13

/src/glib-2.80.0/subprojects/pcre2-10.42/src/pcre2_jit_compile.c
Line
Count
Source (jump to first uncovered line)
1
/*************************************************
2
*      Perl-Compatible Regular Expressions       *
3
*************************************************/
4
5
/* PCRE is a library of functions to support regular expressions whose syntax
6
and semantics are as close as possible to those of the Perl 5 language.
7
8
                       Written by Philip Hazel
9
                    This module by Zoltan Herczeg
10
     Original API code Copyright (c) 1997-2012 University of Cambridge
11
          New API code Copyright (c) 2016-2021 University of Cambridge
12
13
-----------------------------------------------------------------------------
14
Redistribution and use in source and binary forms, with or without
15
modification, are permitted provided that the following conditions are met:
16
17
    * Redistributions of source code must retain the above copyright notice,
18
      this list of conditions and the following disclaimer.
19
20
    * Redistributions in binary form must reproduce the above copyright
21
      notice, this list of conditions and the following disclaimer in the
22
      documentation and/or other materials provided with the distribution.
23
24
    * Neither the name of the University of Cambridge nor the names of its
25
      contributors may be used to endorse or promote products derived from
26
      this software without specific prior written permission.
27
28
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
-----------------------------------------------------------------------------
40
*/
41
42
#ifdef HAVE_CONFIG_H
43
#include "config.h"
44
#endif
45
46
#include "pcre2_internal.h"
47
48
#ifdef SUPPORT_JIT
49
50
/* All-in-one: Since we use the JIT compiler only from here,
51
we just include it. This way we don't need to touch the build
52
system files. */
53
54
#define SLJIT_CONFIG_AUTO 1
55
#define SLJIT_CONFIG_STATIC 1
56
#define SLJIT_VERBOSE 0
57
58
#ifdef PCRE2_DEBUG
59
#define SLJIT_DEBUG 1
60
#else
61
#define SLJIT_DEBUG 0
62
#endif
63
64
#define SLJIT_MALLOC(size, allocator_data) pcre2_jit_malloc(size, allocator_data)
65
#define SLJIT_FREE(ptr, allocator_data) pcre2_jit_free(ptr, allocator_data)
66
67
static void * pcre2_jit_malloc(size_t size, void *allocator_data)
68
{
69
pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
70
return allocator->malloc(size, allocator->memory_data);
71
}
72
73
static void pcre2_jit_free(void *ptr, void *allocator_data)
74
{
75
pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
76
allocator->free(ptr, allocator->memory_data);
77
}
78
79
#include "sljit/sljitLir.c"
80
81
#if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
82
#error Unsupported architecture
83
#endif
84
85
/* Defines for debugging purposes. */
86
87
/* 1 - Use unoptimized capturing brackets.
88
   2 - Enable capture_last_ptr (includes option 1). */
89
/* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
90
91
/* 1 - Always have a control head. */
92
/* #define DEBUG_FORCE_CONTROL_HEAD 1 */
93
94
/* Allocate memory for the regex stack on the real machine stack.
95
Fast, but limited size. */
96
#define MACHINE_STACK_SIZE 32768
97
98
/* Growth rate for stack allocated by the OS. Should be a multiple
99
of page size. */
100
#define STACK_GROWTH_RATE 8192
101
102
/* Enable to check that the allocation could destroy temporaries. */
103
#if defined SLJIT_DEBUG && SLJIT_DEBUG
104
#define DESTROY_REGISTERS 1
105
#endif
106
107
/*
108
Short summary of the backtracking mechanism employed by the JIT code generator:
109
110
The code generator follows the recursive nature of the PERL compatible regular
111
expressions. The basic blocks of regular expressions are condition checkers
112
which execute different commands depending on the result of the condition check.
113
The relationship between the operators can be horizontal (concatenation) and
114
vertical (sub-expression) (See struct backtrack_common for more details).
115
116
  'ab' - 'a' and 'b' regexps are concatenated
117
  'a+' - 'a' is the sub-expression of the '+' operator
118
119
The condition checkers are boolean (true/false) checkers. Machine code is generated
120
for the checker itself and for the actions depending on the result of the checker.
121
The 'true' case is called the matching path (expected path), and the other is called
122
the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
123
branches on the matching path.
124
125
 Greedy star operator (*) :
126
   Matching path: match happens.
127
   Backtrack path: match failed.
128
 Non-greedy star operator (*?) :
129
   Matching path: no need to perform a match.
130
   Backtrack path: match is required.
131
132
The following example shows the code generated for a capturing bracket
133
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
134
we have the following regular expression:
135
136
   A(B|C)D
137
138
The generated code will be the following:
139
140
 A matching path
141
 '(' matching path (pushing arguments to the stack)
142
 B matching path
143
 ')' matching path (pushing arguments to the stack)
144
 D matching path
145
 return with successful match
146
147
 D backtrack path
148
 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
149
 B backtrack path
150
 C expected path
151
 jump to D matching path
152
 C backtrack path
153
 A backtrack path
154
155
 Notice that the order of the backtrack code paths is the opposite of the fast
156
 code paths. In this way the topmost value on the stack always belongs
157
 to the current backtrack code path. The backtrack path must check
158
 whether there is a next alternative. If so, it needs to jump back to
159
 the matching path eventually. Otherwise it needs to clear out its own stack
160
 frame and continue the execution on the backtrack code paths.
161
*/
162
163
/*
164
Saved stack frames:
165
166
Atomic blocks and asserts require reloading the values of private data
167
when the backtrack mechanism is performed. Because of OP_RECURSE, the data
168
are not necessarily known at compile time, thus we need a dynamic restore
169
mechanism.
170
171
The stack frames are stored in a chain list, and have the following format:
172
([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
173
174
Thus we can restore the private data to a particular point in the stack.
175
*/
176
177
typedef struct jit_arguments {
178
  /* Pointers first. */
179
  struct sljit_stack *stack;
180
  PCRE2_SPTR str;
181
  PCRE2_SPTR begin;
182
  PCRE2_SPTR end;
183
  pcre2_match_data *match_data;
184
  PCRE2_SPTR startchar_ptr;
185
  PCRE2_UCHAR *mark_ptr;
186
  int (*callout)(pcre2_callout_block *, void *);
187
  void *callout_data;
188
  /* Everything else after. */
189
  sljit_uw offset_limit;
190
  sljit_u32 limit_match;
191
  sljit_u32 oveccount;
192
  sljit_u32 options;
193
} jit_arguments;
194
195
#define JIT_NUMBER_OF_COMPILE_MODES 3
196
197
typedef struct executable_functions {
198
  void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
199
  void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
200
  sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
201
  sljit_u32 top_bracket;
202
  sljit_u32 limit_match;
203
} executable_functions;
204
205
/* Node of a singly linked list of sljit jumps: each node stores one jump and a
pointer to the next node. */
typedef struct jump_list {
206
  struct sljit_jump *jump;
207
  struct jump_list *next;
208
} jump_list;
209
210
/* Node of a singly linked list of stubs: each node pairs a jump (start) with a
label (quit) and points to the next node. */
typedef struct stub_list {
211
  struct sljit_jump *start;
212
  struct sljit_label *quit;
213
  struct stub_list *next;
214
} stub_list;
215
216
enum frame_types {
217
  no_frame = -1,
218
  no_stack = -2
219
};
220
221
enum control_types {
222
  type_mark = 0,
223
  type_then_trap = 1
224
};
225
226
enum  early_fail_types {
227
  type_skip = 0,
228
  type_fail = 1,
229
  type_fail_range = 2
230
};
231
232
typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args);
233
234
/* The following structure is the key data type for the recursive
235
code generator. It is allocated by compile_matchingpath, and contains
236
the arguments for compile_backtrackingpath. Must be the first member
237
of its descendants. */
238
typedef struct backtrack_common {
239
  /* Concatenation stack. */
240
  struct backtrack_common *prev;
241
  jump_list *nextbacktracks;
242
  /* Internal stack (for component operators). */
243
  struct backtrack_common *top;
244
  jump_list *topbacktracks;
245
  /* Opcode pointer. */
246
  PCRE2_SPTR cc;
247
} backtrack_common;
248
249
typedef struct assert_backtrack {
250
  backtrack_common common;
251
  jump_list *condfailed;
252
  /* Less than 0 if a frame is not needed. */
253
  int framesize;
254
  /* Points to our private memory word on the stack. */
255
  int private_data_ptr;
256
  /* For iterators. */
257
  struct sljit_label *matchingpath;
258
} assert_backtrack;
259
260
/* Backtrack record for brackets. It starts with a backtrack_common member
(backtrack_common must be the first member of its descendants) and adds the
labels and per-opcode data listed below. */
typedef struct bracket_backtrack {
261
  backtrack_common common;
262
  /* Where to continue if an alternative is successfully matched. */
263
  struct sljit_label *alternative_matchingpath;
264
  /* For rmin and rmax iterators. */
265
  struct sljit_label *recursive_matchingpath;
266
  /* For greedy ? operator. */
267
  struct sljit_label *zero_matchingpath;
268
  /* Contains the branches of a failed condition. */
269
  union {
270
    /* Both for OP_COND, OP_SCOND. */
271
    jump_list *condfailed;
272
    assert_backtrack *assert;
273
    /* For OP_ONCE. Less than 0 if not needed. */
274
    int framesize;
275
    /* For brackets with >3 alternatives. */
276
    struct sljit_put_label *matching_put_label;
277
  } u;
278
  /* Points to our private memory word on the stack. */
279
  int private_data_ptr;
280
} bracket_backtrack;
281
282
typedef struct bracketpos_backtrack {
283
  backtrack_common common;
284
  /* Points to our private memory word on the stack. */
285
  int private_data_ptr;
286
  /* Reverting stack is needed. */
287
  int framesize;
288
  /* Allocated stack size. */
289
  int stacksize;
290
} bracketpos_backtrack;
291
292
typedef struct braminzero_backtrack {
293
  backtrack_common common;
294
  struct sljit_label *matchingpath;
295
} braminzero_backtrack;
296
297
typedef struct char_iterator_backtrack {
298
  backtrack_common common;
299
  /* Next iteration. */
300
  struct sljit_label *matchingpath;
301
  union {
302
    jump_list *backtracks;
303
    struct {
304
      unsigned int othercasebit;
305
      PCRE2_UCHAR chr;
306
      BOOL enabled;
307
    } charpos;
308
  } u;
309
} char_iterator_backtrack;
310
311
typedef struct ref_iterator_backtrack {
312
  backtrack_common common;
313
  /* Next iteration. */
314
  struct sljit_label *matchingpath;
315
} ref_iterator_backtrack;
316
317
/* Node of a singly linked list describing one recursive function: its entry
and backtrack labels, the calls collected for each of them, and the starting
opcode. */
typedef struct recurse_entry {
318
  struct recurse_entry *next;
319
  /* Contains the function entry label. */
320
  struct sljit_label *entry_label;
321
  /* Contains the function backtrack label. */
322
  struct sljit_label *backtrack_label;
323
  /* Collects the entry calls until the function is created. */
324
  jump_list *entry_calls;
325
  /* Collects the backtrack calls until the function is created. */
326
  jump_list *backtrack_calls;
327
  /* Points to the starting opcode. */
328
  sljit_sw start;
329
} recurse_entry;
330
331
typedef struct recurse_backtrack {
332
  backtrack_common common;
333
  /* Return to the matching path. */
334
  struct sljit_label *matchingpath;
335
  /* Recursive pattern. */
336
  recurse_entry *entry;
337
  /* Pattern is inlined. */
338
  BOOL inlined_pattern;
339
} recurse_backtrack;
340
341
#define OP_THEN_TRAP OP_TABLE_LENGTH
342
343
typedef struct then_trap_backtrack {
344
  backtrack_common common;
345
  /* If then_trap is not NULL, this structure contains the real
346
  then_trap for the backtracking path. */
347
  struct then_trap_backtrack *then_trap;
348
  /* Points to the starting opcode. */
349
  sljit_sw start;
350
  /* Exit point for the then opcodes of this alternative. */
351
  jump_list *quit;
352
  /* Frame size of the current alternative. */
353
  int framesize;
354
} then_trap_backtrack;
355
356
#define MAX_N_CHARS 12
357
#define MAX_DIFF_CHARS 5
358
359
typedef struct fast_forward_char_data {
360
  /* Number of characters in the chars array, 255 for any character. */
361
  sljit_u8 count;
362
  /* Number of last UTF-8 characters in the chars array. */
363
  sljit_u8 last_count;
364
  /* Available characters in the current position. */
365
  PCRE2_UCHAR chars[MAX_DIFF_CHARS];
366
} fast_forward_char_data;
367
368
#define MAX_CLASS_RANGE_SIZE 4
369
#define MAX_CLASS_CHARS_SIZE 3
370
371
typedef struct compiler_common {
372
  /* The sljit generic compiler. */
373
  struct sljit_compiler *compiler;
374
  /* Compiled regular expression. */
375
  pcre2_real_code *re;
376
  /* First byte code. */
377
  PCRE2_SPTR start;
378
  /* Maps private data offset to each opcode. */
379
  sljit_s32 *private_data_ptrs;
380
  /* Chain list of read-only data ptrs. */
381
  void *read_only_data_head;
382
  /* Tells whether the capturing bracket is optimized. */
383
  sljit_u8 *optimized_cbracket;
384
  /* Tells whether the starting offset is a target of then. */
385
  sljit_u8 *then_offsets;
386
  /* Current position where a THEN must jump. */
387
  then_trap_backtrack *then_trap;
388
  /* Starting offset of private data for capturing brackets. */
389
  sljit_s32 cbra_ptr;
390
  /* Output vector starting point. Must be divisible by 2. */
391
  sljit_s32 ovector_start;
392
  /* Points to the starting character of the current match. */
393
  sljit_s32 start_ptr;
394
  /* Last known position of the requested byte. */
395
  sljit_s32 req_char_ptr;
396
  /* Head of the last recursion. */
397
  sljit_s32 recursive_head_ptr;
398
  /* First inspected character for partial matching.
399
     (Needed for avoiding zero length partial matches.) */
400
  sljit_s32 start_used_ptr;
401
  /* Starting pointer for partial soft matches. */
402
  sljit_s32 hit_start;
403
  /* Pointer of the match end position. */
404
  sljit_s32 match_end_ptr;
405
  /* Points to the marked string. */
406
  sljit_s32 mark_ptr;
407
  /* Recursive control verb management chain. */
408
  sljit_s32 control_head_ptr;
409
  /* Points to the last matched capture block index. */
410
  sljit_s32 capture_last_ptr;
411
  /* Fast forward skipping byte code pointer. */
412
  PCRE2_SPTR fast_forward_bc_ptr;
413
  /* Locals used by fast fail optimization. */
414
  sljit_s32 early_fail_start_ptr;
415
  sljit_s32 early_fail_end_ptr;
416
  /* Variables used by recursive call generator. */
417
  sljit_s32 recurse_bitset_size;
418
  uint8_t *recurse_bitset;
419
420
  /* Flipped and lower case tables. */
421
  const sljit_u8 *fcc;
422
  sljit_sw lcc;
423
  /* Mode can be PCRE2_JIT_COMPLETE and others. */
424
  int mode;
425
  /* TRUE, when empty match is accepted for partial matching. */
426
  BOOL allow_empty_partial;
427
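  /* TRUE when the pattern might match an empty string.
     NOTE(review): the previous text said "minlength is greater than 0", which
     contradicts the field name; presumably minlength == 0 was meant -- confirm
     against the place where this flag is initialized. */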
428
  BOOL might_be_empty;
429
  /* \K is found in the pattern. */
430
  BOOL has_set_som;
431
  /* (*SKIP:arg) is found in the pattern. */
432
  BOOL has_skip_arg;
433
  /* (*THEN) is found in the pattern. */
434
  BOOL has_then;
435
  /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
436
  BOOL has_skip_in_assert_back;
437
  /* Quit is redirected by recurse, negative assertion, or positive assertion in conditional block. */
438
  BOOL local_quit_available;
439
  /* Currently in a positive assertion. */
440
  BOOL in_positive_assertion;
441
  /* Newline control. */
442
  int nltype;
443
  sljit_u32 nlmax;
444
  sljit_u32 nlmin;
445
  int newline;
446
  int bsr_nltype;
447
  sljit_u32 bsr_nlmax;
448
  sljit_u32 bsr_nlmin;
449
  /* Dollar endonly. */
450
  int endonly;
451
  /* Tables. */
452
  sljit_sw ctypes;
453
  /* Named capturing brackets. */
454
  PCRE2_SPTR name_table;
455
  sljit_sw name_count;
456
  sljit_sw name_entry_size;
457
458
  /* Labels and jump lists. */
459
  struct sljit_label *partialmatchlabel;
460
  struct sljit_label *quit_label;
461
  struct sljit_label *abort_label;
462
  struct sljit_label *accept_label;
463
  struct sljit_label *ff_newline_shortcut;
464
  stub_list *stubs;
465
  recurse_entry *entries;
466
  recurse_entry *currententry;
467
  jump_list *partialmatch;
468
  jump_list *quit;
469
  jump_list *positive_assertion_quit;
470
  jump_list *abort;
471
  jump_list *failed_match;
472
  jump_list *accept;
473
  jump_list *calllimit;
474
  jump_list *stackalloc;
475
  jump_list *revertframes;
476
  jump_list *wordboundary;
477
  jump_list *anynewline;
478
  jump_list *hspace;
479
  jump_list *vspace;
480
  jump_list *casefulcmp;
481
  jump_list *caselesscmp;
482
  jump_list *reset_match;
483
  BOOL unset_backref;
484
  BOOL alt_circumflex;
485
#ifdef SUPPORT_UNICODE
486
  BOOL utf;
487
  BOOL invalid_utf;
488
  BOOL ucp;
489
  /* Points to saving area for iref. */
490
  sljit_s32 iref_ptr;
491
  jump_list *getucd;
492
  jump_list *getucdtype;
493
#if PCRE2_CODE_UNIT_WIDTH == 8
494
  jump_list *utfreadchar;
495
  jump_list *utfreadtype8;
496
  jump_list *utfpeakcharback;
497
#endif
498
#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
499
  jump_list *utfreadchar_invalid;
500
  jump_list *utfreadnewline_invalid;
501
  jump_list *utfmoveback_invalid;
502
  jump_list *utfpeakcharback_invalid;
503
#endif
504
#endif /* SUPPORT_UNICODE */
505
} compiler_common;
506
507
/* For byte_sequence_compare. */
508
509
typedef struct compare_context {
510
  int length;
511
  int sourcereg;
512
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
513
  int ucharptr;
514
  union {
515
    sljit_s32 asint;
516
    sljit_u16 asushort;
517
#if PCRE2_CODE_UNIT_WIDTH == 8
518
    sljit_u8 asbyte;
519
    sljit_u8 asuchars[4];
520
#elif PCRE2_CODE_UNIT_WIDTH == 16
521
    sljit_u16 asuchars[2];
522
#elif PCRE2_CODE_UNIT_WIDTH == 32
523
    sljit_u32 asuchars[1];
524
#endif
525
  } c;
526
  union {
527
    sljit_s32 asint;
528
    sljit_u16 asushort;
529
#if PCRE2_CODE_UNIT_WIDTH == 8
530
    sljit_u8 asbyte;
531
    sljit_u8 asuchars[4];
532
#elif PCRE2_CODE_UNIT_WIDTH == 16
533
    sljit_u16 asuchars[2];
534
#elif PCRE2_CODE_UNIT_WIDTH == 32
535
    sljit_u32 asuchars[1];
536
#endif
537
  } oc;
538
#endif
539
} compare_context;
540
541
/* Undefine sljit macros. */
542
#undef CMP
543
544
/* Used for accessing the elements of the stack. */
545
#define STACK(i)      ((i) * SSIZE_OF(sw))
546
547
#ifdef SLJIT_PREF_SHIFT_REG
548
#if SLJIT_PREF_SHIFT_REG == SLJIT_R2
549
/* Nothing. */
550
#elif SLJIT_PREF_SHIFT_REG == SLJIT_R3
551
#define SHIFT_REG_IS_R3
552
#else
553
#error "Unsupported shift register"
554
#endif
555
#endif
556
557
#define TMP1          SLJIT_R0
558
#ifdef SHIFT_REG_IS_R3
559
#define TMP2          SLJIT_R3
560
#define TMP3          SLJIT_R2
561
#else
562
#define TMP2          SLJIT_R2
563
#define TMP3          SLJIT_R3
564
#endif
565
#define STR_PTR       SLJIT_R1
566
#define STR_END       SLJIT_S0
567
#define STACK_TOP     SLJIT_S1
568
#define STACK_LIMIT   SLJIT_S2
569
#define COUNT_MATCH   SLJIT_S3
570
#define ARGUMENTS     SLJIT_S4
571
#define RETURN_ADDR   SLJIT_R4
572
573
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
574
#define HAS_VIRTUAL_REGISTERS 1
575
#else
576
#define HAS_VIRTUAL_REGISTERS 0
577
#endif
578
579
/* Local space layout. */
580
/* These two locals can be used by the current opcode. */
581
#define LOCALS0          (0 * sizeof(sljit_sw))
582
#define LOCALS1          (1 * sizeof(sljit_sw))
583
/* Two local variables for possessive quantifiers (char1 cannot use them). */
584
#define POSSESSIVE0      (2 * sizeof(sljit_sw))
585
#define POSSESSIVE1      (3 * sizeof(sljit_sw))
586
/* Max limit of recursions. */
587
#define LIMIT_MATCH      (4 * sizeof(sljit_sw))
588
/* The output vector is stored on the stack, and contains pointers
589
to characters. The vector data is divided into two groups: the first
590
group contains the start / end character pointers, and the second is
591
the start pointers when the end of the capturing group has not yet been reached. */
592
#define OVECTOR_START    (common->ovector_start)
593
#define OVECTOR(i)       (OVECTOR_START + (i) * SSIZE_OF(sw))
594
#define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * SSIZE_OF(sw))
595
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
596
597
#if PCRE2_CODE_UNIT_WIDTH == 8
598
#define MOV_UCHAR  SLJIT_MOV_U8
599
#define IN_UCHARS(x) (x)
600
#elif PCRE2_CODE_UNIT_WIDTH == 16
601
#define MOV_UCHAR  SLJIT_MOV_U16
602
#define UCHAR_SHIFT (1)
603
#define IN_UCHARS(x) ((x) * 2)
604
#elif PCRE2_CODE_UNIT_WIDTH == 32
605
#define MOV_UCHAR  SLJIT_MOV_U32
606
#define UCHAR_SHIFT (2)
607
#define IN_UCHARS(x) ((x) * 4)
608
#else
609
#error Unsupported compiling mode
610
#endif
611
612
/* Shortcuts. */
613
#define DEFINE_COMPILER \
614
  struct sljit_compiler *compiler = common->compiler
615
#define OP1(op, dst, dstw, src, srcw) \
616
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
617
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
618
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
619
#define OP2U(op, src1, src1w, src2, src2w) \
620
  sljit_emit_op2u(compiler, (op), (src1), (src1w), (src2), (src2w))
621
#define OP_SRC(op, src, srcw) \
622
  sljit_emit_op_src(compiler, (op), (src), (srcw))
623
#define LABEL() \
624
  sljit_emit_label(compiler)
625
#define JUMP(type) \
626
  sljit_emit_jump(compiler, (type))
627
#define JUMPTO(type, label) \
628
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
629
#define JUMPHERE(jump) \
630
  sljit_set_label((jump), sljit_emit_label(compiler))
631
#define SET_LABEL(jump, label) \
632
  sljit_set_label((jump), (label))
633
#define CMP(type, src1, src1w, src2, src2w) \
634
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
635
#define CMPTO(type, src1, src1w, src2, src2w, label) \
636
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
637
#define OP_FLAGS(op, dst, dstw, type) \
638
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))
639
#define CMOV(type, dst_reg, src, srcw) \
640
  sljit_emit_cmov(compiler, (type), (dst_reg), (src), (srcw))
641
#define GET_LOCAL_BASE(dst, dstw, offset) \
642
  sljit_get_local_base(compiler, (dst), (dstw), (offset))
643
644
#define READ_CHAR_MAX 0x7fffffff
645
646
#define INVALID_UTF_CHAR -1
647
#define UNASSIGNED_UTF_CHAR 888
648
649
#if defined SUPPORT_UNICODE
650
#if PCRE2_CODE_UNIT_WIDTH == 8
651
652
/* Reads one UTF-8 character starting at ptr, validating it while decoding,
and never reads at or beyond end (ptr itself must be below end, because
ptr[0] is read unconditionally).

  c              - lvalue that receives the decoded code point
  ptr            - lvalue pointer; advanced past the character on success
  end            - first address that must not be read
  invalid_action - statement executed when the sequence is not valid

Rejected input: truncated sequences, missing continuation units, lead units
outside 0xc2-0xf4, 3-unit forms below 0x800 or in the surrogate range, and
4-unit forms outside 0x10000-0x10ffff. When invalid_action runs, c may hold a
partially decoded value; ptr has already been advanced for the value range
failures and is unchanged for the others.

The expansion is a plain brace block (not do { } while (0)), so a statement
such as "break" or "continue" given as invalid_action applies to the loop
enclosing the macro invocation. All parameters are parenthesized so that
expression arguments expand safely. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if ((ptr)[0] <= 0x7f) \
    (c) = *(ptr)++; \
  else if ((ptr) + 1 < (end) && (ptr)[1] >= 0x80 && (ptr)[1] < 0xc0) \
    { \
    (c) = (ptr)[1] - 0x80; \
    \
    if ((ptr)[0] >= 0xc2 && (ptr)[0] <= 0xdf) \
      { \
      (c) |= ((ptr)[0] - 0xc0) << 6; \
      (ptr) += 2; \
      } \
    else if ((ptr) + 2 < (end) && (ptr)[2] >= 0x80 && (ptr)[2] < 0xc0) \
      { \
      (c) = (c) << 6 | ((ptr)[2] - 0x80); \
      \
      if ((ptr)[0] >= 0xe0 && (ptr)[0] <= 0xef) \
        { \
        (c) |= ((ptr)[0] - 0xe0) << 12; \
        (ptr) += 3; \
        \
        if ((c) < 0x800 || ((c) >= 0xd800 && (c) < 0xe000)) \
          { \
          invalid_action; \
          } \
        } \
      else if ((ptr) + 3 < (end) && (ptr)[3] >= 0x80 && (ptr)[3] < 0xc0) \
        { \
        (c) = (c) << 6 | ((ptr)[3] - 0x80); \
        \
        if ((ptr)[0] >= 0xf0 && (ptr)[0] <= 0xf4) \
          { \
          (c) |= ((ptr)[0] - 0xf0) << 18; \
          (ptr) += 4; \
          \
          if ((c) >= 0x110000 || (c) < 0x10000) \
            { \
            invalid_action; \
            } \
          } \
        else \
          { \
          invalid_action; \
          } \
        } \
      else \
        { \
        invalid_action; \
        } \
      } \
    else \
      { \
      invalid_action; \
      } \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
713
714
/* Reads the UTF-8 character that ends immediately before ptr, validating it
while scanning backwards, and never reads below start (ptr must be above
start, because ptr[-1] is read unconditionally).

  c              - lvalue that receives the decoded code point
  ptr            - lvalue pointer; moved back to the first unit on success
  start          - lowest address that may be read
  invalid_action - statement executed when the sequence is not valid

Each continuation unit contributes six bits at a position that depends on its
distance from the end of the character: the last unit supplies bits 0-5, the
one before it bits 6-11, and the next one bits 12-17. The units are therefore
OR-ed in at shifts 0, 6 and 12 as they are met; the accumulated value must
not be shifted, otherwise the groups end up in reversed order.

Rejected input: sequences reaching below start, missing continuation units,
lead units outside 0xc2-0xf4, 3-unit forms below 0x800 or in the surrogate
range, and 4-unit forms outside 0x10000-0x10ffff. When invalid_action runs, c
may hold a partially decoded value; ptr has already been moved for the value
range failures and is unchanged for the others.

The expansion is a plain brace block (not do { } while (0)), so a statement
such as "break" or "continue" given as invalid_action applies to the loop
enclosing the macro invocation. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  (c) = (ptr)[-1]; \
  if ((c) <= 0x7f) \
    (ptr)--; \
  else if ((ptr) - 1 > (start) && (ptr)[-1] >= 0x80 && (ptr)[-1] < 0xc0) \
    { \
    (c) -= 0x80; \
    \
    if ((ptr)[-2] >= 0xc2 && (ptr)[-2] <= 0xdf) \
      { \
      (c) |= ((ptr)[-2] - 0xc0) << 6; \
      (ptr) -= 2; \
      } \
    else if ((ptr) - 2 > (start) && (ptr)[-2] >= 0x80 && (ptr)[-2] < 0xc0) \
      { \
      (c) |= ((ptr)[-2] - 0x80) << 6; \
      \
      if ((ptr)[-3] >= 0xe0 && (ptr)[-3] <= 0xef) \
        { \
        (c) |= ((ptr)[-3] - 0xe0) << 12; \
        (ptr) -= 3; \
        \
        if ((c) < 0x800 || ((c) >= 0xd800 && (c) < 0xe000)) \
          { \
          invalid_action; \
          } \
        } \
      else if ((ptr) - 3 > (start) && (ptr)[-3] >= 0x80 && (ptr)[-3] < 0xc0) \
        { \
        (c) |= ((ptr)[-3] - 0x80) << 12; \
        \
        if ((ptr)[-4] >= 0xf0 && (ptr)[-4] <= 0xf4) \
          { \
          (c) |= ((ptr)[-4] - 0xf0) << 18; \
          (ptr) -= 4; \
          \
          if ((c) >= 0x110000 || (c) < 0x10000) \
            { \
            invalid_action; \
            } \
          } \
        else \
          { \
          invalid_action; \
          } \
        } \
      else \
        { \
        invalid_action; \
        } \
      } \
    else \
      { \
      invalid_action; \
      } \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
776
777
#elif PCRE2_CODE_UNIT_WIDTH == 16
778
779
/* Reads one UTF-16 character starting at ptr, validating it while decoding
(ptr must be below end, because ptr[0] is read unconditionally).

  c              - lvalue that receives the decoded code point
  ptr            - lvalue pointer; advanced by one or two units on success
  end            - first address that must not be read
  invalid_action - statement executed for an unpaired surrogate

A unit outside 0xd800-0xdfff is returned as is. A high surrogate
(0xd800-0xdbff) is accepted only when a low surrogate (0xdc00-0xdfff) follows
before end. On failure c and ptr are left unchanged.

The expansion is a plain brace block (not do { } while (0)), so a statement
such as "break" given as invalid_action applies to the enclosing loop. All
parameters are parenthesized so that expression arguments expand safely. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if ((ptr)[0] < 0xd800 || (ptr)[0] >= 0xe000) \
    (c) = *(ptr)++; \
  else if ((ptr)[0] < 0xdc00 && (ptr) + 1 < (end) && (ptr)[1] >= 0xdc00 && (ptr)[1] < 0xe000) \
    { \
    (c) = ((((ptr)[0] - 0xd800) << 10) | ((ptr)[1] - 0xdc00)) + 0x10000; \
    (ptr) += 2; \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
793
794
/* Reads the UTF-16 character that ends immediately before ptr, validating it
while scanning backwards (ptr must be above start, because ptr[-1] is read
unconditionally).

  c              - lvalue that receives the decoded code point
  ptr            - lvalue pointer; moved back by one or two units on success
  start          - lowest address that may be read
  invalid_action - statement executed for an unpaired surrogate

A unit outside 0xd800-0xdfff is returned as is. A low surrogate
(0xdc00-0xdfff) is accepted only when a high surrogate (0xd800-0xdbff)
precedes it inside the buffer. On failure ptr is unchanged and c holds the
raw unit read from ptr[-1].

The expansion is a plain brace block (not do { } while (0)), so a statement
such as "break" given as invalid_action applies to the enclosing loop. All
parameters are parenthesized so that expression arguments expand safely. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  (c) = (ptr)[-1]; \
  if ((c) < 0xd800 || (c) >= 0xe000) \
    (ptr)--; \
  else if ((c) >= 0xdc00 && (ptr) - 1 > (start) && (ptr)[-2] >= 0xd800 && (ptr)[-2] < 0xdc00) \
    { \
    (c) = ((((ptr)[-2] - 0xd800) << 10) | ((c) - 0xdc00)) + 0x10000; \
    (ptr) -= 2; \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
809
810
811
#elif PCRE2_CODE_UNIT_WIDTH == 32
812
813
/* Reads one UTF-32 character at ptr, validating it (ptr[0] is read
unconditionally; end is accepted for signature parity with the other code
unit widths and is not used).

  c              - lvalue that receives the code point
  ptr            - lvalue pointer; advanced by one unit on success
  invalid_action - statement executed for a surrogate (0xd800-0xdfff) or a
                   value of 0x110000 and above

On failure c and ptr are left unchanged.

The expansion is a plain brace block (not do { } while (0)), so a statement
such as "break" given as invalid_action applies to the enclosing loop. All
used parameters are parenthesized so that expression arguments expand
safely. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if ((ptr)[0] < 0xd800 || ((ptr)[0] >= 0xe000 && (ptr)[0] < 0x110000)) \
    (c) = *(ptr)++; \
  else \
    { \
    invalid_action; \
    } \
  }
822
823
/* Reads the UTF-32 character immediately before ptr, validating it (ptr[-1]
is read unconditionally; start is accepted for signature parity with the
other code unit widths and is not used).

  c              - lvalue that receives ptr[-1], whether or not it is valid
  ptr            - lvalue pointer; moved back by one unit on success
  invalid_action - statement executed for a surrogate (0xd800-0xdfff) or a
                   value of 0x110000 and above

On failure ptr is left unchanged.

The expansion is a plain brace block (not do { } while (0)), so a statement
such as "break" given as invalid_action applies to the enclosing loop. All
used parameters are parenthesized so that expression arguments expand
safely. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  (c) = (ptr)[-1]; \
  if ((ptr)[-1] < 0xd800 || ((ptr)[-1] >= 0xe000 && (ptr)[-1] < 0x110000)) \
    (ptr)--; \
  else \
    { \
    invalid_action; \
    } \
  }
833
834
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
835
#endif /* SUPPORT_UNICODE */
836
837
/* Returns the address just past the closing KET of the bracket that starts at
cc. cc must point to an assertion opcode (OP_ASSERT .. OP_ASSERTBACK_NA) or to
a bracket opcode in the OP_ONCE .. OP_SCOND range. */
static PCRE2_SPTR bracketend(PCRE2_SPTR cc)
{
SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

/* Follow the link of the first branch, then of every OP_ALT that follows. */
cc += GET(cc, 1);
while (*cc == OP_ALT)
  cc += GET(cc, 1);

SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);

/* Step over the KET opcode and its link field. */
return cc + 1 + LINK_SIZE;
}
845
846
/* Returns the number of alternatives (branches) of the bracket that starts at
cc; the result is at least 1. cc must point to an assertion opcode
(OP_ASSERT .. OP_ASSERTBACK_NA) or to a bracket opcode in the
OP_ONCE .. OP_SCOND range. */
static int no_alternatives(PCRE2_SPTR cc)
{
int branches;

SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND));

/* The first branch always exists; each OP_ALT reached through the link
fields adds one more. */
cc += GET(cc, 1);
for (branches = 1; *cc == OP_ALT; branches++)
  cc += GET(cc, 1);

SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
return branches;
}
859
860
/* Functions which might need modification for every newly supported opcode:
861
 next_opcode
862
 check_opcode_types
863
 set_private_data_ptrs
864
 get_framesize
865
 init_frame
866
 get_recurse_data_length
867
 copy_recurse_data
868
 compile_matchingpath
869
 compile_backtrackingpath
870
*/
871
872
/* Returns the address of the opcode that follows the one at cc. NULL is
returned for OP_ANYBYTE when common->utf is set (Unicode builds only) and,
after SLJIT_UNREACHABLE(), for any opcode that is not listed below. */
static PCRE2_SPTR next_opcode(compiler_common *common, PCRE2_SPTR cc)
{
PCRE2_SPTR next;

SLJIT_UNUSED_ARG(common);

switch(*cc)
  {
  /* The OP_lengths table entry is the distance to the next opcode. */
  case OP_SOD: case OP_SOM: case OP_SET_SOM:
  case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY:
  case OP_NOT_DIGIT: case OP_DIGIT:
  case OP_NOT_WHITESPACE: case OP_WHITESPACE:
  case OP_NOT_WORDCHAR: case OP_WORDCHAR:
  case OP_ANY: case OP_ALLANY:
  case OP_NOTPROP: case OP_PROP:
  case OP_ANYNL:
  case OP_NOT_HSPACE: case OP_HSPACE:
  case OP_NOT_VSPACE: case OP_VSPACE:
  case OP_EXTUNI:
  case OP_EODN: case OP_EOD:
  case OP_CIRC: case OP_CIRCM:
  case OP_DOLL: case OP_DOLLM:
  case OP_CRSTAR: case OP_CRMINSTAR:
  case OP_CRPLUS: case OP_CRMINPLUS:
  case OP_CRQUERY: case OP_CRMINQUERY:
  case OP_CRRANGE: case OP_CRMINRANGE:
  case OP_CRPOSSTAR: case OP_CRPOSPLUS:
  case OP_CRPOSQUERY: case OP_CRPOSRANGE:
  case OP_CLASS: case OP_NCLASS:
  case OP_REF: case OP_REFI:
  case OP_DNREF: case OP_DNREFI:
  case OP_RECURSE:
  case OP_CALLOUT:
  case OP_ALT:
  case OP_KET: case OP_KETRMAX: case OP_KETRMIN: case OP_KETRPOS:
  case OP_REVERSE:
  case OP_ASSERT: case OP_ASSERT_NOT:
  case OP_ASSERTBACK: case OP_ASSERTBACK_NOT:
  case OP_ASSERT_NA: case OP_ASSERTBACK_NA:
  case OP_ONCE:
  case OP_SCRIPT_RUN:
  case OP_BRA: case OP_BRAPOS:
  case OP_CBRA: case OP_CBRAPOS:
  case OP_COND:
  case OP_SBRA: case OP_SBRAPOS:
  case OP_SCBRA: case OP_SCBRAPOS:
  case OP_SCOND:
  case OP_CREF: case OP_DNCREF:
  case OP_RREF: case OP_DNRREF:
  case OP_FALSE: case OP_TRUE:
  case OP_BRAZERO: case OP_BRAMINZERO: case OP_BRAPOSZERO:
  case OP_PRUNE: case OP_SKIP: case OP_THEN: case OP_COMMIT:
  case OP_FAIL:
  case OP_ACCEPT: case OP_ASSERT_ACCEPT:
  case OP_CLOSE:
  case OP_SKIPZERO:
  return cc + PRIV(OP_lengths)[*cc];

  /* Table length first; in UTF mode the code unit just before the resulting
  position may announce extra units, which are skipped as well. */
  case OP_CHAR: case OP_CHARI:
  case OP_NOT: case OP_NOTI:
  case OP_STAR: case OP_MINSTAR:
  case OP_PLUS: case OP_MINPLUS:
  case OP_QUERY: case OP_MINQUERY:
  case OP_UPTO: case OP_MINUPTO:
  case OP_EXACT:
  case OP_POSSTAR: case OP_POSPLUS: case OP_POSQUERY: case OP_POSUPTO:
  case OP_STARI: case OP_MINSTARI:
  case OP_PLUSI: case OP_MINPLUSI:
  case OP_QUERYI: case OP_MINQUERYI:
  case OP_UPTOI: case OP_MINUPTOI:
  case OP_EXACTI:
  case OP_POSSTARI: case OP_POSPLUSI: case OP_POSQUERYI: case OP_POSUPTOI:
  case OP_NOTSTAR: case OP_NOTMINSTAR:
  case OP_NOTPLUS: case OP_NOTMINPLUS:
  case OP_NOTQUERY: case OP_NOTMINQUERY:
  case OP_NOTUPTO: case OP_NOTMINUPTO:
  case OP_NOTEXACT:
  case OP_NOTPOSSTAR: case OP_NOTPOSPLUS:
  case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:
  case OP_NOTSTARI: case OP_NOTMINSTARI:
  case OP_NOTPLUSI: case OP_NOTMINPLUSI:
  case OP_NOTQUERYI: case OP_NOTMINQUERYI:
  case OP_NOTUPTOI: case OP_NOTMINUPTOI:
  case OP_NOTEXACTI:
  case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI:
  case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:
  next = cc + PRIV(OP_lengths)[*cc];
#ifdef SUPPORT_UNICODE
  if (common->utf && HAS_EXTRALEN(next[-1]))
    next += GET_EXTRALEN(next[-1]);
#endif
  return next;

  /* Special cases. */

  /* One unit less than the table length is skipped for these. */
  case OP_TYPESTAR: case OP_TYPEMINSTAR:
  case OP_TYPEPLUS: case OP_TYPEMINPLUS:
  case OP_TYPEQUERY: case OP_TYPEMINQUERY:
  case OP_TYPEUPTO: case OP_TYPEMINUPTO:
  case OP_TYPEEXACT:
  case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS:
  case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:
  return cc + PRIV(OP_lengths)[*cc] - 1;

  case OP_ANYBYTE:
#ifdef SUPPORT_UNICODE
  if (common->utf)
    return NULL;
#endif
  return cc + 1;

  /* The total length is stored inside the opcode itself. */
  case OP_CALLOUT_STR:
  return cc + GET(cc, 1 + 2*LINK_SIZE);

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
  case OP_XCLASS:
  return cc + GET(cc, 1);
#endif

  /* Three fixed units plus the count stored in cc[1]. */
  case OP_MARK:
  case OP_COMMIT_ARG: case OP_PRUNE_ARG: case OP_SKIP_ARG: case OP_THEN_ARG:
  return cc + 1 + 2 + cc[1];

  default:
  SLJIT_UNREACHABLE();
  return NULL;
  }
}
1072
1073
/* Single pass over the byte code in [cc, ccend).

   For opcodes that need a local slot (case-insensitive back references,
   recursion, callouts, marks) the current common->ovector_start is recorded
   in the matching common->*_ptr field and ovector_start is advanced. Capture
   brackets that are referenced, or that are possessive, get their
   optimized_cbracket entry cleared. Several pattern-wide flags are set too.

   Returns FALSE when the pattern cannot be handled: next_opcode() returned
   NULL, a conditional group starts with a callout, or one of the verbs
   checked below appears before the end of a non-atomic assertion.
   Returns TRUE otherwise. */
static BOOL check_opcode_types(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend)
{
/* Furthest end of any OP_ASSERTBACK / non-atomic assertion seen so far.
   Both start below the scanned range, so no opcode is "inside" initially. */
PCRE2_SPTR lookbehind_limit = cc - 1;
PCRE2_SPTR na_assert_limit = cc - 1;
PCRE2_SPTR ptr;
int entries;

while (cc < ccend)
  {
  switch(*cc)
    {
    case OP_SET_SOM:
    common->might_be_empty = TRUE;
    common->has_set_som = TRUE;
    cc++;
    break;

    case OP_REFI:
#ifdef SUPPORT_UNICODE
    /* Three words are set aside the first time OP_REFI is seen. */
    if (common->iref_ptr == 0)
      {
      common->iref_ptr = common->ovector_start;
      common->ovector_start += 3 * sizeof(sljit_sw);
      }
#endif /* SUPPORT_UNICODE */
    /* Fall through. */
    case OP_REF:
    case OP_CREF:
    /* The referenced capture must not be optimized. */
    common->optimized_cbracket[GET2(cc, 1)] = 0;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    ptr = bracketend(cc);
    if (ptr > na_assert_limit)
      na_assert_limit = ptr;
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERTBACK:
    ptr = bracketend(cc);
    if (ptr > lookbehind_limit)
      lookbehind_limit = ptr;
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    case OP_SCOND:
    /* A callout as the condition can only be inserted by AUTO_CALLOUT.
       This case is intentionally not supported. */
    if (cc[1 + LINK_SIZE] == OP_CALLOUT || cc[1 + LINK_SIZE] == OP_CALLOUT_STR)
      return FALSE;
    cc += 1 + LINK_SIZE;
    break;

    case OP_DNREF:
    case OP_DNREFI:
    case OP_DNCREF:
    /* Clear the entry of every capture group listed in the name table
       for this (duplicate) name. */
    ptr = common->name_table + GET2(cc, 1) * common->name_entry_size;
    for (entries = GET2(cc, 1 + IMM2_SIZE); entries > 0; entries--)
      {
      common->optimized_cbracket[GET2(ptr, 0)] = 0;
      ptr += common->name_entry_size;
      }
    cc += 1 + 2 * IMM2_SIZE;
    break;

    case OP_RECURSE:
    /* The slot is assigned only once. */
    if (common->recursive_head_ptr == 0)
      {
      common->recursive_head_ptr = common->ovector_start;
      common->ovector_start += sizeof(sljit_sw);
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_CALLOUT:
    case OP_CALLOUT_STR:
    if (common->capture_last_ptr == 0)
      {
      common->capture_last_ptr = common->ovector_start;
      common->ovector_start += sizeof(sljit_sw);
      }
    /* OP_CALLOUT has a fixed length, OP_CALLOUT_STR stores its own. */
    if (*cc == OP_CALLOUT)
      cc += PRIV(OP_lengths)[OP_CALLOUT];
    else
      cc += GET(cc, 1 + 2*LINK_SIZE);
    break;

    case OP_THEN_ARG:
    common->control_head_ptr = 1;
    common->has_then = TRUE;
    /* Fall through. */

    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    if (cc < na_assert_limit)
      return FALSE;
    /* Fall through */

    case OP_MARK:
    if (common->mark_ptr == 0)
      {
      common->mark_ptr = common->ovector_start;
      common->ovector_start += sizeof(sljit_sw);
      }
    /* Opcode, length unit, name and its terminator. */
    cc += 1 + 2 + cc[1];
    break;

    case OP_THEN:
    common->control_head_ptr = 1;
    common->has_then = TRUE;
    cc++;
    break;

    case OP_SKIP:
    if (cc < lookbehind_limit)
      common->has_skip_in_assert_back = TRUE;
    if (cc < na_assert_limit)
      return FALSE;
    cc++;
    break;

    case OP_SKIP_ARG:
    common->has_skip_arg = TRUE;
    common->control_head_ptr = 1;
    if (cc < lookbehind_limit)
      common->has_skip_in_assert_back = TRUE;
    if (cc < na_assert_limit)
      return FALSE;
    cc += 1 + 2 + cc[1];
    break;

    case OP_PRUNE:
    case OP_COMMIT:
    case OP_ASSERT_ACCEPT:
    if (cc < na_assert_limit)
      return FALSE;
    cc += 1;
    break;

    default:
    /* Everything else is simply stepped over; NULL means unsupported. */
    cc = next_opcode(common, cc);
    if (cc == NULL)
      return FALSE;
    break;
    }
  }

return TRUE;
}
1233
1234
#define EARLY_FAIL_ENHANCE_MAX (1 + 3)
1235
1236
/*
1237
start:
1238
  0 - skip / early fail allowed
1239
  1 - only early fail with range allowed
1240
  >1 - (start - 1) early fail is processed
1241
1242
return: current number of iterators enhanced with fast fail
1243
*/
1244
static int detect_early_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start,
1245
   sljit_s32 depth, int start, BOOL fast_forward_allowed)
1246
{
1247
PCRE2_SPTR begin = cc;
1248
PCRE2_SPTR next_alt;
1249
PCRE2_SPTR end;
1250
PCRE2_SPTR accelerated_start;
1251
BOOL prev_fast_forward_allowed;
1252
int result = 0;
1253
int count;
1254
1255
SLJIT_ASSERT(*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA);
1256
SLJIT_ASSERT(*cc != OP_CBRA || common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] != 0);
1257
SLJIT_ASSERT(start < EARLY_FAIL_ENHANCE_MAX);
1258
1259
next_alt = cc + GET(cc, 1);
1260
if (*next_alt == OP_ALT)
1261
  fast_forward_allowed = FALSE;
1262
1263
do
1264
  {
1265
  count = start;
1266
  cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);
1267
1268
  while (TRUE)
1269
    {
1270
    accelerated_start = NULL;
1271
1272
    switch(*cc)
1273
      {
1274
      case OP_SOD:
1275
      case OP_SOM:
1276
      case OP_SET_SOM:
1277
      case OP_NOT_WORD_BOUNDARY:
1278
      case OP_WORD_BOUNDARY:
1279
      case OP_EODN:
1280
      case OP_EOD:
1281
      case OP_CIRC:
1282
      case OP_CIRCM:
1283
      case OP_DOLL:
1284
      case OP_DOLLM:
1285
      /* Zero width assertions. */
1286
      cc++;
1287
      continue;
1288
1289
      case OP_NOT_DIGIT:
1290
      case OP_DIGIT:
1291
      case OP_NOT_WHITESPACE:
1292
      case OP_WHITESPACE:
1293
      case OP_NOT_WORDCHAR:
1294
      case OP_WORDCHAR:
1295
      case OP_ANY:
1296
      case OP_ALLANY:
1297
      case OP_ANYBYTE:
1298
      case OP_NOT_HSPACE:
1299
      case OP_HSPACE:
1300
      case OP_NOT_VSPACE:
1301
      case OP_VSPACE:
1302
      fast_forward_allowed = FALSE;
1303
      cc++;
1304
      continue;
1305
1306
      case OP_ANYNL:
1307
      case OP_EXTUNI:
1308
      fast_forward_allowed = FALSE;
1309
      if (count == 0)
1310
        count = 1;
1311
      cc++;
1312
      continue;
1313
1314
      case OP_NOTPROP:
1315
      case OP_PROP:
1316
      fast_forward_allowed = FALSE;
1317
      cc += 1 + 2;
1318
      continue;
1319
1320
      case OP_CHAR:
1321
      case OP_CHARI:
1322
      case OP_NOT:
1323
      case OP_NOTI:
1324
      fast_forward_allowed = FALSE;
1325
      cc += 2;
1326
#ifdef SUPPORT_UNICODE
1327
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1328
#endif
1329
      continue;
1330
1331
      case OP_TYPESTAR:
1332
      case OP_TYPEMINSTAR:
1333
      case OP_TYPEPLUS:
1334
      case OP_TYPEMINPLUS:
1335
      case OP_TYPEPOSSTAR:
1336
      case OP_TYPEPOSPLUS:
1337
      /* The type or prop opcode is skipped in the next iteration. */
1338
      cc += 1;
1339
1340
      if (cc[0] != OP_ANYNL && cc[0] != OP_EXTUNI)
1341
        {
1342
        accelerated_start = cc - 1;
1343
        break;
1344
        }
1345
1346
      if (count == 0)
1347
        count = 1;
1348
      fast_forward_allowed = FALSE;
1349
      continue;
1350
1351
      case OP_TYPEUPTO:
1352
      case OP_TYPEMINUPTO:
1353
      case OP_TYPEEXACT:
1354
      case OP_TYPEPOSUPTO:
1355
      cc += IMM2_SIZE;
1356
      /* Fall through */
1357
1358
      case OP_TYPEQUERY:
1359
      case OP_TYPEMINQUERY:
1360
      case OP_TYPEPOSQUERY:
1361
      /* The type or prop opcode is skipped in the next iteration. */
1362
      fast_forward_allowed = FALSE;
1363
      if (count == 0)
1364
        count = 1;
1365
      cc += 1;
1366
      continue;
1367
1368
      case OP_STAR:
1369
      case OP_MINSTAR:
1370
      case OP_PLUS:
1371
      case OP_MINPLUS:
1372
      case OP_POSSTAR:
1373
      case OP_POSPLUS:
1374
1375
      case OP_STARI:
1376
      case OP_MINSTARI:
1377
      case OP_PLUSI:
1378
      case OP_MINPLUSI:
1379
      case OP_POSSTARI:
1380
      case OP_POSPLUSI:
1381
1382
      case OP_NOTSTAR:
1383
      case OP_NOTMINSTAR:
1384
      case OP_NOTPLUS:
1385
      case OP_NOTMINPLUS:
1386
      case OP_NOTPOSSTAR:
1387
      case OP_NOTPOSPLUS:
1388
1389
      case OP_NOTSTARI:
1390
      case OP_NOTMINSTARI:
1391
      case OP_NOTPLUSI:
1392
      case OP_NOTMINPLUSI:
1393
      case OP_NOTPOSSTARI:
1394
      case OP_NOTPOSPLUSI:
1395
      accelerated_start = cc;
1396
      cc += 2;
1397
#ifdef SUPPORT_UNICODE
1398
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1399
#endif
1400
      break;
1401
1402
      case OP_UPTO:
1403
      case OP_MINUPTO:
1404
      case OP_EXACT:
1405
      case OP_POSUPTO:
1406
      case OP_UPTOI:
1407
      case OP_MINUPTOI:
1408
      case OP_EXACTI:
1409
      case OP_POSUPTOI:
1410
      case OP_NOTUPTO:
1411
      case OP_NOTMINUPTO:
1412
      case OP_NOTEXACT:
1413
      case OP_NOTPOSUPTO:
1414
      case OP_NOTUPTOI:
1415
      case OP_NOTMINUPTOI:
1416
      case OP_NOTEXACTI:
1417
      case OP_NOTPOSUPTOI:
1418
      cc += IMM2_SIZE;
1419
      /* Fall through */
1420
1421
      case OP_QUERY:
1422
      case OP_MINQUERY:
1423
      case OP_POSQUERY:
1424
      case OP_QUERYI:
1425
      case OP_MINQUERYI:
1426
      case OP_POSQUERYI:
1427
      case OP_NOTQUERY:
1428
      case OP_NOTMINQUERY:
1429
      case OP_NOTPOSQUERY:
1430
      case OP_NOTQUERYI:
1431
      case OP_NOTMINQUERYI:
1432
      case OP_NOTPOSQUERYI:
1433
      fast_forward_allowed = FALSE;
1434
      if (count == 0)
1435
        count = 1;
1436
      cc += 2;
1437
#ifdef SUPPORT_UNICODE
1438
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1439
#endif
1440
      continue;
1441
1442
      case OP_CLASS:
1443
      case OP_NCLASS:
1444
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
1445
      case OP_XCLASS:
1446
      accelerated_start = cc;
1447
      cc += ((*cc == OP_XCLASS) ? GET(cc, 1) : (unsigned int)(1 + (32 / sizeof(PCRE2_UCHAR))));
1448
#else
1449
      accelerated_start = cc;
1450
      cc += (1 + (32 / sizeof(PCRE2_UCHAR)));
1451
#endif
1452
1453
      switch (*cc)
1454
        {
1455
        case OP_CRSTAR:
1456
        case OP_CRMINSTAR:
1457
        case OP_CRPLUS:
1458
        case OP_CRMINPLUS:
1459
        case OP_CRPOSSTAR:
1460
        case OP_CRPOSPLUS:
1461
        cc++;
1462
        break;
1463
1464
        case OP_CRRANGE:
1465
        case OP_CRMINRANGE:
1466
        case OP_CRPOSRANGE:
1467
        cc += 2 * IMM2_SIZE;
1468
        /* Fall through */
1469
        case OP_CRQUERY:
1470
        case OP_CRMINQUERY:
1471
        case OP_CRPOSQUERY:
1472
        cc++;
1473
        if (count == 0)
1474
          count = 1;
1475
        /* Fall through */
1476
        default:
1477
        accelerated_start = NULL;
1478
        fast_forward_allowed = FALSE;
1479
        continue;
1480
        }
1481
      break;
1482
1483
      case OP_ONCE:
1484
      case OP_BRA:
1485
      case OP_CBRA:
1486
      end = cc + GET(cc, 1);
1487
1488
      prev_fast_forward_allowed = fast_forward_allowed;
1489
      fast_forward_allowed = FALSE;
1490
      if (depth >= 4)
1491
        break;
1492
1493
      end = bracketend(cc) - (1 + LINK_SIZE);
1494
      if (*end != OP_KET || (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0))
1495
        break;
1496
1497
      count = detect_early_fail(common, cc, private_data_start, depth + 1, count, prev_fast_forward_allowed);
1498
1499
      if (PRIVATE_DATA(cc) != 0)
1500
        common->private_data_ptrs[begin - common->start] = 1;
1501
1502
      if (count < EARLY_FAIL_ENHANCE_MAX)
1503
        {
1504
        cc = end + (1 + LINK_SIZE);
1505
        continue;
1506
        }
1507
      break;
1508
1509
      case OP_KET:
1510
      SLJIT_ASSERT(PRIVATE_DATA(cc) == 0);
1511
      if (cc >= next_alt)
1512
        break;
1513
      cc += 1 + LINK_SIZE;
1514
      continue;
1515
      }
1516
1517
    if (accelerated_start != NULL)
1518
      {
1519
      if (count == 0)
1520
        {
1521
        count++;
1522
1523
        if (fast_forward_allowed)
1524
          {
1525
          common->fast_forward_bc_ptr = accelerated_start;
1526
          common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_skip;
1527
          *private_data_start += sizeof(sljit_sw);
1528
          }
1529
        else
1530
          {
1531
          common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail;
1532
1533
          if (common->early_fail_start_ptr == 0)
1534
            common->early_fail_start_ptr = *private_data_start;
1535
1536
          *private_data_start += sizeof(sljit_sw);
1537
          common->early_fail_end_ptr = *private_data_start;
1538
1539
          if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
1540
            return EARLY_FAIL_ENHANCE_MAX;
1541
          }
1542
        }
1543
      else
1544
        {
1545
        common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail_range;
1546
1547
        if (common->early_fail_start_ptr == 0)
1548
          common->early_fail_start_ptr = *private_data_start;
1549
1550
        *private_data_start += 2 * sizeof(sljit_sw);
1551
        common->early_fail_end_ptr = *private_data_start;
1552
1553
        if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
1554
          return EARLY_FAIL_ENHANCE_MAX;
1555
        }
1556
1557
      /* Cannot be part of a repeat. */
1558
      common->private_data_ptrs[begin - common->start] = 1;
1559
      count++;
1560
1561
      if (count < EARLY_FAIL_ENHANCE_MAX)
1562
        continue;
1563
      }
1564
1565
    break;
1566
    }
1567
1568
  if (*cc != OP_ALT && *cc != OP_KET)
1569
    result = EARLY_FAIL_ENHANCE_MAX;
1570
  else if (result < count)
1571
    result = count;
1572
1573
  cc = next_alt;
1574
  next_alt = cc + GET(cc, 1);
1575
  }
1576
while (*cc == OP_ALT);
1577
1578
return result;
1579
}
1580
1581
/* Maps the class repeat opcode at cc to a value of 0, 1 or 2.
   set_private_data_ptrs() uses the result as the number of private data
   words given to the class iterator. Opcodes without a case label
   (for example the possessive repeats) yield 0. */
static int get_class_iterator_size(PCRE2_SPTR cc)
{
sljit_u32 lower, upper, span;

switch(*cc)
  {
  case OP_CRSTAR:
  case OP_CRPLUS:
  return 2;

  case OP_CRMINSTAR:
  case OP_CRMINPLUS:
  case OP_CRQUERY:
  case OP_CRMINQUERY:
  return 1;

  case OP_CRRANGE:
  case OP_CRMINRANGE:
  lower = GET2(cc, 1);
  upper = GET2(cc, 1 + IMM2_SIZE);

  /* An upper limit of zero is handled separately. */
  if (upper == 0)
    {
    if (*cc == OP_CRRANGE)
      return 2;
    return 1;
    }

  /* Otherwise the difference of the limits, capped at two. */
  span = upper - lower;
  return (span > 2) ? 2 : span;

  default:
  return 0;
  }
}
1612
1613
/* Checks whether the bracket at begin is followed by byte-identical copies
   of itself, optionally followed by a chain of zero-or-more wrapped copies.

   When such a sequence is found, three consecutive entries of
   common->private_data_ptrs (located LINK_SIZE before the end of a bracket)
   receive: the length of the repeated sequence, an iterator opcode
   (OP_EXACT, OP_UPTO or OP_MINUPTO) and the repeat count.

   Returns TRUE if the bracket is part of a detected repeat. */
static BOOL detect_repeat(compiler_common *common, PCRE2_SPTR begin)
{
PCRE2_SPTR end = bracketend(begin);
PCRE2_SPTR cur;
PCRE2_SPTR cur_end;
PCRE2_SPTR exact_end;
PCRE2_UCHAR zero_op;
sljit_sw length = end - begin;
sljit_sw slot;
sljit_s32 exact_count, opt_count, i;

/* Detect fixed iterations first. */
if (end[-(1 + LINK_SIZE)] != OP_KET || PRIVATE_DATA(begin) != 0)
  return FALSE;

/* /(?:AB){4,6}/ is currently converted to /(?:AB){3}(?:AB){1,3}/
 * Skip the check of the second part. */
if (PRIVATE_DATA(end - LINK_SIZE) != 0)
  return TRUE;

/* Count the identical brackets which directly follow each other. */
exact_count = 1;
cur = end;
while (*cur == *begin)
  {
  cur_end = bracketend(cur);
  if (cur_end - cur != length || memcmp(begin, cur, IN_UCHARS(length)) != 0)
    break;
  exact_count++;
  cur = cur_end;
  }

if (exact_count == 2)
  return FALSE;

opt_count = 0;
exact_end = cur;
if (*cur == OP_BRAZERO || *cur == OP_BRAMINZERO)
  {
  zero_op = *cur;

  /* Each optional level is <zero_op> OP_BRA <copy of the bracket> ... */
  while (cur[0] == zero_op && cur[1] == OP_BRA && cur[2 + LINK_SIZE] == *begin)
    {
    cur_end = bracketend(cur + 2 + LINK_SIZE);
    if (cur_end - cur != (length + 2 + LINK_SIZE) || memcmp(begin, cur + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
      break;
    opt_count++;
    cur = cur_end;
    }

  /* ... and the innermost one is <zero_op> <copy of the bracket>. */
  if (opt_count >= 1 && cur[0] == zero_op && cur[1] == *begin)
    {
    cur_end = bracketend(cur + 1);
    if (cur_end - cur == (length + 1) && memcmp(begin, cur + 1, IN_UCHARS(length)) == 0)
      {
      /* Every wrapping OP_BRA must be closed by a plain OP_KET. */
      i = 0;
      while (i < opt_count && *cur_end == OP_KET)
        {
        i++;
        cur_end += 1 + LINK_SIZE;
        }

      if (i == opt_count)
        {
        slot = exact_end - common->start - LINK_SIZE;
        common->private_data_ptrs[slot] = cur_end - exact_end;
        if (zero_op == OP_BRAZERO)
          common->private_data_ptrs[slot + 1] = OP_UPTO;
        else
          common->private_data_ptrs[slot + 1] = OP_MINUPTO;
        /* +2 the original and the last. */
        common->private_data_ptrs[slot + 2] = opt_count + 2;

        if (exact_count == 1)
          return TRUE;

        /* The last fixed copy now belongs to the bounded repeat. */
        exact_count--;
        exact_end -= (1 + LINK_SIZE) + GET(exact_end, -LINK_SIZE);
        }
      }
    }
  }

if (exact_count < 3)
  return FALSE;

slot = end - common->start - LINK_SIZE;
common->private_data_ptrs[slot] = exact_end - end;
common->private_data_ptrs[slot + 1] = OP_EXACT;
common->private_data_ptrs[slot + 2] = exact_count;
return TRUE;
}
1698
1699
/* Case label lists which group the single character iterator opcodes.
   Each macro expands to a run of "case" labels and must be followed by
   the statements of that case. */

/* Character iterators: set_private_data_ptrs() requests one word. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: case OP_MINPLUS: \
    case OP_QUERY: case OP_MINQUERY: \
    case OP_MINSTARI: case OP_MINPLUSI: \
    case OP_QUERYI: case OP_MINQUERYI: \
    case OP_NOTMINSTAR: case OP_NOTMINPLUS: \
    case OP_NOTQUERY: case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: case OP_NOTMINQUERYI:

/* Character iterators without a count: two words are requested. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: case OP_PLUS: \
    case OP_STARI: case OP_PLUSI: \
    case OP_NOTSTAR: case OP_NOTPLUS: \
    case OP_NOTSTARI: case OP_NOTPLUSI:

/* Character iterators followed by an IMM2_SIZE count: two words. */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: case OP_MINUPTO: \
    case OP_UPTOI: case OP_MINUPTOI: \
    case OP_NOTUPTO: case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: case OP_NOTMINUPTOI:

/* Type iterators: one word is requested. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: case OP_TYPEMINQUERY:

/* Type iterators without a count. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: case OP_TYPEPLUS:

/* Type iterators followed by an IMM2_SIZE count. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: case OP_TYPEMINUPTO:
1750
1751
/* Scans the byte code from common->start up to ccend and hands out private
   data offsets, starting at *private_data_start, to the opcodes which need
   them. The offsets are stored in common->private_data_ptrs and the first
   unused offset is written back to *private_data_start.

   The scan stops early once the offset exceeds SLJIT_MAX_LOCAL_SIZE. */
static void set_private_data_ptrs(compiler_common *common, int *private_data_start, PCRE2_SPTR ccend)
{
PCRE2_SPTR cc = common->start;
PCRE2_SPTR alt;
/* Iterators below this address do not get a private slot. */
PCRE2_SPTR repeat_end = NULL;
sljit_sw ket_index;
int next_slot = *private_data_start;
int words;      /* Number of words needed by a character iterator. */
int skip;       /* Opcode length; negative if a character follows. */
int open_len;   /* Length of a bracket opening opcode. */
BOOL repeat_check = TRUE;

while (cc < ccend)
  {
  words = 0;
  skip = 0;
  open_len = 0;

  if (next_slot > SLJIT_MAX_LOCAL_SIZE)
    break;

  /* When the bracket is prefixed by a zero iteration, skip the repeat check (at this point).
     Brackets converted to repeats do not allow global based single character repeats. */
  if (repeat_check
      && (*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)
      && detect_repeat(common, cc)
      && cc >= repeat_end)
    repeat_end = bracketend(cc);

  repeat_check = TRUE;

  switch(*cc)
    {
    case OP_KET:
    ket_index = cc - common->start;
    /* A non-zero entry was stored by detect_repeat(): the closing bracket
       gets a slot and the repeated copies are jumped over. */
    if (common->private_data_ptrs[ket_index + 1] != 0)
      {
      common->private_data_ptrs[ket_index] = next_slot;
      next_slot += sizeof(sljit_sw);
      cc += common->private_data_ptrs[ket_index + 1];
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    open_len = 1 + LINK_SIZE;
    common->private_data_ptrs[cc - common->start] = next_slot;
    next_slot += sizeof(sljit_sw);
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    open_len = 1 + LINK_SIZE + IMM2_SIZE;
    common->private_data_ptrs[cc - common->start] = next_slot;
    next_slot += sizeof(sljit_sw);
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    open_len = 1 + LINK_SIZE;
    common->private_data_ptrs[cc - common->start] = 0;
    alt = cc + GET(cc, 1);
    if (*alt == OP_KETRMAX || *alt == OP_KETRMIN)
      {
      common->private_data_ptrs[cc - common->start] = next_slot;
      next_slot += sizeof(sljit_sw);
      }
    break;

    case OP_BRA:
    open_len = 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    open_len = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_BRAZERO:
    case OP_BRAMINZERO:
    case OP_BRAPOSZERO:
    /* The bracket which follows is not checked for repeats. */
    repeat_check = FALSE;
    skip = 1;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    words = 1;
    skip = -2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    words = 2;
    skip = -2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    words = 2;
    skip = -(2 + IMM2_SIZE);
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    words = 1;
    skip = 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    skip = 1;
    if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
      words = 2;
    break;

    case OP_TYPEUPTO:
    skip = 1 + IMM2_SIZE;
    if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
      words = 2;
    break;

    case OP_TYPEMINUPTO:
    words = 2;
    skip = 1 + IMM2_SIZE;
    break;

    case OP_CLASS:
    case OP_NCLASS:
    skip = 1 + 32 / sizeof(PCRE2_UCHAR);
    words = get_class_iterator_size(cc + skip);
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    skip = GET(cc, 1);
    words = get_class_iterator_size(cc + skip);
    break;
#endif

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

  /* Character iterators, which are not inside a repeated bracket,
     get a private slot instead of allocating it on the stack. */
  if (words > 0 && cc >= repeat_end)
    {
    common->private_data_ptrs[cc - common->start] = next_slot;
    next_slot += sizeof(sljit_sw) * words;
    }

  if (skip < 0)
    {
    /* The opcode ends with a character which may have extra code units. */
    cc += -skip;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    }
  else if (skip > 0)
    cc += skip;

  if (open_len > 0)
    {
    if (cc >= repeat_end)
      {
      /* Only brackets which do not end with a plain OP_KET keep the limit. */
      repeat_end = bracketend(cc);
      if (repeat_end[-1 - LINK_SIZE] == OP_KET)
        repeat_end = NULL;
      }
    cc += open_len;
    }
  }

*private_data_start = next_slot;
}
1935
1936
/* Returns with a frame_types (always < 0) if no need for frame. */
1937
static int get_framesize(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, BOOL recursive, BOOL *needs_control_head)
1938
{
1939
int length = 0;
1940
int possessive = 0;
1941
BOOL stack_restore = FALSE;
1942
BOOL setsom_found = recursive;
1943
BOOL setmark_found = recursive;
1944
/* The last capture is a local variable even for recursions. */
1945
BOOL capture_last_found = FALSE;
1946
1947
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1948
SLJIT_ASSERT(common->control_head_ptr != 0);
1949
*needs_control_head = TRUE;
1950
#else
1951
*needs_control_head = FALSE;
1952
#endif
1953
1954
if (ccend == NULL)
1955
  {
1956
  ccend = bracketend(cc) - (1 + LINK_SIZE);
1957
  if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1958
    {
1959
    possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1960
    /* This is correct regardless of common->capture_last_ptr. */
1961
    capture_last_found = TRUE;
1962
    }
1963
  cc = next_opcode(common, cc);
1964
  }
1965
1966
SLJIT_ASSERT(cc != NULL);
1967
while (cc < ccend)
1968
  switch(*cc)
1969
    {
1970
    case OP_SET_SOM:
1971
    SLJIT_ASSERT(common->has_set_som);
1972
    stack_restore = TRUE;
1973
    if (!setsom_found)
1974
      {
1975
      length += 2;
1976
      setsom_found = TRUE;
1977
      }
1978
    cc += 1;
1979
    break;
1980
1981
    case OP_MARK:
1982
    case OP_COMMIT_ARG:
1983
    case OP_PRUNE_ARG:
1984
    case OP_THEN_ARG:
1985
    SLJIT_ASSERT(common->mark_ptr != 0);
1986
    stack_restore = TRUE;
1987
    if (!setmark_found)
1988
      {
1989
      length += 2;
1990
      setmark_found = TRUE;
1991
      }
1992
    if (common->control_head_ptr != 0)
1993
      *needs_control_head = TRUE;
1994
    cc += 1 + 2 + cc[1];
1995
    break;
1996
1997
    case OP_RECURSE:
1998
    stack_restore = TRUE;
1999
    if (common->has_set_som && !setsom_found)
2000
      {
2001
      length += 2;
2002
      setsom_found = TRUE;
2003
      }
2004
    if (common->mark_ptr != 0 && !setmark_found)
2005
      {
2006
      length += 2;
2007
      setmark_found = TRUE;
2008
      }
2009
    if (common->capture_last_ptr != 0 && !capture_last_found)
2010
      {
2011
      length += 2;
2012
      capture_last_found = TRUE;
2013
      }
2014
    cc += 1 + LINK_SIZE;
2015
    break;
2016
2017
    case OP_CBRA:
2018
    case OP_CBRAPOS:
2019
    case OP_SCBRA:
2020
    case OP_SCBRAPOS:
2021
    stack_restore = TRUE;
2022
    if (common->capture_last_ptr != 0 && !capture_last_found)
2023
      {
2024
      length += 2;
2025
      capture_last_found = TRUE;
2026
      }
2027
    length += 3;
2028
    cc += 1 + LINK_SIZE + IMM2_SIZE;
2029
    break;
2030
2031
    case OP_THEN:
2032
    stack_restore = TRUE;
2033
    if (common->control_head_ptr != 0)
2034
      *needs_control_head = TRUE;
2035
    cc ++;
2036
    break;
2037
2038
    default:
2039
    stack_restore = TRUE;
2040
    /* Fall through. */
2041
2042
    case OP_NOT_WORD_BOUNDARY:
2043
    case OP_WORD_BOUNDARY:
2044
    case OP_NOT_DIGIT:
2045
    case OP_DIGIT:
2046
    case OP_NOT_WHITESPACE:
2047
    case OP_WHITESPACE:
2048
    case OP_NOT_WORDCHAR:
2049
    case OP_WORDCHAR:
2050
    case OP_ANY:
2051
    case OP_ALLANY:
2052
    case OP_ANYBYTE:
2053
    case OP_NOTPROP:
2054
    case OP_PROP:
2055
    case OP_ANYNL:
2056
    case OP_NOT_HSPACE:
2057
    case OP_HSPACE:
2058
    case OP_NOT_VSPACE:
2059
    case OP_VSPACE:
2060
    case OP_EXTUNI:
2061
    case OP_EODN:
2062
    case OP_EOD:
2063
    case OP_CIRC:
2064
    case OP_CIRCM:
2065
    case OP_DOLL:
2066
    case OP_DOLLM:
2067
    case OP_CHAR:
2068
    case OP_CHARI:
2069
    case OP_NOT:
2070
    case OP_NOTI:
2071
2072
    case OP_EXACT:
2073
    case OP_POSSTAR:
2074
    case OP_POSPLUS:
2075
    case OP_POSQUERY:
2076
    case OP_POSUPTO:
2077
2078
    case OP_EXACTI:
2079
    case OP_POSSTARI:
2080
    case OP_POSPLUSI:
2081
    case OP_POSQUERYI:
2082
    case OP_POSUPTOI:
2083
2084
    case OP_NOTEXACT:
2085
    case OP_NOTPOSSTAR:
2086
    case OP_NOTPOSPLUS:
2087
    case OP_NOTPOSQUERY:
2088
    case OP_NOTPOSUPTO:
2089
2090
    case OP_NOTEXACTI:
2091
    case OP_NOTPOSSTARI:
2092
    case OP_NOTPOSPLUSI:
2093
    case OP_NOTPOSQUERYI:
2094
    case OP_NOTPOSUPTOI:
2095
2096
    case OP_TYPEEXACT:
2097
    case OP_TYPEPOSSTAR:
2098
    case OP_TYPEPOSPLUS:
2099
    case OP_TYPEPOSQUERY:
2100
    case OP_TYPEPOSUPTO:
2101
2102
    case OP_CLASS:
2103
    case OP_NCLASS:
2104
    case OP_XCLASS:
2105
2106
    case OP_CALLOUT:
2107
    case OP_CALLOUT_STR:
2108
2109
    cc = next_opcode(common, cc);
2110
    SLJIT_ASSERT(cc != NULL);
2111
    break;
2112
    }
2113
2114
/* Possessive quantifiers can use a special case. */
2115
if (SLJIT_UNLIKELY(possessive == length))
2116
  return stack_restore ? no_frame : no_stack;
2117
2118
if (length > 0)
2119
  return length + 1;
2120
return stack_restore ? no_frame : no_stack;
2121
}
2122
2123
static void init_frame(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, int stackpos, int stacktop)
2124
{
2125
DEFINE_COMPILER;
2126
BOOL setsom_found = FALSE;
2127
BOOL setmark_found = FALSE;
2128
/* The last capture is a local variable even for recursions. */
2129
BOOL capture_last_found = FALSE;
2130
int offset;
2131
2132
/* >= 1 + shortest item size (2) */
2133
SLJIT_UNUSED_ARG(stacktop);
2134
SLJIT_ASSERT(stackpos >= stacktop + 2);
2135
2136
stackpos = STACK(stackpos);
2137
if (ccend == NULL)
2138
  {
2139
  ccend = bracketend(cc) - (1 + LINK_SIZE);
2140
  if (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS)
2141
    cc = next_opcode(common, cc);
2142
  }
2143
2144
SLJIT_ASSERT(cc != NULL);
2145
while (cc < ccend)
2146
  switch(*cc)
2147
    {
2148
    case OP_SET_SOM:
2149
    SLJIT_ASSERT(common->has_set_som);
2150
    if (!setsom_found)
2151
      {
2152
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
2153
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
2154
      stackpos -= SSIZE_OF(sw);
2155
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2156
      stackpos -= SSIZE_OF(sw);
2157
      setsom_found = TRUE;
2158
      }
2159
    cc += 1;
2160
    break;
2161
2162
    case OP_MARK:
2163
    case OP_COMMIT_ARG:
2164
    case OP_PRUNE_ARG:
2165
    case OP_THEN_ARG:
2166
    SLJIT_ASSERT(common->mark_ptr != 0);
2167
    if (!setmark_found)
2168
      {
2169
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2170
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
2171
      stackpos -= SSIZE_OF(sw);
2172
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2173
      stackpos -= SSIZE_OF(sw);
2174
      setmark_found = TRUE;
2175
      }
2176
    cc += 1 + 2 + cc[1];
2177
    break;
2178
2179
    case OP_RECURSE:
2180
    if (common->has_set_som && !setsom_found)
2181
      {
2182
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
2183
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
2184
      stackpos -= SSIZE_OF(sw);
2185
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2186
      stackpos -= SSIZE_OF(sw);
2187
      setsom_found = TRUE;
2188
      }
2189
    if (common->mark_ptr != 0 && !setmark_found)
2190
      {
2191
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2192
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
2193
      stackpos -= SSIZE_OF(sw);
2194
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2195
      stackpos -= SSIZE_OF(sw);
2196
      setmark_found = TRUE;
2197
      }
2198
    if (common->capture_last_ptr != 0 && !capture_last_found)
2199
      {
2200
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
2201
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
2202
      stackpos -= SSIZE_OF(sw);
2203
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2204
      stackpos -= SSIZE_OF(sw);
2205
      capture_last_found = TRUE;
2206
      }
2207
    cc += 1 + LINK_SIZE;
2208
    break;
2209
2210
    case OP_CBRA:
2211
    case OP_CBRAPOS:
2212
    case OP_SCBRA:
2213
    case OP_SCBRAPOS:
2214
    if (common->capture_last_ptr != 0 && !capture_last_found)
2215
      {
2216
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
2217
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
2218
      stackpos -= SSIZE_OF(sw);
2219
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2220
      stackpos -= SSIZE_OF(sw);
2221
      capture_last_found = TRUE;
2222
      }
2223
    offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
2224
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
2225
    stackpos -= SSIZE_OF(sw);
2226
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
2227
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
2228
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2229
    stackpos -= SSIZE_OF(sw);
2230
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
2231
    stackpos -= SSIZE_OF(sw);
2232
2233
    cc += 1 + LINK_SIZE + IMM2_SIZE;
2234
    break;
2235
2236
    default:
2237
    cc = next_opcode(common, cc);
2238
    SLJIT_ASSERT(cc != NULL);
2239
    break;
2240
    }
2241
2242
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
2243
SLJIT_ASSERT(stackpos == STACK(stacktop));
2244
}
2245
2246
#define RECURSE_TMP_REG_COUNT 3
2247
2248
/* State shared by delayed_mem_copy_init(), delayed_mem_copy_move() and
delayed_mem_copy_finish(). Each of the RECURSE_TMP_REG_COUNT slots owns one
temporary register. A value loaded into that register is only written to its
destination when the slot is used again or when the copy is finished. */
typedef struct delayed_mem_copy_status {
2249
  /* Compiler used for emitting the instructions. */
  struct sljit_compiler *compiler;
2250
  /* Base register of the pending store of each slot, -1 if there is none. */
  int store_bases[RECURSE_TMP_REG_COUNT];
2251
  /* Offset of the pending store of each slot. */
  int store_offsets[RECURSE_TMP_REG_COUNT];
2252
  /* Register which holds the loaded value of each slot. */
  int tmp_regs[RECURSE_TMP_REG_COUNT];
2253
  /* Where the original value of tmp_regs[i] is preserved. It is only used
  when sljit_get_register_index() returns with a negative value for it. */
  int saved_tmp_regs[RECURSE_TMP_REG_COUNT];
2254
  /* Index of the slot used by the next move (slots are used in rotation). */
  int next_tmp_reg;
2255
} delayed_mem_copy_status;
2256
2257
/* Prepares a delayed copy sequence. The caller must fill tmp_regs and
saved_tmp_regs before the call; this function marks every slot as having
no pending store, and selects the first slot for the next move. */
static void delayed_mem_copy_init(delayed_mem_copy_status *status, compiler_common *common)
{
int slot = 0;

status->compiler = common->compiler;
status->next_tmp_reg = 0;

while (slot < RECURSE_TMP_REG_COUNT)
  {
  SLJIT_ASSERT(status->tmp_regs[slot] >= 0);
  SLJIT_ASSERT(sljit_get_register_index(status->saved_tmp_regs[slot]) < 0 || status->tmp_regs[slot] == status->saved_tmp_regs[slot]);

  /* -1 means that nothing has been loaded into this slot yet. */
  status->store_bases[slot] = -1;
  slot++;
  }
}
2271
2272
/* Queues a word copy from [load_base + load_offset] to
[store_base + store_offset]. The load is emitted immediately into the
temporary register of the current slot, but the store is postponed until
the slot is reused or delayed_mem_copy_finish() is called. */
static void delayed_mem_copy_move(delayed_mem_copy_status *status, int load_base, sljit_sw load_offset,
  int store_base, sljit_sw store_offset)
{
struct sljit_compiler *compiler = status->compiler;
const int slot = status->next_tmp_reg;
const int reg = status->tmp_regs[slot];
const int saved_reg = status->saved_tmp_regs[slot];
const int pending_base = status->store_bases[slot];

SLJIT_ASSERT(load_base > 0 && store_base > 0);

if (pending_base != -1)
  {
  /* The register still holds an earlier value: write it out first. */
  OP1(SLJIT_MOV, SLJIT_MEM1(pending_base), status->store_offsets[slot], reg, 0);
  }
else if (sljit_get_register_index(saved_reg) < 0)
  {
  /* First use of the slot: preserve virtual registers. */
  OP1(SLJIT_MOV, saved_reg, 0, reg, 0);
  }

OP1(SLJIT_MOV, reg, 0, SLJIT_MEM1(load_base), load_offset);
status->store_offsets[slot] = store_offset;
status->store_bases[slot] = store_base;

/* Continue with the next slot. */
status->next_tmp_reg = (slot + 1) % RECURSE_TMP_REG_COUNT;
}
2296
2297
/* Emits all pending stores of a delayed copy sequence. The slots are
processed in rotation starting from the one which would be used by the
next move. Temporary registers whose original value was preserved by
delayed_mem_copy_move() are restored. */
static void delayed_mem_copy_finish(delayed_mem_copy_status *status)
{
struct sljit_compiler *compiler = status->compiler;
int slot = status->next_tmp_reg;
int remaining;

for (remaining = RECURSE_TMP_REG_COUNT; remaining > 0; remaining--)
  {
  const int pending_base = status->store_bases[slot];

  if (pending_base != -1)
    {
    const int reg = status->tmp_regs[slot];
    const int saved_reg = status->saved_tmp_regs[slot];

    OP1(SLJIT_MOV, SLJIT_MEM1(pending_base), status->store_offsets[slot], reg, 0);

    /* Restore virtual registers. */
    if (sljit_get_register_index(saved_reg) < 0)
      OP1(SLJIT_MOV, reg, 0, saved_reg, 0);
    }

  slot = (slot + 1) % RECURSE_TMP_REG_COUNT;
  }
}
2320
2321
#undef RECURSE_TMP_REG_COUNT
2322
2323
/* Tests and sets a bit of common->recurse_bitset. The bit_index argument
is a machine word aligned byte offset; one bit belongs to each machine word.
Returns TRUE if the bit was not set before (and sets it), FALSE if the bit
has already been set by an earlier call. */
static BOOL recurse_check_bit(compiler_common *common, sljit_sw bit_index)
{
sljit_sw word_index;
uint8_t *slot;
uint8_t bit;

SLJIT_ASSERT((bit_index & (sizeof(sljit_sw) - 1)) == 0);

/* Convert the byte offset to a word index. */
word_index = bit_index >> SLJIT_WORD_SHIFT;

SLJIT_ASSERT((word_index >> 3) < common->recurse_bitset_size);

slot = common->recurse_bitset + (word_index >> 3);
bit = 1 << (word_index & 0x7);

if ((*slot & bit) == 0)
  {
  /* First occurrence. */
  *slot |= bit;
  return TRUE;
  }

return FALSE;
}
2343
2344
/* Flags computed by get_recurse_data_length() and passed to
copy_recurse_data() in its recurse_flags argument. */
enum get_recurse_flags {
2345
  /* Set for OP_PRUNE, OP_SKIP, OP_COMMIT, OP_SKIP_ARG, OP_THEN and for
  OP_COMMIT_ARG, OP_PRUNE_ARG, OP_THEN_ARG (but not for OP_MARK). */
  recurse_flag_quit_found = (1 << 0),
2346
  /* Set for OP_ACCEPT and OP_ASSERT_ACCEPT. */
  recurse_flag_accept_found = (1 << 1),
2347
  /* Set for OP_SET_SOM, and for OP_RECURSE when common->has_set_som is set. */
  recurse_flag_setsom_found = (1 << 2),
2348
  /* Set for OP_MARK and verbs with an argument, and for OP_RECURSE when
  common->mark_ptr is non-zero. */
  recurse_flag_setmark_found = (1 << 3),
2349
  /* Set for OP_THEN, for OP_MARK and verbs with an argument when
  common->control_head_ptr is non-zero, and always when
  DEBUG_FORCE_CONTROL_HEAD is enabled. */
  recurse_flag_control_head_found = (1 << 4),
2350
};
2351
2352
/* Scans the opcodes between cc and ccend and returns the number of machine
words needed for saving the data of a recursion.

The length starts from one, and it is increased for every distinct local
variable found during the scan. Since the same local may be referenced by
several opcodes, recurse_check_bit() is used to count each of them only once
(common->recurse_bitset is cleared at the beginning).

The recurse_flag_* bits which describe the control verbs found in the
scanned range are stored in *result_flags. */
static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, uint32_t *result_flags)
2353
{
2354
int length = 1;
2355
int size, offset;
2356
PCRE2_SPTR alternative;
2357
uint32_t recurse_flags = 0;
2358
2359
memset(common->recurse_bitset, 0, common->recurse_bitset_size);
2360
2361
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2362
SLJIT_ASSERT(common->control_head_ptr != 0);
2363
recurse_flags |= recurse_flag_control_head_found;
2364
#endif
2365
2366
/* Calculate the sum of the private machine words. */
2367
while (cc < ccend)
2368
  {
2369
  size = 0;
2370
  switch(*cc)
2371
    {
2372
    case OP_SET_SOM:
2373
    SLJIT_ASSERT(common->has_set_som);
2374
    recurse_flags |= recurse_flag_setsom_found;
2375
    cc += 1;
2376
    break;
2377
2378
    case OP_RECURSE:
2379
    if (common->has_set_som)
2380
      recurse_flags |= recurse_flag_setsom_found;
2381
    if (common->mark_ptr != 0)
2382
      recurse_flags |= recurse_flag_setmark_found;
2383
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2384
      length++;
2385
    cc += 1 + LINK_SIZE;
2386
    break;
2387
2388
    case OP_KET:
2389
    offset = PRIVATE_DATA(cc);
2390
    if (offset != 0)
2391
      {
2392
      if (recurse_check_bit(common, offset))
2393
        length++;
2394
      SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
2395
      /* The private data of the next code unit contains the number of
      extra code units to skip. */
      cc += PRIVATE_DATA(cc + 1);
2396
      }
2397
    cc += 1 + LINK_SIZE;
2398
    break;
2399
2400
    case OP_ASSERT:
2401
    case OP_ASSERT_NOT:
2402
    case OP_ASSERTBACK:
2403
    case OP_ASSERTBACK_NOT:
2404
    case OP_ASSERT_NA:
2405
    case OP_ASSERTBACK_NA:
2406
    case OP_ONCE:
2407
    case OP_SCRIPT_RUN:
2408
    case OP_BRAPOS:
2409
    case OP_SBRA:
2410
    case OP_SBRAPOS:
2411
    case OP_SCOND:
2412
    SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
2413
    if (recurse_check_bit(common, PRIVATE_DATA(cc)))
2414
      length++;
2415
    cc += 1 + LINK_SIZE;
2416
    break;
2417
2418
    case OP_CBRA:
2419
    case OP_SCBRA:
2420
    offset = GET2(cc, 1 + LINK_SIZE);
2421
    /* Two words for the ovector pair of the capture. */
    if (recurse_check_bit(common, OVECTOR(offset << 1)))
2422
      {
2423
      SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((offset << 1) + 1)));
2424
      length += 2;
2425
      }
2426
    if (common->optimized_cbracket[offset] == 0 && recurse_check_bit(common, OVECTOR_PRIV(offset)))
2427
      length++;
2428
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2429
      length++;
2430
    cc += 1 + LINK_SIZE + IMM2_SIZE;
2431
    break;
2432
2433
    case OP_CBRAPOS:
2434
    case OP_SCBRAPOS:
2435
    offset = GET2(cc, 1 + LINK_SIZE);
2436
    if (recurse_check_bit(common, OVECTOR(offset << 1)))
2437
      {
2438
      SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((offset << 1) + 1)));
2439
      length += 2;
2440
      }
2441
    if (recurse_check_bit(common, OVECTOR_PRIV(offset)))
2442
      length++;
2443
    if (recurse_check_bit(common, PRIVATE_DATA(cc)))
2444
      length++;
2445
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2446
      length++;
2447
    cc += 1 + LINK_SIZE + IMM2_SIZE;
2448
    break;
2449
2450
    case OP_COND:
2451
    /* Might be a hidden SCOND. */
2452
    alternative = cc + GET(cc, 1);
2453
    if ((*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) && recurse_check_bit(common, PRIVATE_DATA(cc)))
2454
      length++;
2455
    cc += 1 + LINK_SIZE;
2456
    break;
2457
2458
    CASE_ITERATOR_PRIVATE_DATA_1
2459
    offset = PRIVATE_DATA(cc);
2460
    if (offset != 0 && recurse_check_bit(common, offset))
2461
      length++;
2462
    cc += 2;
2463
#ifdef SUPPORT_UNICODE
2464
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2465
#endif
2466
    break;
2467
2468
    CASE_ITERATOR_PRIVATE_DATA_2A
2469
    offset = PRIVATE_DATA(cc);
2470
    if (offset != 0 && recurse_check_bit(common, offset))
2471
      {
2472
      SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
2473
      length += 2;
2474
      }
2475
    cc += 2;
2476
#ifdef SUPPORT_UNICODE
2477
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2478
#endif
2479
    break;
2480
2481
    CASE_ITERATOR_PRIVATE_DATA_2B
2482
    offset = PRIVATE_DATA(cc);
2483
    if (offset != 0 && recurse_check_bit(common, offset))
2484
      {
2485
      SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
2486
      length += 2;
2487
      }
2488
    cc += 2 + IMM2_SIZE;
2489
#ifdef SUPPORT_UNICODE
2490
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2491
#endif
2492
    break;
2493
2494
    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2495
    offset = PRIVATE_DATA(cc);
2496
    if (offset != 0 && recurse_check_bit(common, offset))
2497
      length++;
2498
    cc += 1;
2499
    break;
2500
2501
    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2502
    offset = PRIVATE_DATA(cc);
2503
    if (offset != 0 && recurse_check_bit(common, offset))
2504
      {
2505
      SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
2506
      length += 2;
2507
      }
2508
    cc += 1;
2509
    break;
2510
2511
    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2512
    offset = PRIVATE_DATA(cc);
2513
    if (offset != 0 && recurse_check_bit(common, offset))
2514
      {
2515
      SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
2516
      length += 2;
2517
      }
2518
    cc += 1 + IMM2_SIZE;
2519
    break;
2520
2521
    case OP_CLASS:
2522
    case OP_NCLASS:
2523
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
2524
    case OP_XCLASS:
2525
    size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
2526
#else
2527
    size = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
2528
#endif
2529
2530
    offset = PRIVATE_DATA(cc);
2531
    /* The number of words depends on the iterator which follows the class. */
    if (offset != 0 && recurse_check_bit(common, offset))
2532
      length += get_class_iterator_size(cc + size);
2533
    cc += size;
2534
    break;
2535
2536
    case OP_MARK:
2537
    case OP_COMMIT_ARG:
2538
    case OP_PRUNE_ARG:
2539
    case OP_THEN_ARG:
2540
    SLJIT_ASSERT(common->mark_ptr != 0);
2541
    recurse_flags |= recurse_flag_setmark_found;
2542
    if (common->control_head_ptr != 0)
2543
      recurse_flags |= recurse_flag_control_head_found;
2544
    if (*cc != OP_MARK)
2545
      recurse_flags |= recurse_flag_quit_found;
2546
2547
    cc += 1 + 2 + cc[1];
2548
    break;
2549
2550
    case OP_PRUNE:
2551
    case OP_SKIP:
2552
    case OP_COMMIT:
2553
    recurse_flags |= recurse_flag_quit_found;
2554
    cc++;
2555
    break;
2556
2557
    case OP_SKIP_ARG:
2558
    recurse_flags |= recurse_flag_quit_found;
2559
    cc += 1 + 2 + cc[1];
2560
    break;
2561
2562
    case OP_THEN:
2563
    SLJIT_ASSERT(common->control_head_ptr != 0);
2564
    recurse_flags |= recurse_flag_quit_found | recurse_flag_control_head_found;
2565
    cc++;
2566
    break;
2567
2568
    case OP_ACCEPT:
2569
    case OP_ASSERT_ACCEPT:
2570
    recurse_flags |= recurse_flag_accept_found;
2571
    cc++;
2572
    break;
2573
2574
    default:
2575
    cc = next_opcode(common, cc);
2576
    SLJIT_ASSERT(cc != NULL);
2577
    break;
2578
    }
2579
  }
2580
SLJIT_ASSERT(cc == ccend);
2581
2582
/* One word for the control head. The start of match and mark values are
only counted when recurse_flag_quit_found is set. */
if (recurse_flags & recurse_flag_control_head_found)
2583
  length++;
2584
if (recurse_flags & recurse_flag_quit_found)
2585
  {
2586
  if (recurse_flags & recurse_flag_setsom_found)
2587
    length++;
2588
  if (recurse_flags & recurse_flag_setmark_found)
2589
    length++;
2590
  }
2591
2592
*result_flags = recurse_flags;
2593
return length;
2594
}
2595
2596
/* Operation selected by the type argument of copy_recurse_data(). The data
is divided into private, shared, and kept shared groups there. */
enum copy_recurse_data_types {
2597
  /* Copies all three groups from the locals (SLJIT_SP) to the area
  addressed by STACK_TOP. */
  recurse_copy_from_global,
2598
  /* Copies only the private group from the STACK_TOP area to the locals. */
  recurse_copy_private_to_global,
2599
  /* Copies the shared and kept shared groups from the STACK_TOP area
  to the locals. */
  recurse_copy_shared_to_global,
2600
  /* Copies only the kept shared group from the STACK_TOP area to the locals. */
  recurse_copy_kept_shared_to_global,
2601
  /* Exchanges the private and shared groups between the locals and the
  area addressed by TMP2. */
  recurse_swap_global
2602
};
2603
2604
/* Emits the code which copies the data of a recursion between the local
variables addressed through SLJIT_SP and a save area addressed through a
base register (STACK_TOP, or TMP2 for recurse_swap_global).

The opcodes between cc and ccend are scanned, and every local variable found
for the first time (see recurse_check_bit) is put into one of three groups:
private, shared, or kept shared. The type argument (one of the
copy_recurse_data_types values) selects the direction of the copy and the
groups which are copied. The groups which are not copied still advance
stackptr, so a given local has the same position in the save area regardless
of the type.

Arguments:
  common         compiler state
  cc, ccend      the scanned opcode range
  type           a copy_recurse_data_types value
  stackptr       first slot of the save area (converted by STACK())
  stacktop       slot where the save area must end (converted by STACK())
  recurse_flags  recurse_flag_* bits; only recurse_flag_quit_found is
                 tested here */
static void copy_recurse_data(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
2605
  int type, int stackptr, int stacktop, uint32_t recurse_flags)
2606
{
2607
delayed_mem_copy_status status;
2608
PCRE2_SPTR alternative;
2609
sljit_sw private_srcw[2];
2610
sljit_sw shared_srcw[3];
2611
sljit_sw kept_shared_srcw[2];
2612
int private_count, shared_count, kept_shared_count;
2613
int from_sp, base_reg, offset, i;
2614
2615
memset(common->recurse_bitset, 0, common->recurse_bitset_size);
2616
2617
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2618
SLJIT_ASSERT(common->control_head_ptr != 0);
2619
recurse_check_bit(common, common->control_head_ptr);
2620
#endif
2621
2622
/* from_sp is TRUE when the locals are the source of the copy. */
switch (type)
2623
  {
2624
  case recurse_copy_from_global:
2625
  from_sp = TRUE;
2626
  base_reg = STACK_TOP;
2627
  break;
2628
2629
  case recurse_copy_private_to_global:
2630
  case recurse_copy_shared_to_global:
2631
  case recurse_copy_kept_shared_to_global:
2632
  from_sp = FALSE;
2633
  base_reg = STACK_TOP;
2634
  break;
2635
2636
  default:
2637
  SLJIT_ASSERT(type == recurse_swap_global);
2638
  from_sp = FALSE;
2639
  base_reg = TMP2;
2640
  break;
2641
  }
2642
2643
stackptr = STACK(stackptr);
2644
stacktop = STACK(stacktop);
2645
2646
/* Select the three temporary registers of the delayed copy. */
status.tmp_regs[0] = TMP1;
2647
status.saved_tmp_regs[0] = TMP1;
2648
2649
if (base_reg != TMP2)
2650
  {
2651
  status.tmp_regs[1] = TMP2;
2652
  status.saved_tmp_regs[1] = TMP2;
2653
  }
2654
else
2655
  {
2656
  /* TMP2 is the base register, so another register is used instead. */
  status.saved_tmp_regs[1] = RETURN_ADDR;
2657
  if (HAS_VIRTUAL_REGISTERS)
2658
    status.tmp_regs[1] = STR_PTR;
2659
  else
2660
    status.tmp_regs[1] = RETURN_ADDR;
2661
  }
2662
2663
status.saved_tmp_regs[2] = TMP3;
2664
if (HAS_VIRTUAL_REGISTERS)
2665
  status.tmp_regs[2] = STR_END;
2666
else
2667
  status.tmp_regs[2] = TMP3;
2668
2669
delayed_mem_copy_init(&status, common);
2670
2671
/* The first slot belongs to common->recursive_head_ptr, which is handled
in the same way as the private group. */
if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
2672
  {
2673
  SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
2674
2675
  if (!from_sp)
2676
    delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->recursive_head_ptr);
2677
2678
  if (from_sp || type == recurse_swap_global)
2679
    delayed_mem_copy_move(&status, SLJIT_SP, common->recursive_head_ptr, base_reg, stackptr);
2680
  }
2681
2682
stackptr += sizeof(sljit_sw);
2683
2684
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2685
if (type != recurse_copy_shared_to_global)
2686
  {
2687
  if (!from_sp)
2688
    delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->control_head_ptr);
2689
2690
  if (from_sp || type == recurse_swap_global)
2691
    delayed_mem_copy_move(&status, SLJIT_SP, common->control_head_ptr, base_reg, stackptr);
2692
  }
2693
2694
stackptr += sizeof(sljit_sw);
2695
#endif
2696
2697
while (cc < ccend)
2698
  {
2699
  private_count = 0;
2700
  shared_count = 0;
2701
  kept_shared_count = 0;
2702
2703
  /* The switch collects the locals of the current opcode. The copy
  instructions are emitted after the switch. */
  switch(*cc)
2704
    {
2705
    case OP_SET_SOM:
2706
    SLJIT_ASSERT(common->has_set_som);
2707
    if ((recurse_flags & recurse_flag_quit_found) && recurse_check_bit(common, OVECTOR(0)))
2708
      {
2709
      kept_shared_srcw[0] = OVECTOR(0);
2710
      kept_shared_count = 1;
2711
      }
2712
    cc += 1;
2713
    break;
2714
2715
    case OP_RECURSE:
2716
    if (recurse_flags & recurse_flag_quit_found)
2717
      {
2718
      if (common->has_set_som && recurse_check_bit(common, OVECTOR(0)))
2719
        {
2720
        kept_shared_srcw[0] = OVECTOR(0);
2721
        kept_shared_count = 1;
2722
        }
2723
      if (common->mark_ptr != 0 && recurse_check_bit(common, common->mark_ptr))
2724
        {
2725
        kept_shared_srcw[kept_shared_count] = common->mark_ptr;
2726
        kept_shared_count++;
2727
        }
2728
      }
2729
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2730
      {
2731
      shared_srcw[0] = common->capture_last_ptr;
2732
      shared_count = 1;
2733
      }
2734
    cc += 1 + LINK_SIZE;
2735
    break;
2736
2737
    case OP_KET:
2738
    private_srcw[0] = PRIVATE_DATA(cc);
2739
    if (private_srcw[0] != 0)
2740
      {
2741
      if (recurse_check_bit(common, private_srcw[0]))
2742
        private_count = 1;
2743
      SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
2744
      cc += PRIVATE_DATA(cc + 1);
2745
      }
2746
    cc += 1 + LINK_SIZE;
2747
    break;
2748
2749
    case OP_ASSERT:
2750
    case OP_ASSERT_NOT:
2751
    case OP_ASSERTBACK:
2752
    case OP_ASSERTBACK_NOT:
2753
    case OP_ASSERT_NA:
2754
    case OP_ASSERTBACK_NA:
2755
    case OP_ONCE:
2756
    case OP_SCRIPT_RUN:
2757
    case OP_BRAPOS:
2758
    case OP_SBRA:
2759
    case OP_SBRAPOS:
2760
    case OP_SCOND:
2761
    private_srcw[0] = PRIVATE_DATA(cc);
2762
    if (recurse_check_bit(common, private_srcw[0]))
2763
      private_count = 1;
2764
    cc += 1 + LINK_SIZE;
2765
    break;
2766
2767
    case OP_CBRA:
2768
    case OP_SCBRA:
2769
    offset = GET2(cc, 1 + LINK_SIZE);
2770
    /* The ovector pair and the last capture index are shared. */
    shared_srcw[0] = OVECTOR(offset << 1);
2771
    if (recurse_check_bit(common, shared_srcw[0]))
2772
      {
2773
      shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw);
2774
      SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1]));
2775
      shared_count = 2;
2776
      }
2777
2778
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2779
      {
2780
      shared_srcw[shared_count] = common->capture_last_ptr;
2781
      shared_count++;
2782
      }
2783
2784
    if (common->optimized_cbracket[offset] == 0)
2785
      {
2786
      private_srcw[0] = OVECTOR_PRIV(offset);
2787
      if (recurse_check_bit(common, private_srcw[0]))
2788
        private_count = 1;
2789
      }
2790
2791
    cc += 1 + LINK_SIZE + IMM2_SIZE;
2792
    break;
2793
2794
    case OP_CBRAPOS:
2795
    case OP_SCBRAPOS:
2796
    offset = GET2(cc, 1 + LINK_SIZE);
2797
    shared_srcw[0] = OVECTOR(offset << 1);
2798
    if (recurse_check_bit(common, shared_srcw[0]))
2799
      {
2800
      shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw);
2801
      SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1]));
2802
      shared_count = 2;
2803
      }
2804
2805
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2806
      {
2807
      shared_srcw[shared_count] = common->capture_last_ptr;
2808
      shared_count++;
2809
      }
2810
2811
    private_srcw[0] = PRIVATE_DATA(cc);
2812
    if (recurse_check_bit(common, private_srcw[0]))
2813
      private_count = 1;
2814
2815
    /* From here offset is not the capture index anymore. */
    offset = OVECTOR_PRIV(offset);
2816
    if (recurse_check_bit(common, offset))
2817
      {
2818
      private_srcw[private_count] = offset;
2819
      private_count++;
2820
      }
2821
    cc += 1 + LINK_SIZE + IMM2_SIZE;
2822
    break;
2823
2824
    case OP_COND:
2825
    /* Might be a hidden SCOND. */
2826
    alternative = cc + GET(cc, 1);
2827
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
2828
      {
2829
      private_srcw[0] = PRIVATE_DATA(cc);
2830
      if (recurse_check_bit(common, private_srcw[0]))
2831
        private_count = 1;
2832
      }
2833
    cc += 1 + LINK_SIZE;
2834
    break;
2835
2836
    CASE_ITERATOR_PRIVATE_DATA_1
2837
    private_srcw[0] = PRIVATE_DATA(cc);
2838
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
2839
      private_count = 1;
2840
    cc += 2;
2841
#ifdef SUPPORT_UNICODE
2842
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2843
#endif
2844
    break;
2845
2846
    CASE_ITERATOR_PRIVATE_DATA_2A
2847
    private_srcw[0] = PRIVATE_DATA(cc);
2848
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
2849
      {
2850
      private_count = 2;
2851
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2852
      SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
2853
      }
2854
    cc += 2;
2855
#ifdef SUPPORT_UNICODE
2856
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2857
#endif
2858
    break;
2859
2860
    CASE_ITERATOR_PRIVATE_DATA_2B
2861
    private_srcw[0] = PRIVATE_DATA(cc);
2862
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
2863
      {
2864
      private_count = 2;
2865
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2866
      SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
2867
      }
2868
    cc += 2 + IMM2_SIZE;
2869
#ifdef SUPPORT_UNICODE
2870
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2871
#endif
2872
    break;
2873
2874
    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2875
    private_srcw[0] = PRIVATE_DATA(cc);
2876
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
2877
      private_count = 1;
2878
    cc += 1;
2879
    break;
2880
2881
    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2882
    private_srcw[0] = PRIVATE_DATA(cc);
2883
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
2884
      {
2885
      private_count = 2;
2886
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2887
      SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
2888
      }
2889
    cc += 1;
2890
    break;
2891
2892
    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2893
    private_srcw[0] = PRIVATE_DATA(cc);
2894
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
2895
      {
2896
      private_count = 2;
2897
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2898
      SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
2899
      }
2900
    cc += 1 + IMM2_SIZE;
2901
    break;
2902
2903
    case OP_CLASS:
2904
    case OP_NCLASS:
2905
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
2906
    case OP_XCLASS:
2907
    i = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
2908
#else
2909
    i = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
2910
#endif
2911
    /* NOTE(review): unlike get_recurse_data_length(), the one word case
    does not call recurse_check_bit(), and private_count stays 1 when the
    check fails in the two word case. This looks harmless only if the
    private data of a class is never seen twice - confirm. */
    if (PRIVATE_DATA(cc) != 0)
2912
      {
2913
      private_count = 1;
2914
      private_srcw[0] = PRIVATE_DATA(cc);
2915
      switch(get_class_iterator_size(cc + i))
2916
        {
2917
        case 1:
2918
        break;
2919
2920
        case 2:
2921
        if (recurse_check_bit(common, private_srcw[0]))
2922
          {
2923
          private_count = 2;
2924
          private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2925
          SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
2926
          }
2927
        break;
2928
2929
        default:
2930
        SLJIT_UNREACHABLE();
2931
        break;
2932
        }
2933
      }
2934
    cc += i;
2935
    break;
2936
2937
    case OP_MARK:
2938
    case OP_COMMIT_ARG:
2939
    case OP_PRUNE_ARG:
2940
    case OP_THEN_ARG:
2941
    SLJIT_ASSERT(common->mark_ptr != 0);
2942
    if ((recurse_flags & recurse_flag_quit_found) && recurse_check_bit(common, common->mark_ptr))
2943
      {
2944
      kept_shared_srcw[0] = common->mark_ptr;
2945
      kept_shared_count = 1;
2946
      }
2947
    if (common->control_head_ptr != 0 && recurse_check_bit(common, common->control_head_ptr))
2948
      {
2949
      private_srcw[0] = common->control_head_ptr;
2950
      private_count = 1;
2951
      }
2952
    cc += 1 + 2 + cc[1];
2953
    break;
2954
2955
    case OP_THEN:
2956
    SLJIT_ASSERT(common->control_head_ptr != 0);
2957
    if (recurse_check_bit(common, common->control_head_ptr))
2958
      {
2959
      private_srcw[0] = common->control_head_ptr;
2960
      private_count = 1;
2961
      }
2962
    cc++;
2963
    break;
2964
2965
    default:
2966
    cc = next_opcode(common, cc);
2967
    SLJIT_ASSERT(cc != NULL);
2968
    /* Nothing was collected, the copy phase is skipped. */
    continue;
2969
    }
2970
2971
  /* Private group: copied by every type except the two shared ones. */
  if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
2972
    {
2973
    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
2974
2975
    for (i = 0; i < private_count; i++)
2976
      {
2977
      SLJIT_ASSERT(private_srcw[i] != 0);
2978
2979
      /* Both moves are queued for recurse_swap_global. Since the stores
      are delayed, both loads are emitted before the first store. */
      if (!from_sp)
2980
        delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, private_srcw[i]);
2981
2982
      if (from_sp || type == recurse_swap_global)
2983
        delayed_mem_copy_move(&status, SLJIT_SP, private_srcw[i], base_reg, stackptr);
2984
2985
      stackptr += sizeof(sljit_sw);
2986
      }
2987
    }
2988
  else
2989
    stackptr += sizeof(sljit_sw) * private_count;
2990
2991
  /* Shared group. */
  if (type != recurse_copy_private_to_global && type != recurse_copy_kept_shared_to_global)
2992
    {
2993
    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_swap_global);
2994
2995
    for (i = 0; i < shared_count; i++)
2996
      {
2997
      SLJIT_ASSERT(shared_srcw[i] != 0);
2998
2999
      if (!from_sp)
3000
        delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, shared_srcw[i]);
3001
3002
      if (from_sp || type == recurse_swap_global)
3003
        delayed_mem_copy_move(&status, SLJIT_SP, shared_srcw[i], base_reg, stackptr);
3004
3005
      stackptr += sizeof(sljit_sw);
3006
      }
3007
    }
3008
  else
3009
    stackptr += sizeof(sljit_sw) * shared_count;
3010
3011
  /* Kept shared group. */
  if (type != recurse_copy_private_to_global && type != recurse_swap_global)
3012
    {
3013
    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_copy_kept_shared_to_global);
3014
3015
    for (i = 0; i < kept_shared_count; i++)
3016
      {
3017
      SLJIT_ASSERT(kept_shared_srcw[i] != 0);
3018
3019
      if (!from_sp)
3020
        delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, kept_shared_srcw[i]);
3021
3022
      if (from_sp || type == recurse_swap_global)
3023
        delayed_mem_copy_move(&status, SLJIT_SP, kept_shared_srcw[i], base_reg, stackptr);
3024
3025
      stackptr += sizeof(sljit_sw);
3026
      }
3027
    }
3028
  else
3029
    stackptr += sizeof(sljit_sw) * kept_shared_count;
3030
  }
3031
3032
SLJIT_ASSERT(cc == ccend && stackptr == stacktop);
3033
3034
/* Emit the stores which are still pending. */
delayed_mem_copy_finish(&status);
3035
}
3036
3037
/* Scans the bracket starting at cc (nested brackets are processed
recursively), and sets a marker byte in common->then_offsets for each
alternative which contains a (*THEN) verb.

The current_offset argument points to the marker of the enclosing
alternative, or it is NULL if there is none. Returns with the end of the
bracket. */
static SLJIT_INLINE PCRE2_SPTR set_then_offsets(compiler_common *common, PCRE2_SPTR cc, sljit_u8 *current_offset)
{
PCRE2_SPTR group_end = bracketend(cc);
BOOL track_alternatives = cc[GET(cc, 1)] == OP_ALT;

/* Conditional block does not. */
if (*cc == OP_COND || *cc == OP_SCOND)
  track_alternatives = FALSE;

/* Assert captures then. */
if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA)
  current_offset = NULL;

cc = next_opcode(common, cc);

/* The marker of the first alternative. */
if (track_alternatives)
  current_offset = common->then_offsets + (cc - common->start);

while (cc < group_end)
  {
  if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
    {
    /* Nested bracket. */
    cc = set_then_offsets(common, cc, current_offset);
    continue;
    }

  /* The marker of the next alternative. */
  if (track_alternatives && *cc == OP_ALT)
    current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);

  if (current_offset != NULL && *cc >= OP_THEN && *cc <= OP_THEN_ARG)
    *current_offset = 1;

  cc = next_opcode(common, cc);
  }

return group_end;
}
3069
3070
#undef CASE_ITERATOR_PRIVATE_DATA_1
3071
#undef CASE_ITERATOR_PRIVATE_DATA_2A
3072
#undef CASE_ITERATOR_PRIVATE_DATA_2B
3073
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
3074
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
3075
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
3076
3077
/* Returns non-zero if at most one bit of value is set. Note that zero is
also accepted. */
static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
{
/* Clears the lowest set bit of value. */
unsigned int remaining_bits = value & (value - 1);

return remaining_bits == 0;
}
3081
3082
/* Sets label as the target of every jump stored in list. */
static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
{
jump_list *item;

for (item = list; item; item = item->next)
  {
  /* No NULL checks are needed here: sljit_set_label does nothing
  if either the jump or the label is NULL. */
  SET_LABEL(item->jump, label);
  }
}
3092
3093
/* Pushes jump onto the front of *list. The list node is obtained from
sljit_alloc_memory(); when that returns NULL the list is left unchanged
and the jump is not recorded. */
static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
{
jump_list *node = sljit_alloc_memory(compiler, sizeof(jump_list));

if (node == NULL)
  return;

node->jump = jump;
node->next = *list;
*list = node;
}
3103
3104
/* Registers a stub on common->stubs. start is the jump that enters the
stub; a label created at the current position is stored as the place to
continue at afterwards (see flush_stubs). Nothing is recorded, and no label
is created, when the node allocation fails. */
static void add_stub(compiler_common *common, struct sljit_jump *start)
{
DEFINE_COMPILER;
stub_list *stub = sljit_alloc_memory(compiler, sizeof(stub_list));

if (stub == NULL)
  return;

stub->start = start;
stub->quit = LABEL();
stub->next = common->stubs;
common->stubs = stub;
}
3117
3118
/* Emits the out-of-line code of every pending stub and empties the list.
For each stub: its start jump is bound to the current position, a fast
call is emitted and queued on common->stackalloc, and control jumps back
to the stub's quit label. */
static void flush_stubs(compiler_common *common)
{
DEFINE_COMPILER;
stub_list *stub;

for (stub = common->stubs; stub != NULL; stub = stub->next)
  {
  JUMPHERE(stub->start);
  add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
  JUMPTO(SLJIT_JUMP, stub->quit);
  }

common->stubs = NULL;
}
3132
3133
/* Emits code that decrements COUNT_MATCH by one and, when the result is
zero, takes a jump queued on common->calllimit. */
static SLJIT_INLINE void count_match(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *limit_reached;

OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
limit_reached = JUMP(SLJIT_ZERO);
add_jump(compiler, &common->calllimit, limit_reached);
}
3140
3141
/* Emits code that reserves size machine words by lowering STACK_TOP, then
registers a stub (see add_stub) entered when STACK_TOP falls below
STACK_LIMIT. */
static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
{
/* May destroy all locals and registers except TMP2. */
DEFINE_COMPILER;
struct sljit_jump *limit_crossed;

SLJIT_ASSERT(size > 0);
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * SSIZE_OF(sw));

#ifdef DESTROY_REGISTERS
/* Debug aid: overwrite everything this function is allowed to destroy. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
#endif

limit_crossed = CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0);
add_stub(common, limit_crossed);
}
3157
3158
/* Emits code that releases size machine words by raising STACK_TOP.
This is the inverse of the adjustment made by allocate_stack. */
static SLJIT_INLINE void free_stack(compiler_common *common, int size)
{
DEFINE_COMPILER;
sljit_sw released_bytes;

SLJIT_ASSERT(size > 0);
released_bytes = size * SSIZE_OF(sw);
OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, released_bytes);
}
3165
3166
/* Allocates size bytes of read-only data that is chained onto
common->read_only_data_head. One extra sljit_uw is allocated in front of
the returned area and holds the link to the previous allocation.
Returns NULL if the compiler is already in an error state, or if the
allocation fails (in which case the compiler memory error is set). */
static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
{
DEFINE_COMPILER;
sljit_uw *chunk;

if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  return NULL;

chunk = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);

if (SLJIT_UNLIKELY(chunk == NULL))
  {
  sljit_set_compiler_memory_error(compiler);
  return NULL;
  }

/* The first word is the list link; the caller gets what follows it. */
*(void**)chunk = common->read_only_data_head;
common->read_only_data_head = (void *)chunk;

return chunk + 1;
}
3185
3186
/* Emits code that stores "jit_arguments.begin minus one character" into
OVECTOR(1) .. OVECTOR(length - 1). Vectors shorter than 8 entries are
written with unrolled stores, longer ones with a counted loop.
NOTE(review): the load through SLJIT_S0 assumes that register holds the
jit_arguments pointer at this point - confirm against the caller. */
static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
3187
{
3188
DEFINE_COMPILER;
3189
struct sljit_label *loop;
3190
sljit_s32 i;
3191
3192
/* At this point we can freely use all temporary registers. */
3193
SLJIT_ASSERT(length > 1);
3194
/* TMP1 returns with begin - 1. */
3195
OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
3196
if (length < 8)
3197
  {
3198
  for (i = 1; i < length; i++)
3199
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
3200
  }
3201
else
3202
  {
  /* The SLJIT_MEM_SUPP call only asks whether a pre-update store is
  available; the actual store is emitted inside the loop below. */
3203
  if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS)
3204
    {
    /* The base starts one word before the first slot to be written,
    since the address is advanced by the store itself. R2 counts the
    length - 1 stores. */
3205
    GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
3206
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
3207
    loop = LABEL();
3208
    sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
3209
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
3210
    JUMPTO(SLJIT_NOT_ZERO, loop);
3211
    }
3212
  else
3213
    {
    /* Fallback: plain store followed by an explicit pointer increment,
    so the base starts directly at the first slot to be written. */
3214
    GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START + sizeof(sljit_sw));
3215
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
3216
    loop = LABEL();
3217
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0);
3218
    OP2(SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw));
3219
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
3220
    JUMPTO(SLJIT_NOT_ZERO, loop);
3221
    }
3222
  }
3223
}
3224
3225
/* Emits code that writes zero to every machine word of the local area
[common->early_fail_start_ptr, common->early_fail_end_ptr). A single word
is cleared with one immediate store, up to six words with unrolled stores,
and larger areas with a loop that clears three words per iteration plus
up to two trailing stores. */
static SLJIT_INLINE void reset_early_fail(compiler_common *common)
3226
{
3227
DEFINE_COMPILER;
3228
sljit_u32 size = (sljit_u32)(common->early_fail_end_ptr - common->early_fail_start_ptr);
3229
sljit_u32 uncleared_size;
3230
sljit_s32 src = SLJIT_IMM;
3231
sljit_s32 i;
3232
struct sljit_label *loop;
3233
3234
SLJIT_ASSERT(common->early_fail_start_ptr < common->early_fail_end_ptr);
3235
3236
if (size == sizeof(sljit_sw))
3237
  {
3238
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->early_fail_start_ptr, SLJIT_IMM, 0);
3239
  return;
3240
  }
3241
/* For repeated stores, keep the zero in TMP3 instead of using an immediate
operand, unless the CPU reports a zero register. NOTE(review): the
register index test presumably excludes a TMP3 that is not backed by a
machine register - confirm against the sljit documentation. */
3242
if (sljit_get_register_index(TMP3) >= 0 && !sljit_has_cpu_feature(SLJIT_HAS_ZERO_REGISTER))
3243
  {
3244
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
3245
  src = TMP3;
3246
  }
3247
3248
if (size <= 6 * sizeof(sljit_sw))
3249
  {
3250
  for (i = common->early_fail_start_ptr; i < common->early_fail_end_ptr; i += sizeof(sljit_sw))
3251
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, src, 0);
3252
  return;
3253
  }
3254
3255
GET_LOCAL_BASE(TMP1, 0, common->early_fail_start_ptr);
3256
/* Bytes left over after the area is split into groups of three words. */
3257
uncleared_size = ((size / sizeof(sljit_sw)) % 3) * sizeof(sljit_sw);
3258
/* TMP2 is the address at which the three-words-per-iteration loop stops. */
3259
OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, size - uncleared_size);
3260
3261
loop = LABEL();
3262
OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
3263
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
/* TMP1 has already been advanced, so the second and third words of the
group are addressed with negative offsets. */
3264
OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -2 * SSIZE_OF(sw), src, 0);
3265
OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -1 * SSIZE_OF(sw), src, 0);
3266
CMPTO(SLJIT_LESS, TMP1, 0, TMP2, 0, loop);
3267
/* Clear the one or two words the loop did not cover. */
3268
if (uncleared_size >= sizeof(sljit_sw))
3269
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
3270
3271
if (uncleared_size >= 2 * sizeof(sljit_sw))
3272
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), sizeof(sljit_sw), src, 0);
3273
}
3274
3275
/* Emits code that restores the matching state:
 - OVECTOR(2) .. OVECTOR(length - 1) are overwritten with the value held
   in OVECTOR(1);
 - the local slots at common->mark_ptr and common->control_head_ptr are
   zeroed when they are in use (non-zero offsets);
 - STACK_TOP is reloaded with the "end" field of the sljit_stack referenced
   by jit_arguments.stack;
 - TMP1 is loaded from the local slot at common->start_ptr.
In the long-vector case STACK_TOP doubles as the loop counter, which is
safe because it is reloaded afterwards. */
static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
3276
{
3277
DEFINE_COMPILER;
3278
struct sljit_label *loop;
3279
int i;
3280
3281
SLJIT_ASSERT(length > 1);
3282
/* OVECTOR(1) contains the "string begin - 1" constant. */
3283
if (length > 2)
3284
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
3285
if (length < 8)
3286
  {
3287
  for (i = 2; i < length; i++)
3288
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
3289
  }
3290
else
3291
  {
  /* The SLJIT_MEM_SUPP call only asks whether a pre-update store is
  available; nothing is emitted by it. */
3292
  if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS)
3293
    {
    /* Base starts one word before the first slot written (slot 2). */
3294
    GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
3295
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
3296
    loop = LABEL();
3297
    sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
3298
    OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
3299
    JUMPTO(SLJIT_NOT_ZERO, loop);
3300
    }
3301
  else
3302
    {
    /* Fallback: plain store plus explicit increment, starting at slot 2. */
3303
    GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + 2 * sizeof(sljit_sw));
3304
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
3305
    loop = LABEL();
3306
    OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
3307
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(sljit_sw));
3308
    OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
3309
    JUMPTO(SLJIT_NOT_ZERO, loop);
3310
    }
3311
  }
3312
/* First step of reloading STACK_TOP. With virtual registers ARGUMENTS is
copied into STACK_TOP first and the stack field is loaded through it
further below. */
3313
if (!HAS_VIRTUAL_REGISTERS)
3314
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, stack));
3315
else
3316
  OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
3317
3318
if (common->mark_ptr != 0)
3319
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
3320
if (common->control_head_ptr != 0)
3321
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
3322
if (HAS_VIRTUAL_REGISTERS)
3323
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
3324
/* STACK_TOP now points to the sljit_stack structure; replace it with the
structure's end field. */
3325
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
3326
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, end));
3327
}
3328
3329
/* Walks the chain of control frames starting at current, looking for a
mark frame whose name equals skip_arg. Frame layout as used here: word 0
links to the next frame (0 terminates the chain), word 1 is the frame type,
and for mark frames word 2 is the mark name and word 3 the value returned
on a match. Returns 0 when no mark matches. */
static sljit_sw SLJIT_FUNC do_search_mark(sljit_sw *current, PCRE2_SPTR skip_arg)
{
sljit_sw *frame;

for (frame = current; frame != NULL; frame = (sljit_sw*)frame[0])
  {
  if (frame[1] == type_mark)
    {
    if (PRIV(strcmp)(skip_arg, (PCRE2_SPTR)frame[2]) == 0)
      return frame[3];
    }
  else if (frame[1] != type_then_trap)
    {
    /* Only mark and then-trap frames are expected in this chain. */
    SLJIT_UNREACHABLE();
    }

  /* Frames are linked towards higher addresses. */
  SLJIT_ASSERT(frame[0] == 0 || frame < (sljit_sw*)frame[0]);
  }

return 0;
}
3352
3353
/* Emits the code that publishes a successful match:
 - OVECTOR(1) is set to STR_PTR (its previous value is kept in SLJIT_S2);
 - jit_arguments.startchar_ptr receives the value of the start_ptr local
   and, when marks are in use, jit_arguments.mark_ptr receives the mark;
 - jit_arguments.oveccount entries are copied from the local ovector to
   match_data->ovector, each converted from a pointer to a code unit
   offset relative to jit_arguments.begin;
 - SLJIT_RETURN_REG receives the return value (1 when topbracket <= 1). */
static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
3354
{
3355
DEFINE_COMPILER;
3356
struct sljit_label *loop;
3357
BOOL has_pre;
3358
3359
/* At this point we can freely use all registers. */
3360
OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
3361
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
3362
/* Both variants below leave R1 = oveccount (loop counter) and R2 = address
one PCRE2_SIZE before match_data->ovector (R2 is advanced before each
store in the copy loop). */
3363
if (HAS_VIRTUAL_REGISTERS)
3364
  {
3365
  OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
3366
  OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
3367
  if (common->mark_ptr != 0)
3368
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
3369
  OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, oveccount));
3370
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
3371
  if (common->mark_ptr != 0)
3372
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
3373
  OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, match_data),
3374
    SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
3375
  }
3376
else
3377
  {
3378
  OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
3379
  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, match_data));
3380
  if (common->mark_ptr != 0)
3381
    OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
3382
  OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, oveccount));
3383
  OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
3384
  if (common->mark_ptr != 0)
3385
    OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R0, 0);
3386
  OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
3387
  }
3388
/* Support query only (SLJIT_MEM_SUPP): can the source be read with a
pre-update load? */
3389
has_pre = sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;
3390
/* S0 = source pointer into the local ovector (one word early when the
pre-update load is used), R0 = subject begin used for the conversion. */
3391
GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0));
3392
OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? SLJIT_R0 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
3393
3394
loop = LABEL();
3395
3396
if (has_pre)
3397
  sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));
3398
else
3399
  {
3400
  OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0);
3401
  OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
3402
  }
3403
/* Advance the destination and turn the pointer into a byte distance from
the subject begin. */
3404
OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, sizeof(PCRE2_SIZE));
3405
OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_R0, 0);
3406
/* Copy the integer value to the output buffer */
3407
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3408
OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
3409
#endif
3410
3411
SLJIT_ASSERT(sizeof(PCRE2_SIZE) == 4 || sizeof(PCRE2_SIZE) == 8);
3412
OP1(((sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV), SLJIT_MEM1(SLJIT_R2), 0, SLJIT_S1, 0);
3413
3414
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
3415
JUMPTO(SLJIT_NOT_ZERO, loop);
3416
3417
/* Calculate the return value, which is the maximum ovector value. */
/* The loops below walk the local ovector downwards in steps of two words,
decrementing R1 (initially topbracket + 1) once per step, for as long as
the loaded word equals the value saved in SLJIT_S2. NOTE(review): S2 is
presumably the "unset" marker that OVECTOR(1) held on entry - confirm. */
3418
if (topbracket > 1)
3419
  {
3420
  if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * SSIZE_OF(sw))) == SLJIT_SUCCESS)
3421
    {
3422
    GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
3423
    OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
3424
3425
    /* OVECTOR(0) is never equal to SLJIT_S2. */
3426
    loop = LABEL();
3427
    sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * SSIZE_OF(sw)));
3428
    OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
3429
    CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
3430
    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
3431
    }
3432
  else
3433
    {
3434
    GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + (topbracket - 1) * 2 * sizeof(sljit_sw));
3435
    OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
3436
3437
    /* OVECTOR(0) is never equal to SLJIT_S2. */
3438
    loop = LABEL();
3439
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0);
3440
    OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
3441
    OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
3442
    CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
3443
    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
3444
    }
3445
  }
3446
else
3447
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
3448
}
3449
3450
/* Emits the exit sequence for a partial match and jumps to quit:
 - SLJIT_RETURN_REG is set to PCRE2_ERROR_PARTIAL;
 - the match start is read from the hit_start local in soft partial mode,
   otherwise from the start_ptr local, and stored unconverted in
   jit_arguments.startchar_ptr;
 - match_data->ovector[0] receives that start and ovector[1] receives
   STR_END, both as code unit offsets from jit_arguments.begin. */
static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
3451
{
3452
DEFINE_COMPILER;
3453
sljit_s32 mov_opcode;
/* With virtual registers ARGUMENTS is first copied into SLJIT_R1 and
accessed through that register. */
3454
sljit_s32 arguments_reg = !HAS_VIRTUAL_REGISTERS ? ARGUMENTS : SLJIT_R1;
3455
3456
SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S0, str_end_must_be_saved_reg0);
3457
SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
3458
  && (common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start != 0 : common->hit_start == 0));
3459
3460
if (arguments_reg != ARGUMENTS)
3461
  OP1(SLJIT_MOV, arguments_reg, 0, ARGUMENTS, 0);
3462
OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP),
3463
  common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start : common->start_ptr);
3464
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_PARTIAL);
3465
3466
/* Store match begin and end. */
3467
OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, begin));
3468
OP1(SLJIT_MOV, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_R2, 0);
/* This load may overwrite arguments_reg (SLJIT_R1 in the virtual register
case); the arguments are not accessed after this point. */
3469
OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, match_data));
3470
/* The store width follows the size of PCRE2_SIZE. */
3471
mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
3472
/* ovector[0] = start - begin, scaled to code units. */
3473
OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S1, 0);
3474
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3475
OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
3476
#endif
3477
OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector), SLJIT_R2, 0);
3478
/* ovector[1] = STR_END - begin, scaled to code units. STR_END itself is
overwritten by this computation. */
3479
OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_S1, 0);
3480
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3481
OP2(SLJIT_ASHR, STR_END, 0, STR_END, 0, SLJIT_IMM, UCHAR_SHIFT);
3482
#endif
3483
OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector) + sizeof(PCRE2_SIZE), STR_END, 0);
3484
3485
JUMPTO(SLJIT_JUMP, quit);
3486
}
3487
3488
/* Emits code that lowers the start_used_ptr local to STR_PTR when STR_PTR
is below it. Nothing is emitted unless a partial matching mode is active. */
static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
{
/* May destroy TMP1. */
DEFINE_COMPILER;
struct sljit_jump *keep_value;

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  /* A start_used_ptr of -1 must be preserved. Adding one turns it into
  zero, so the comparison below skips the store both when
  start_used_ptr < STR_PTR and when start_used_ptr == -1. The store is
  also performed when the two are equal, which is harmless. */
  OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
  keep_value = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
  }
else if (common->mode == PCRE2_JIT_PARTIAL_HARD)
  keep_value = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
else
  return;

OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
JUMPHERE(keep_value);
}
3511
3512
/* Returns TRUE when the character at cc differs from its other case.
In UTF or UCP mode, characters above 127 are looked up with UCD_OTHERCASE
and the rest in common->fcc. Otherwise only code units accepted by
MAX_255 are looked up in common->fcc; anything else has no other case. */
static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, PCRE2_SPTR cc)
{
unsigned int c = *cc;

#ifdef SUPPORT_UNICODE
if (common->utf || common->ucp)
  {
  if (common->utf)
    {
    /* Decode the full character instead of the first code unit. */
    GETCHAR(c, cc);
    }

  return (c > 127) ? (c != UCD_OTHERCASE(c)) : (common->fcc[c] != c);
  }
#endif

if (!MAX_255(c))
  return FALSE;

return common->fcc[c] != c;
}
3537
3538
/* Returns the other case of c: UCD_OTHERCASE for characters above 127 in
UTF or UCP mode, otherwise the common->fcc table entry (TABLE_GET is given
c itself as the default value). */
static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
{
#ifdef SUPPORT_UNICODE
if (c > 127 && (common->utf || common->ucp))
  return UCD_OTHERCASE(c);
#endif

return TABLE_GET(c, common->fcc, c);
}
3547
3548
/* Checks whether the character at cc and its other case differ in exactly
one bit. Returns 0 when they differ in more than one bit; otherwise
returns (position << 8) | mask, where mask is the differing bit reduced to
fit in the low 8 bits and position selects where that bit lives inside the
encoded character. The character must have an other case (asserted).
NOTE(review): the exact meaning of position (code unit index for 8-bit
UTF; values 0..3 in 16/32-bit builds) is consumed by callers that are not
visible here - confirm before relying on it. */
static unsigned int char_get_othercase_bit(compiler_common *common, PCRE2_SPTR cc)
3549
{
3550
/* Detects if the character and its othercase has only 1 bit difference. */
3551
unsigned int c, oc, bit;
3552
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
3553
int n;
3554
#endif
3555
3556
#ifdef SUPPORT_UNICODE
3557
if (common->utf || common->ucp)
3558
  {
3559
  if (common->utf)
3560
    {
3561
    GETCHAR(c, cc);
3562
    }
3563
  else
3564
    c = *cc;
3565
3566
  if (c <= 127)
3567
    oc = common->fcc[c];
3568
  else
3569
    oc = UCD_OTHERCASE(c);
3570
  }
3571
else
3572
  {
3573
  c = *cc;
3574
  oc = TABLE_GET(c, common->fcc, c);
3575
  }
3576
#else
3577
c = *cc;
3578
oc = TABLE_GET(c, common->fcc, c);
3579
#endif
3580
3581
SLJIT_ASSERT(c != oc);
3582
/* The set bits of c ^ oc are exactly the bits in which the cases differ. */
3583
bit = c ^ oc;
3584
/* Optimized for English alphabet. */
3585
if (c <= 127 && bit == 0x20)
3586
  return (0 << 8) | 0x20;
3587
3588
/* Since c != oc, they must have at least 1 bit difference. */
3589
if (!is_powerof2(bit))
3590
  return 0;
3591
3592
#if PCRE2_CODE_UNIT_WIDTH == 8
3593
3594
#ifdef SUPPORT_UNICODE
3595
if (common->utf && c > 127)
3596
  {
  /* Start from the value GET_EXTRALEN gives for the first code unit and
  step back once for every 6 low bits of the difference that are zero,
  shifting the difference down by 6 bits each time. */
3597
  n = GET_EXTRALEN(*cc);
3598
  while ((bit & 0x3f) == 0)
3599
    {
3600
    n--;
3601
    bit >>= 6;
3602
    }
3603
  return (n << 8) | bit;
3604
  }
3605
#endif /* SUPPORT_UNICODE */
3606
return (0 << 8) | bit;
3607
3608
#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3609
3610
#ifdef SUPPORT_UNICODE
3611
if (common->utf && c > 65535)
3612
  {
  /* A difference in bit 10 or above is shifted down by 10 and then takes
  the common return below (positions 0 and 1); a lower difference is
  reported with positions 2 and 3. */
3613
  if (bit >= (1u << 10))
3614
    bit >>= 10;
3615
  else
3616
    return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
3617
  }
3618
#endif /* SUPPORT_UNICODE */
3619
return (bit < 256) ? ((0u << 8) | bit) : ((1u << 8) | (bit >> 8));
3620
3621
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
3622
}
3623
3624
/* Emits the check for a partial match. Nothing is emitted in complete
matching mode. Unless force is set (or empty partial matches are allowed),
the action is skipped when start_used_ptr >= STR_PTR; otherwise, in soft
mode, it is skipped when start_used_ptr is -1. The action itself is to zero
the hit_start local (soft mode) or to jump to the partial match exit
(hard mode). */
static void check_partial(compiler_common *common, BOOL force)
{
/* Does not modify registers. */
DEFINE_COMPILER;
struct sljit_jump *skip = NULL;
BOOL soft_mode = (common->mode == PCRE2_JIT_PARTIAL_SOFT);

SLJIT_ASSERT(!force || common->mode != PCRE2_JIT_COMPLETE);

if (common->mode == PCRE2_JIT_COMPLETE)
  return;

if (!force && !common->allow_empty_partial)
  skip = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
else if (soft_mode)
  skip = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);

if (soft_mode)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));

if (skip != NULL)
  JUMPHERE(skip);
}
3653
3654
/* Emits an end-of-subject check. In complete mode a jump is queued on
end_reached when STR_PTR >= STR_END. In the partial modes, the code run
once the end is reached queues a jump on end_reached when
start_used_ptr >= STR_PTR; otherwise soft mode zeroes hit_start and
continues to end_reached, while hard mode jumps to the partial match
exit. */
static void check_str_end(compiler_common *common, jump_list **end_reached)
{
/* Does not affect registers. Usually used in a tight spot. */
DEFINE_COMPILER;
struct sljit_jump *not_at_end;

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

/* This comparison is emitted first in both partial modes. */
add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));

JUMPHERE(not_at_end);
}
3683
3684
/* Emits an end-of-subject check that backtracks at the end of the subject.
In complete mode this is a single jump queued on backtracks. In the partial
modes the end-of-subject path first queues the conditions under which no
partial match is recorded, then either zeroes hit_start and backtracks
(soft mode) or jumps to the partial match exit (hard mode). */
static void detect_partial_match(compiler_common *common, jump_list **backtracks)
{
DEFINE_COMPILER;
struct sljit_jump *not_at_end;
BOOL soft_mode = (common->mode == PCRE2_JIT_PARTIAL_SOFT);

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  return;
  }

/* Partial matching mode: everything up to JUMPHERE runs only at the end. */
not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

if (!common->allow_empty_partial)
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
else if (soft_mode)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1));

if (soft_mode)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  }
else if (common->partialmatchlabel != NULL)
  JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
else
  add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));

JUMPHERE(not_at_end);
}
3716
3717
/* Emits the partial match handling alone, without an end-of-subject test.
Hard mode: jump to the partial match exit when start_used_ptr < STR_PTR.
Soft mode: zero the hit_start local unless start_used_ptr >= STR_PTR.
Nothing is emitted in any other mode. */
static void process_partial_match(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *skip;

if (common->mode == PCRE2_JIT_PARTIAL_HARD)
  {
  if (common->partialmatchlabel == NULL)
    add_jump(compiler, &common->partialmatch, CMP(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
  else
    CMPTO(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0, common->partialmatchlabel);
  return;
  }

if (common->mode != PCRE2_JIT_PARTIAL_SOFT)
  return;

skip = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
JUMPHERE(skip);
}
3737
3738
/* Emits a jump to label while STR_PTR is below STR_END; the code that
follows (reached once the end of the subject is hit) is the partial match
handling emitted by process_partial_match(). */
static void detect_partial_match_to(compiler_common *common, struct sljit_label *label)
3739
{
3740
DEFINE_COMPILER;
3741
3742
CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, label);
3743
process_partial_match(common);
3744
}
3745
3746
/* Emits code that loads the character at STR_PTR into TMP1 while leaving
STR_PTR unchanged on exit.
  max         when max is below the first value that needs multi-unit
              decoding (or validation), only the plain code unit load is
              emitted
  dst, dstw   operand used to save and restore STR_PTR around the UTF
              helper call
  backtracks  optional list; in invalid UTF mode a jump is queued on it
              for invalid input. In the 8-bit and 16-bit paths TMP1 is
              compared against INVALID_UTF_CHAR; the 32-bit path range
              checks TMP1 directly, and without backtracks it replaces
              out-of-range values with INVALID_UTF_CHAR. */
static void peek_char(compiler_common *common, sljit_u32 max, sljit_s32 dst, sljit_sw dstw, jump_list **backtracks)
3747
{
3748
/* Reads the character into TMP1, keeps STR_PTR.
3749
Does not check STR_END. TMP2, dst, RETURN_ADDR Destroyed. */
3750
DEFINE_COMPILER;
3751
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
3752
struct sljit_jump *jump;
3753
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
3754
/* Not every parameter is used in every build configuration. */
3755
SLJIT_UNUSED_ARG(max);
3756
SLJIT_UNUSED_ARG(dst);
3757
SLJIT_UNUSED_ARG(dstw);
3758
SLJIT_UNUSED_ARG(backtracks);
3759
3760
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3761
3762
#ifdef SUPPORT_UNICODE
3763
#if PCRE2_CODE_UNIT_WIDTH == 8
3764
if (common->utf)
3765
  {
3766
  if (max < 128) return;
3767
  /* Code units below 0x80 are complete characters. For anything else,
  STR_PTR is saved in dst, moved past the first code unit, the UTF read
  helper is called, and STR_PTR is restored. */
3768
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
3769
  OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
3770
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3771
  add_jump(compiler, common->invalid_utf ? &common->utfreadchar_invalid : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
3772
  OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
3773
  if (backtracks && common->invalid_utf)
3774
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
3775
  JUMPHERE(jump);
3776
  }
3777
#elif PCRE2_CODE_UNIT_WIDTH == 16
3778
if (common->utf)
3779
  {
3780
  if (max < 0xd800) return;
3781
  /* TMP2 = distance from the start of the surrogate range. */
3782
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3783
3784
  if (common->invalid_utf)
3785
    {
    /* Any surrogate (0xd800 - 0xdfff) goes through the validating helper. */
3786
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
3787
    OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
3788
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3789
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
3790
    OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
    /* The invalid_utf test below is redundant inside this branch. */
3791
    if (backtracks && common->invalid_utf)
3792
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
3793
    }
3794
  else
3795
    {
3796
    /* TMP2 contains the high surrogate. */
    /* Combine inline: (high - 0xd800) << 10, plus (low - 0xdc00), plus
    0x10000. */
3797
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
3798
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3799
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
3800
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
3801
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
3802
    }
3803
3804
  JUMPHERE(jump);
3805
  }
3806
#elif PCRE2_CODE_UNIT_WIDTH == 32
3807
if (common->invalid_utf)
3808
  {
3809
  if (max < 0xd800) return;
3810
  /* A value is rejected when it is 0x110000 or above, or when it lies in
  the surrogate range 0xd800 - 0xdfff. */
3811
  if (backtracks != NULL)
3812
    {
3813
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3814
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
3815
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
3816
    }
3817
  else
3818
    {
    /* No backtrack list: substitute INVALID_UTF_CHAR instead of jumping. */
3819
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3820
    OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000);
3821
    CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
3822
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
3823
    CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
3824
    }
3825
  }
3826
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
3827
#endif /* SUPPORT_UNICODE */
3828
}
3829
3830
/* Emits code that loads the character ending just before STR_PTR into
TMP1.
  max         when max is below the first value that needs multi-unit
              decoding, only the plain code unit load is emitted (8-bit
              and 16-bit UTF paths)
  backtracks  list that receives a jump for invalid input in invalid UTF
              mode.
NOTE(review): the 8-bit and 16-bit paths test backtracks for NULL before
using it, but the 32-bit path passes it to add_jump unconditionally -
confirm that no caller passes NULL there in invalid UTF mode. */
static void peek_char_back(compiler_common *common, sljit_u32 max, jump_list **backtracks)
3831
{
3832
/* Reads one character back without moving STR_PTR. TMP2 must
3833
contain the start of the subject buffer. Affects TMP1, TMP2, and RETURN_ADDR. */
3834
DEFINE_COMPILER;
3835
3836
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
3837
struct sljit_jump *jump;
3838
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
3839
/* Not every parameter is used in every build configuration. */
3840
SLJIT_UNUSED_ARG(max);
3841
SLJIT_UNUSED_ARG(backtracks);
3842
3843
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3844
3845
#ifdef SUPPORT_UNICODE
3846
#if PCRE2_CODE_UNIT_WIDTH == 8
3847
if (common->utf)
3848
  {
3849
  if (max < 128) return;
3850
  /* Code units below 0x80 are complete characters; everything else is
  decoded by one of the backward peek helpers. */
3851
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
3852
  if (common->invalid_utf)
3853
    {
3854
    add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
3855
    if (backtracks != NULL)
3856
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
3857
    }
3858
  else
3859
    add_jump(compiler, &common->utfpeakcharback, JUMP(SLJIT_FAST_CALL));
3860
  JUMPHERE(jump);
3861
  }
3862
#elif PCRE2_CODE_UNIT_WIDTH == 16
3863
if (common->utf)
3864
  {
3865
  if (max < 0xd800) return;
3866
3867
  if (common->invalid_utf)
3868
    {
    /* Everything from 0xd800 upwards is handed to the validating helper. */
3869
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3870
    add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
3871
    if (backtracks != NULL)
3872
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
3873
    }
3874
  else
3875
    {
    /* Only a low surrogate (0xdc00 - 0xdfff) needs the preceding unit.
    Result: ((high - 0xd800) << 10) + (low - 0xdc00) + 0x10000. */
3876
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
3877
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xdc00);
3878
    /* TMP2 contains the low surrogate. */
3879
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3880
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
3881
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3882
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
3883
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
3884
    }
3885
    JUMPHERE(jump);
3886
  }
3887
#elif PCRE2_CODE_UNIT_WIDTH == 32
3888
if (common->invalid_utf)
3889
  {
  /* Reject values of 0x110000 and above and the surrogate range. */
3890
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3891
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
3892
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
3893
  }
3894
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
3895
#endif /* SUPPORT_UNICODE */
3896
}
3897
3898
/* Option bits for the "options" argument of read_char(). */
/* With this bit set, read_char() does not take its "max < 128" early exit
in 8-bit UTF mode. NOTE(review): presumably it guarantees that STR_PTR is
moved past the whole character - confirm in read_char(). */
#define READ_CHAR_UPDATE_STR_PTR 0x1
3899
/* Selects the utfreadnewline_invalid helper instead of utfreadchar_invalid
in 8-bit invalid UTF mode. */
#define READ_CHAR_UTF8_NEWLINE 0x2
3900
/* Both of the bits above. */
#define READ_CHAR_NEWLINE (READ_CHAR_UPDATE_STR_PTR | READ_CHAR_UTF8_NEWLINE)
3901
/* Skips the invalid UTF handling even when common->invalid_utf is set. */
#define READ_CHAR_VALID_UTF 0x4
3902
3903
/* Emits the code which reads the next character of the subject.

The code unit at STR_PTR is always loaded into TMP1 and STR_PTR is advanced
by one code unit. What is generated after that depends on the code unit
width, on common->utf / common->invalid_utf and on the [min, max] range.

Arguments:
  common      the compiler state
  min, max    inclusive range in which the value in TMP1 must be exact
  backtracks  in invalid UTF mode, when not NULL, a jump taken for an
              invalid character is appended to this list
  options     combination of READ_CHAR_* bits */
static void read_char(compiler_common *common, sljit_u32 min, sljit_u32 max,
3904
  jump_list **backtracks, sljit_u32 options)
3905
{
3906
/* Reads the precise value of a character into TMP1, if the character is
3907
between min and max (c >= min && c <= max). Otherwise it returns with a value
3908
outside the range. Does not check STR_END. */
3909
DEFINE_COMPILER;
3910
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
3911
struct sljit_jump *jump;
3912
#endif
3913
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
3914
struct sljit_jump *jump2;
3915
#endif
3916
3917
SLJIT_UNUSED_ARG(min);
3918
SLJIT_UNUSED_ARG(max);
3919
SLJIT_UNUSED_ARG(backtracks);
3920
SLJIT_UNUSED_ARG(options);
3921
SLJIT_ASSERT(min <= max);
3922
3923
/* Load the first code unit and step over it. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3924
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3925
3926
#ifdef SUPPORT_UNICODE
3927
#if PCRE2_CODE_UNIT_WIDTH == 8
3928
if (common->utf)
3929
  {
3930
  /* Only values below 128 matter and STR_PTR does not need to skip the
  rest of the character: the first byte is enough. */
  if (max < 128 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;
3931
3932
  if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
3933
    {
3934
    /* Bytes below 0x80 need no further work. Everything else is decoded
    and validated by an out-of-line helper. */
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
3935
3936
    if (options & READ_CHAR_UTF8_NEWLINE)
3937
      add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
3938
    else
3939
      add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
3940
3941
    /* The helpers return INVALID_UTF_CHAR in TMP1 for a malformed sequence. */
    if (backtracks != NULL)
3942
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
3943
    JUMPHERE(jump);
3944
    return;
3945
    }
3946
3947
  /* Bytes below 0xc0 are not decoded any further. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3948
  if (min >= 0x10000)
3949
    {
3950
    /* Inline decoding of sequences whose first byte is 0xf0..0xf7 (three
    more bytes are read). Any other first byte skips the decoding (jump2).
    With READ_CHAR_UPDATE_STR_PTR the utf8_table4 entry of the first byte
    is kept in RETURN_ADDR (used as a scratch register here) and added to
    STR_PTR at the end. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
3951
    if (options & READ_CHAR_UPDATE_STR_PTR)
3952
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3953
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3954
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
3955
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3956
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3957
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3958
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3959
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3960
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3961
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3962
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
3963
    if (!(options & READ_CHAR_UPDATE_STR_PTR))
3964
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
3965
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3966
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3967
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3968
    JUMPHERE(jump2);
3969
    if (options & READ_CHAR_UPDATE_STR_PTR)
3970
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
3971
    }
3972
  else if (min >= 0x800 && max <= 0xffff)
3973
    {
3974
    /* Same scheme for first bytes 0xe0..0xef (two more bytes are read). */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
3975
    if (options & READ_CHAR_UPDATE_STR_PTR)
3976
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3977
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3978
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
3979
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3980
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3981
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3982
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3983
    if (!(options & READ_CHAR_UPDATE_STR_PTR))
3984
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3985
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3986
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3987
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3988
    JUMPHERE(jump2);
3989
    if (options & READ_CHAR_UPDATE_STR_PTR)
3990
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
3991
    }
3992
  else if (max >= 0x800)
3993
    {
3994
    /* No specialized inline form: call the generic decoder. */
    add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
3995
    }
3996
  else if (max < 128)
3997
    {
3998
    /* Reached only with READ_CHAR_UPDATE_STR_PTR (see the early return
    above): the value is not decoded, STR_PTR is just advanced by the
    utf8_table4 entry of the first byte. */
    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3999
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4000
    }
4001
  else
4002
    {
4003
    /* 128 <= max < 0x800: combine the first byte with one more byte. */
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4004
    if (!(options & READ_CHAR_UPDATE_STR_PTR))
4005
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4006
    else
4007
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4008
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
4009
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4010
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
4011
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4012
    if (options & READ_CHAR_UPDATE_STR_PTR)
4013
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
4014
    }
4015
  JUMPHERE(jump);
4016
  }
4017
#elif PCRE2_CODE_UNIT_WIDTH == 16
4018
if (common->utf)
4019
  {
4020
  /* A single code unit is enough when only values below the surrogate
  area matter and STR_PTR does not need to skip a second unit. */
  if (max < 0xd800 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;
4021
4022
  if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
4023
    {
4024
    /* Code units outside 0xd800..0xdfff need no further work. Surrogates
    are decoded and validated by an out-of-line helper. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4025
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
4026
4027
    if (options & READ_CHAR_UTF8_NEWLINE)
4028
      add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
4029
    else
4030
      add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
4031
4032
    if (backtracks != NULL)
4033
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
4034
    JUMPHERE(jump);
4035
    return;
4036
    }
4037
4038
  if (max >= 0x10000)
4039
    {
4040
    /* Values above 0xffff are needed: when the unit is in 0xd800..0xdbff
    it is combined with the following unit, which is consumed as well. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4041
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
4042
    /* TMP2 contains the high surrogate. */
4043
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4044
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
4045
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4046
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
4047
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
4048
    JUMPHERE(jump);
4049
    return;
4050
    }
4051
4052
  /* From here max < 0x10000, so a surrogate pair never has to be decoded.
  For a unit in 0xd800..0xdbff the second unit is skipped when
  READ_CHAR_UPDATE_STR_PTR is set, and when max >= 0xd800 TMP1 is replaced
  by 0x10000, a value above max. */
  /* Skip low surrogate if necessary. */
4053
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4054
4055
  if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
4056
    {
4057
    /* Branch free variant using conditional moves. */
    if (options & READ_CHAR_UPDATE_STR_PTR)
4058
      OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4059
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
4060
    if (options & READ_CHAR_UPDATE_STR_PTR)
4061
      CMOV(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0);
4062
    if (max >= 0xd800)
4063
      CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, 0x10000);
4064
    }
4065
  else
4066
    {
4067
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
4068
    if (options & READ_CHAR_UPDATE_STR_PTR)
4069
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4070
    if (max >= 0xd800)
4071
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
4072
    JUMPHERE(jump);
4073
    }
4074
  }
4075
#elif PCRE2_CODE_UNIT_WIDTH == 32
4076
if (common->invalid_utf)
4077
  {
4078
  /* Values >= 0x110000 and values in 0xd800..0xdfff are invalid: either
  jump to the backtrack list, or replace TMP1 with INVALID_UTF_CHAR. */
  if (backtracks != NULL)
4079
    {
4080
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4081
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
4082
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
4083
    }
4084
  else
4085
    {
4086
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4087
    OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000);
4088
    CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
4089
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
4090
    CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
4091
    }
4092
  }
4093
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
4094
#endif /* SUPPORT_UNICODE */
4095
}
4096
4097
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
4098
4099
static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
{
/* Returns TRUE when bytes 16..31 of the 32 byte bitset (the bits of the
character codes 128..255) all have the same value: 0xff if nclass is set,
0 otherwise. In that case the character codes below 128 are enough to
determine a match. */
const sljit_u8 expected = nclass ? 0xff : 0;
int index;

for (index = 16; index < 32; index++)
  {
  if (bitset[index] != expected)
    return FALSE;
  }

return TRUE;
}
4115
4116
static void read_char7_type(compiler_common *common, jump_list **backtracks, BOOL negated)
{
/* Reads the precise character type of a character into TMP1, if the character
is less than 128. Otherwise it returns with zero. Does not check STR_END.

Arguments:
  common      the compiler state (common->utf must be set)
  backtracks  list which receives the jump taken when an invalid character
              is found (only used when negated and common->invalid_utf)
  negated     when TRUE, STR_PTR is also moved past the remaining code units
              of a character which is >= 128; when FALSE only the first code
              unit is consumed */
DEFINE_COMPILER;
struct sljit_jump *jump;

SLJIT_ASSERT(common->utf);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* All values > 127 are zero in ctypes. */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);

if (negated)
  {
  /* Nothing more to do for a single byte character. */
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);

  if (common->invalid_utf)
    {
    /* The helper expects the first byte of the character in TMP1, but
    TMP1 holds the ctypes value at this point and the byte is in TMP2.
    Without this move the helper would treat every character >= 128
    as invalid. */
    OP1(SLJIT_MOV, TMP1, 0, TMP2, 0);
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    /* The character is valid and >= 128: its type is zero. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    }
  else
    {
    /* Advance STR_PTR by the utf8_table4 entry of the first byte. */
    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
    }
  JUMPHERE(jump);
  }
}
4150
4151
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
4152
4153
static void read_char8_type(compiler_common *common, jump_list **backtracks, BOOL negated)
{
/* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END.
The type is zero for every character above 255, because the ctypes array
contains only 256 values.

Arguments:
  common      the compiler state
  backtracks  list which receives the jumps taken when an invalid character
              is found (invalid UTF mode only)
  negated     when TRUE, a character above 255 is consumed completely (and
              validated in invalid UTF mode); when FALSE only as many code
              units are processed as are needed to get the type */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
struct sljit_jump *jump;
#endif
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_jump *jump2;
#endif

SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(negated);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  /* The result of this read may be unused, but saves an "else" part. */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);

  if (!negated)
    {
    /* Only two byte sequences can encode a character below 256, so only
    those are decoded here. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
    /* The first byte must be in the 0xc2-0xdf range. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe0 - 0xc2));

    /* If TMP1 is in 0x80-0xbf range, the 0x80 added here together with
    the 0xc2 subtracted above yields the character value in TMP2. */
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
    /* The second byte (now TMP1 + 0x80) must be in the 0x80-0xbf range.
    The check has to be done on TMP1: TMP2 holds the combined value,
    which is always >= 0x80 for a well formed sequence. */
    if (common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40));

    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else if (common->invalid_utf)
    {
    /* The helper expects the first byte of the character in TMP1, but
    TMP1 holds a ctypes value at this point and the byte is in TMP2. */
    OP1(SLJIT_MOV, TMP1, 0, TMP2, 0);
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
    OP1(SLJIT_MOV, TMP2, 0, TMP1, 0);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));

    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else
    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));

  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf && negated)
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x110000));
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32 */

#if PCRE2_CODE_UNIT_WIDTH != 8
/* The ctypes array contains only 256 values. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
#if PCRE2_CODE_UNIT_WIDTH != 8
JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf && negated)
  {
  /* Skip low surrogate if necessary. */
  if (!common->invalid_utf)
    {
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);

    if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
      {
      OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
      CMOV(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0);
      }
    else
      {
      jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      JUMPHERE(jump);
      }
    return;
    }

  /* Invalid UTF mode: a code unit in the 0xd800-0xdbff range must be
  followed by one in the 0xdc00-0xdfff range, which is consumed as well.
  A code unit in the 0xdc00-0xdfff range cannot start a character. */
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));

  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16 */
}
4271
4272
/* Emits the code which moves STR_PTR one character backwards.

Arguments:
  common         the compiler state
  backtracks     in invalid UTF mode, when not NULL, receives the jump taken
                 when the preceding character is invalid
  must_be_valid  when TRUE the invalid UTF handling is not generated */
static void move_back(compiler_common *common, jump_list **backtracks, BOOL must_be_valid)
4273
{
4274
/* Goes one character back. Affects STR_PTR and TMP1. If must_be_valid is TRUE,
4275
TMP2 is not used. Otherwise TMP2 must contain the start of the subject buffer,
4276
and it is destroyed. Does not modify STR_PTR for invalid character sequences. */
4277
DEFINE_COMPILER;
4278
4279
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
4280
struct sljit_jump *jump;
4281
#endif
4282
4283
#ifdef SUPPORT_UNICODE
4284
#if PCRE2_CODE_UNIT_WIDTH == 8
4285
struct sljit_label *label;
4286
4287
if (common->utf)
4288
  {
4289
  if (!must_be_valid && common->invalid_utf)
4290
    {
4291
    /* Step back one byte. A byte below 0x80 is a complete character,
    anything else is handled by an out-of-line helper. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
4292
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4293
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
4294
    add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
4295
    /* NOTE(review): the helper presumably leaves 0 in TMP1 when the
    sequence is invalid (its successful exits set TMP1 to 1) - confirm. */
    if (backtracks != NULL)
4296
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
4297
    JUMPHERE(jump);
4298
    return;
4299
    }
4300
4301
  /* Valid UTF-8: step back as long as the byte just passed has the
  10xxxxxx form. */
  label = LABEL();
4302
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
4303
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4304
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4305
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
4306
  return;
4307
  }
4308
#elif PCRE2_CODE_UNIT_WIDTH == 16
4309
if (common->utf)
4310
  {
4311
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
4312
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4313
4314
  if (!must_be_valid && common->invalid_utf)
4315
    {
4316
    /* Only code units in the 0xd800-0xdfff range need the helper. */
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4317
    jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0xd800);
4318
    add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
4319
    if (backtracks != NULL)
4320
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
4321
    JUMPHERE(jump);
4322
    return;
4323
    }
4324
4325
  /* TMP1 becomes 1 when the code unit is in the 0xdc00-0xdfff range and 0
  otherwise; scaled to bytes it is subtracted from STR_PTR. */
  /* Skip low surrogate if necessary. */
4326
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4327
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xdc00);
4328
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
4329
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
4330
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4331
  return;
4332
  }
4333
#elif PCRE2_CODE_UNIT_WIDTH == 32
4334
if (common->invalid_utf && !must_be_valid)
4335
  {
4336
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
4337
  if (backtracks != NULL)
4338
    {
4339
    /* Backtrack before STR_PTR is changed when the value is too large. */
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
4340
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4341
    return;
4342
    }
4343
4344
  /* No backtrack list: TMP1 becomes 1 for a value below 0x110000 and 0
  otherwise, so STR_PTR only moves for the former. */
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x110000);
4345
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
4346
  OP2(SLJIT_SHL,  TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
4347
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4348
  return;
4349
  }
4350
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
4351
#endif /* SUPPORT_UNICODE */
4352
4353
SLJIT_UNUSED_ARG(backtracks);
4354
SLJIT_UNUSED_ARG(must_be_valid);
4355
4356
/* Every character is a single code unit here. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4357
}
4358
4359
static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
{
/* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed.
A jump is appended to backtracks: it is taken when the character is a newline
if jumpifmatch is set, and when it is not a newline otherwise. */
DEFINE_COMPILER;
struct sljit_jump *is_cr;

if (nltype == NLTYPE_ANY)
  {
  /* The result of the shared anynewline helper arrives in the zero flag. */
  add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  if (jumpifmatch)
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  else
    add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
  return;
  }

if (nltype != NLTYPE_ANYCRLF)
  {
  /* A single, fixed newline character. */
  SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
  if (jumpifmatch)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
  else
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
  return;
  }

/* NLTYPE_ANYCRLF: the newline is either CR or NL. */
if (!jumpifmatch)
  {
  /* CR skips the NL test, any other character must be NL. */
  is_cr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
  JUMPHERE(is_cr);
  return;
  }

add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
}
4391
4392
#ifdef SUPPORT_UNICODE
4393
4394
#if PCRE2_CODE_UNIT_WIDTH == 8
4395
/* Emits the body of the utfreadchar helper, which is entered with a fast
call and left with a fast return through RETURN_ADDR. It performs no
validation and no STR_END check. On entry STR_PTR points after the first
byte; on exit it points after the last byte which was read. TMP2 is
destroyed. */
static void do_utfreadchar(compiler_common *common)
4396
{
4397
/* Fast decoding a UTF-8 character. TMP1 contains the first byte
4398
of the character (>= 0xc0). Return char value in TMP1. */
4399
DEFINE_COMPILER;
4400
struct sljit_jump *jump;
4401
4402
sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4403
/* TMP1 = (first byte << 6) | (low six bits of the second byte). */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4404
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4405
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
4406
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4407
4408
/* 0x800 is bit 5 of the first byte after the shift: it is clear for the
110xxxxx form. */
/* Searching for the first zero. */
4409
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
4410
jump = JUMP(SLJIT_NOT_ZERO);
4411
/* Two byte sequence. */
4412
/* Remove the shifted 0xc0 marker bits of the first byte. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3000);
4413
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4414
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4415
4416
JUMPHERE(jump);
4417
/* Append the low six bits of the third byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4418
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4419
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
4420
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4421
4422
/* 0x10000 is bit 4 of the first byte after two shifts: it is clear for
the 1110xxxx form. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x10000);
4423
jump = JUMP(SLJIT_NOT_ZERO);
4424
/* Three byte sequence. */
4425
/* Remove the shifted 0xe0 marker bits of the first byte. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0000);
4426
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4427
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4428
4429
/* Four byte sequence. */
4430
JUMPHERE(jump);
4431
/* Remove the shifted 0xf0 marker bits, then append the low six bits of
the fourth byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
4432
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0000);
4433
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
4434
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4435
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
4436
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4437
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4438
}
4439
4440
/* Emits the body of the utfreadtype8 helper, which is entered with a fast
call and left with a fast return through RETURN_ADDR. It performs no
validation and no STR_END check. The returned type is zero for every
character above 255. */
static void do_utfreadtype8(compiler_common *common)
4441
{
4442
/* Fast decoding a UTF-8 character type. TMP2 contains the first byte
4443
of the character (>= 0xc0). Return value in TMP1. */
4444
DEFINE_COMPILER;
4445
struct sljit_jump *jump;
4446
struct sljit_jump *compare;
4447
4448
sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4449
4450
/* Bit 0x20 of the first byte is set when it does not have the 110xxxxx
form. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0x20);
4451
jump = JUMP(SLJIT_NOT_ZERO);
4452
/* Two byte sequence. */
4453
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4454
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4455
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
4456
/* The upper 5 bits are known at this point. */
4457
/* A value above 3 means that the character is above 255. */
compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
4458
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
4459
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
4460
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
4461
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
4462
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4463
4464
JUMPHERE(compare);
4465
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
4466
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4467
4468
/* We only have types for characters less than 256. */
4469
JUMPHERE(jump);
4470
/* Advance STR_PTR by the utf8_table4 entry of the first byte and return
zero as the type. */
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4471
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
4472
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4473
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4474
}
4475
4476
/* Emits the body of the utfreadchar_invalid helper, which is entered with a
fast call and left with a fast return through RETURN_ADDR. It decodes and
validates one UTF-8 character and never reads at or after STR_END. The
result is INVALID_UTF_CHAR in TMP1 for a malformed sequence. TMP2 is
destroyed.

Two code paths are generated: the main one is used when at least three
bytes are available after the first byte, the other one (buffer_end_close)
checks STR_END before each further read. */
static void do_utfreadchar_invalid(compiler_common *common)
4477
{
4478
/* Slow decoding a UTF-8 character. TMP1 contains the first byte
4479
of the character (>= 0xc0). Return char value in TMP1. STR_PTR is
4480
undefined for invalid characters. */
4481
DEFINE_COMPILER;
4482
sljit_s32 i;
4483
sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
4484
struct sljit_jump *jump;
4485
struct sljit_jump *buffer_end_close;
4486
struct sljit_label *three_byte_entry;
4487
struct sljit_label *exit_invalid_label;
4488
/* Jumps to the common "invalid" exit. An entry is NULL when a conditional
move is emitted instead of the jump. */
struct sljit_jump *exit_invalid[11];
4489
4490
sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4491
4492
/* After this the acceptable first bytes 0xc2-0xf4 are in the
0-(0xf5 - 0xc2 - 1) range, everything else is above it (unsigned). */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc2);
4493
4494
/* Usually more than 3 characters remained in the subject buffer. */
4495
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
4496
4497
/* Not a valid start of a multi-byte sequence, no more bytes read. */
4498
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xf5 - 0xc2);
4499
4500
buffer_end_close = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
4501
4502
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
4503
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4504
/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
4505
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
4506
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
4507
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4508
4509
/* TMP1 is now ((first byte - 0xc0) << 6) + (second byte & 0x3f), so
the 0x800 bit is set when the first byte is >= 0xe0. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
4510
jump = JUMP(SLJIT_NOT_ZERO);
4511
4512
/* Two-byte sequence: only one of the three bytes skipped above belongs
to the character. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4513
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4514
4515
JUMPHERE(jump);
4516
4517
/* Three-byte sequence. */
4518
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4519
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4520
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
4521
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4522
if (has_cmov)
4523
  {
4524
  /* For a bad third byte TMP1 is set to 0x20000, which takes the
  three-byte path below and fails its last range check. */
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4525
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0x20000);
4526
  exit_invalid[2] = NULL;
4527
  }
4528
else
4529
  exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4530
4531
/* The 0x10000 bit is set when the first byte is >= 0xf0. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x10000);
4532
jump = JUMP(SLJIT_NOT_ZERO);
4533
4534
three_byte_entry = LABEL();
4535
4536
/* TMP1 is 0x20000 + character here. Values in the 0xd800-0xdfff range
are rejected first, then (after restoring the character value) values
below 0x800. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2d800);
4537
if (has_cmov)
4538
  {
4539
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
4540
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0xd800);
4541
  exit_invalid[3] = NULL;
4542
  }
4543
else
4544
  exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
4545
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4546
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4547
4548
if (has_cmov)
4549
  {
4550
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
4551
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
4552
  exit_invalid[4] = NULL;
4553
  }
4554
else
4555
  exit_invalid[4] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
4556
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4557
4558
JUMPHERE(jump);
4559
4560
/* Four-byte sequence. */
4561
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4562
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4563
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
4564
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4565
if (has_cmov)
4566
  {
4567
  /* For a bad fourth byte TMP1 is set to 0, which fails the range check
  below. */
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4568
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0);
4569
  exit_invalid[5] = NULL;
4570
  }
4571
else
4572
  exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4573
4574
/* TMP1 is 0xc00000 + character here. After the subtraction it is
character - 0x10000, which must be below 0x100000. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc10000);
4575
if (has_cmov)
4576
  {
4577
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
4578
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000);
4579
  exit_invalid[6] = NULL;
4580
  }
4581
else
4582
  exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
4583
4584
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
4585
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4586
4587
/* Fewer than three bytes are available after the first byte. */
JUMPHERE(buffer_end_close);
4588
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4589
exit_invalid[7] = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
4590
4591
/* Two-byte sequence. */
4592
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4593
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4594
/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
4595
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
4596
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
4597
exit_invalid[8] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4598
4599
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
4600
jump = JUMP(SLJIT_NOT_ZERO);
4601
4602
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4603
4604
/* Three-byte sequence. */
4605
JUMPHERE(jump);
4606
exit_invalid[9] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4607
4608
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4609
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4610
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
4611
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4612
if (has_cmov)
4613
  {
4614
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4615
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
4616
  exit_invalid[10] = NULL;
4617
  }
4618
else
4619
  exit_invalid[10] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4620
4621
/* One will be subtracted from STR_PTR later. */
4622
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4623
4624
/* Four byte sequences are not possible. */
4625
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x30000, three_byte_entry);
4626
4627
/* Common exit for every malformed sequence. */
/* NOTE(review): some entries are NULL when has_cmov is set; this relies on
sljit_set_label() accepting a NULL jump - confirm. */
exit_invalid_label = LABEL();
4628
for (i = 0; i < 11; i++)
4629
  sljit_set_label(exit_invalid[i], exit_invalid_label);
4630
4631
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
4632
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4633
}
4634
4635
/* Emits the body of the utfreadnewline_invalid helper, which is entered
with a fast call and left with a fast return through RETURN_ADDR. The only
exact values it returns are the multi-byte newlines 0x85, 0x2028 and 0x2029
(and only when common->nltype is NLTYPE_ANY). For anything else the
following 10xxxxxx bytes are skipped and INVALID_UTF_CHAR is returned in
TMP1. TMP2 is destroyed. */
static void do_utfreadnewline_invalid(compiler_common *common)
4636
{
4637
/* Slow decoding a UTF-8 character, specialized for newlines.
4638
TMP1 contains the first byte of the character (>= 0xc0). Return
4639
char value in TMP1. */
4640
DEFINE_COMPILER;
4641
struct sljit_label *loop;
4642
struct sljit_label *skip_start;
4643
struct sljit_label *three_byte_exit;
4644
struct sljit_jump *jump[5];
4645
4646
sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4647
4648
if (common->nltype != NLTYPE_ANY)
4649
  {
4650
  SLJIT_ASSERT(common->nltype != NLTYPE_FIXED || common->newline < 128);
4651
4652
  /* All newlines are ascii, just skip intermediate octets. */
4653
  jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4654
  /* NOTE(review): STR_END is only compared before the loop, not between
  its iterations - confirm that this is safe for the subject buffer. */
  loop = LABEL();
4655
  /* Load with post-increment when available (the first call with
  SLJIT_MEM_SUPP presumably only tests for support - confirm), otherwise
  use a separate load and add. */
  if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)) == SLJIT_SUCCESS)
4656
    sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4657
  else
4658
    {
4659
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4660
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4661
    }
4662
4663
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
4664
  CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);
4665
  /* The last byte read is not a 10xxxxxx byte: step back to it. */
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4666
4667
  JUMPHERE(jump[0]);
4668
4669
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
4670
  OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4671
  return;
4672
  }
4673
4674
/* Read the second byte into TMP2. */
jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4675
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4676
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4677
4678
/* 0xc2 and 0xe2 are the first bytes of the encoded forms of 0x85 and of
0x2028 / 0x2029. */
jump[1] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xc2);
4679
jump[2] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xe2);
4680
4681
/* Not a newline: TMP2 holds the last byte read. */
skip_start = LABEL();
4682
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
4683
jump[3] = CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80);
4684
4685
/* Skip intermediate octets. */
4686
loop = LABEL();
4687
jump[4] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4688
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4689
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4690
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
4691
CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);
4692
4693
/* The last byte read is not a 10xxxxxx byte: step back to it. */
JUMPHERE(jump[3]);
4694
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4695
4696
/* The end of the subject was reached: STR_PTR is left as it is. */
three_byte_exit = LABEL();
4697
JUMPHERE(jump[0]);
4698
JUMPHERE(jump[4]);
4699
4700
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
4701
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4702
4703
/* Two byte long newline: 0x85. */
4704
JUMPHERE(jump[1]);
4705
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x85, skip_start);
4706
4707
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x85);
4708
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4709
4710
/* Three byte long newlines: 0x2028 and 0x2029. */
4711
JUMPHERE(jump[2]);
4712
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, skip_start);
4713
CMPTO(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0, three_byte_exit);
4714
4715
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4716
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4717
4718
/* The third byte must be in the 0x80-0xbf range. The returned value is
0x2000 + (third byte - 0x80), which is only exact for the 0x2000-0x203f
characters; that includes both newlines. */
OP2(SLJIT_SUB, TMP1, 0, TMP2, 0, SLJIT_IMM, 0x80);
4719
CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40, skip_start);
4720
4721
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0x2000);
4722
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4723
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4724
}
4725
4726
static void do_utfmoveback_invalid(compiler_common *common)
{
/* UTF-8 helper for subjects that may contain invalid sequences: goes one
character back. The emitted code rejects an entry TMP1 of 0xc0 or above, then
looks for a lead byte one, two or three code units below the entry STR_PTR
without reading below TMP2. It returns TMP1 = 1 with STR_PTR on that lead
byte, or TMP1 = 0 with STR_PTR one code unit above its entry value.
NOTE(review): TMP1 presumably holds the code unit the caller just stepped
over and TMP2 the start of the readable subject - confirm against callers. */
DEFINE_COMPILER;
struct sljit_jump *bad[7];
struct sljit_jump *near_start;
struct sljit_jump *longer;
struct sljit_label *ok_exit;
struct sljit_label *bad_exit;
sljit_s32 n;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* All offsets below are relative to (entry STR_PTR - 3). */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
bad[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);

/* Fewer than three code units above TMP2: use the bounds-checked variant. */
near_start = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

/* Lead of a two-byte sequence (0xc0-0xdf) at offset 2? */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
longer = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x20);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Not a two-byte lead. TMP1 is (byte - 0xc0); anything compared below -0x40
is not a continuation byte (0x80-0xbf), so the sequence is invalid. Otherwise
look for the lead of a three-byte sequence (0xe0-0xef) at offset 1. */
JUMPHERE(longer);
bad[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
longer = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x10);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Not a three-byte lead either. The byte at offset 1 must be a continuation
byte and the byte at offset 0 must be a four-byte lead in 0xf0-0xf4. */
JUMPHERE(longer);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
bad[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0);
bad[3] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x05);

/* Shared success exit: STR_PTR already points to the lead byte. */
ok_exit = LABEL();
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Close to TMP2: every read is preceded by a bounds check.
Two-byte sequence first. */
JUMPHERE(near_start);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

bad[4] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20, ok_exit);

/* Then a three-byte sequence, one code unit further down. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
bad[5] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);
bad[6] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10, ok_exit);

/* Four-byte sequences are not possible this close to TMP2. Failure exit for
the near-start three-byte checks: STR_PTR is at (entry - 2) here. */
bad_exit = LABEL();
for (n = 5; n < 7; n++)
  sljit_set_label(bad[n], bad_exit);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* bad[4] arrives with STR_PTR at (entry - 1); move it to (entry - 3) so the
common failure exit below can add 4 (-2 + 4 = 2). */
JUMPHERE(bad[4]);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

/* Failure exit for the checks made with STR_PTR at (entry - 3). */
bad_exit = LABEL();
for (n = 0; n < 4; n++)
  sljit_set_label(bad[n], bad_exit);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(4));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4821
4822
static void do_utfpeakcharback(compiler_common *common)
{
/* Peek at the UTF-8 character that ends just before STR_PTR; STR_PTR itself
is never written. The emitted code performs no validity checks: it inspects
the bytes at STR_PTR[-2] and STR_PTR[-3] to choose a two-, three- or
four-byte decode and leaves the assembled value in TMP1. TMP2 is scratch. */
DEFINE_COMPILER;
struct sljit_jump *is_two_byte;
struct sljit_jump *is_three_byte;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* A byte in 0xc0-0xdf at [-2] is the lead of a two-byte character. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
is_two_byte = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20);

/* A byte in 0xe0-0xef at [-3] is the lead of a three-byte character. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
is_three_byte = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10);

/* Otherwise four bytes: [-3] is a continuation byte (TMP1 is re-based from
0xe0 to 0x80) and the payload of the lead at [-4] goes above it. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Three- and four-byte characters: append the payload of [-2]. */
JUMPHERE(is_three_byte);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Every length: append the payload of the last byte at [-1]. */
JUMPHERE(is_two_byte);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4858
4859
static void do_utfpeakcharback_invalid(compiler_common *common)
4860
{
4861
/* Peek a character back (UTF-8, the subject may contain invalid sequences).
Does not modify STR_PTR. The emitted code leaves the decoded character in
TMP1, or INVALID_UTF_CHAR when the bytes below STR_PTR do not form an
accepted sequence. The entry value of TMP1 is combined with the bytes read
at STR_PTR[-2] and below, so it presumably holds the code unit at STR_PTR[-1]
(confirm against the callers). TMP2 acts as a lower bound for the reads: the
four-byte path is taken only when more than three code units lie between
TMP2 and STR_PTR (assumed to be the start of the subject - confirm). */
4862
DEFINE_COMPILER;
4863
sljit_s32 i;
4864
sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
4865
struct sljit_jump *jump[2];
4866
struct sljit_label *two_byte_entry;
4867
struct sljit_label *three_byte_entry;
4868
struct sljit_label *exit_invalid_label;
4869
struct sljit_jump *exit_invalid[8];
4870
4871
sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4872
/* jump[0] is taken when TMP2 + 3 >= STR_PTR; the code for that case, which
checks the bound before each deeper read, follows the four-byte decoder. */
4873
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
4874
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);
4875
jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);
4876
4877
/* Two-byte sequence. Accepted lead bytes are 0xc2-0xdf. */
4878
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4879
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
4880
jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x1e);
4881
/* Also entered from the two near-start paths below. */
4882
two_byte_entry = LABEL();
4883
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
4884
/* If TMP1 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
4885
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
4886
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4887
/* Not a two-byte lead: the byte at [-2] must be a continuation byte
(0x80-0xbf). Its payload is merged above the payload of TMP1. */
4888
JUMPHERE(jump[1]);
4889
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
4890
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
4891
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4892
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
4893
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4894
4895
/* Three-byte sequence. Accepted lead bytes are 0xe0-0xef. */
4896
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
4897
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
4898
jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x10);
4899
/* Also entered from the near-start three-byte path below. */
4900
three_byte_entry = LABEL();
4901
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
4902
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4903
/* Reject values in 0xd800-0xdfff. With a conditional move the value is
replaced so that the second range check below turns it into
INVALID_UTF_CHAR; without one, control jumps to the invalid exit. */
4904
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4905
if (has_cmov)
4906
  {
4907
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
4908
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, -0xd800);
4909
  exit_invalid[2] = NULL;
4910
  }
4911
else
4912
  exit_invalid[2] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
4913
/* Reject values below 0x800 (too small for a three-byte sequence). */
4914
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4915
if (has_cmov)
4916
  {
4917
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
4918
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
4919
  exit_invalid[3] = NULL;
4920
  }
4921
else
4922
  exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
4923
4924
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4925
/* Not a three-byte lead: the byte at [-3] must be a continuation byte. */
4926
JUMPHERE(jump[1]);
4927
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0 - 0x80);
4928
exit_invalid[4] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4929
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
4930
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4931
4932
/* Four-byte sequence. The value is biased by -0x10000 so that one unsigned
comparison against 0x100000 accepts exactly 0x10000-0x10ffff. */
4933
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
4934
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
4935
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
4936
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 18);
4937
/* ADD is used instead of OR because of the SUB 0x10000 above. */
4938
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
4939
4940
if (has_cmov)
4941
  {
4942
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
4943
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000);
4944
  exit_invalid[5] = NULL;
4945
  }
4946
else
4947
  exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
4948
/* Remove the bias again. */
4949
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
4950
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4951
/* Near-start case: at most three code units lie between the entry TMP2 and
STR_PTR. TMP2 is lowered to (entry TMP2 + 2); when that is still >= STR_PTR
only a two-byte sequence can be tried (second jump[0] target below). */
4952
JUMPHERE(jump[0]);
4953
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4954
jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);
4955
4956
/* Two-byte sequence. */
4957
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4958
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
4959
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);
4960
4961
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
4962
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
4963
exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4964
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
4965
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4966
4967
/* Three-byte sequence. A four-byte sequence cannot fit here, so anything
that is not a three-byte lead is invalid. */
4968
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
4969
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
4970
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x10, three_byte_entry);
4971
4972
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
4973
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4974
/* At most two code units are readable; with fewer than two even a two-byte
sequence is impossible. */
4975
JUMPHERE(jump[0]);
4976
exit_invalid[7] = CMP(SLJIT_GREATER, TMP2, 0, STR_PTR, 0);
4977
4978
/* Two-byte sequence. */
4979
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4980
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
4981
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);
4982
/* Common invalid exit. The entries set to NULL on the has_cmov paths are
passed to sljit_set_label unchanged.
NOTE(review): relies on sljit_set_label accepting a NULL jump - confirm. */
4983
exit_invalid_label = LABEL();
4984
for (i = 0; i < 8; i++)
4985
  sljit_set_label(exit_invalid[i], exit_invalid_label);
4986
4987
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
4988
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4989
}
4990
4991
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
4992
4993
#if PCRE2_CODE_UNIT_WIDTH == 16
4994
4995
static void do_utfreadchar_invalid(compiler_common *common)
{
/* Slow path for decoding a UTF-16 surrogate pair when the subject may be
invalid. TMP1 holds the first code unit (>= 0xd800) on entry; the character
value, or INVALID_UTF_CHAR, is returned in TMP1. STR_PTR is undefined for
invalid characters. */
DEFINE_COMPILER;
struct sljit_jump *reject[3];
sljit_s32 k;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* TMP2 contains the high surrogate.
NOTE(review): TMP2 is shifted by 10 and biased by 0x10000 below, so it
presumably arrives already reduced by 0xd800 - confirm against callers. */
reject[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);
reject[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

/* Fetch and consume the second code unit. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* It must be a low surrogate (0xdc00-0xdfff). */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
reject[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Every rejection lands here. */
for (k = 0; k < 3; k++)
  JUMPHERE(reject[k]);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5026
5027
static void do_utfreadnewline_invalid(compiler_common *common)
{
/* Slow path for reading a UTF-16 character when only newlines matter. TMP1
holds the first code unit (>= 0xd800) on entry. The emitted code returns
INVALID_UTF_CHAR in TMP1 when the subject ends here or TMP1 is 0xdc00 or
above. Otherwise it returns the constant 0x10000 in TMP1 and advances
STR_PTR by one code unit only if the next unit is a low surrogate. */
DEFINE_COMPILER;
struct sljit_jump *at_end;
struct sljit_jump *not_high;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* TMP2 contains the high surrogate. */
at_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
not_high = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);

/* TMP2 becomes 1 when the next unit lies in 0xdc00-0xdfff and 0 otherwise;
scaled to bytes, it is the amount STR_PTR moves forward. */
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(at_end);
JUMPHERE(not_high);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5058
5059
static void do_utfmoveback_invalid(compiler_common *common)
{
/* UTF-16 helper for subjects that may be invalid: goes one character back.
The emitted code succeeds (TMP1 = 1, STR_PTR moved one code unit down) only
when the entry TMP1 is at least 0x400, STR_PTR is above TMP2 and the unit at
STR_PTR[-1] is a high surrogate (0xd800-0xdbff). Otherwise it returns
TMP1 = 0 with STR_PTR moved one code unit up.
NOTE(review): TMP1 looks like a code unit pre-reduced by the caller and TMP2
like the start of the subject - confirm against callers. */
DEFINE_COMPILER;
struct sljit_jump *fail[3];
sljit_s32 k;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

fail[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400);
fail[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* Is the preceding unit a high surrogate? */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
fail[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);

OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

for (k = 0; k < 3; k++)
  JUMPHERE(fail[k]);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5086
5087
static void do_utfpeakcharback_invalid(compiler_common *common)
{
/* Peek a character back (UTF-16, the subject may be invalid). Does not
modify STR_PTR. An entry TMP1 of 0xe000 or above is returned unchanged. A
TMP1 in 0xdc00-0xdfff is combined with the unit at STR_PTR[-2], which must be
a high surrogate and must not lie below TMP2. Anything else yields
INVALID_UTF_CHAR in TMP1.
NOTE(review): TMP1 presumably holds the unit at STR_PTR[-1] and TMP2 the
start of the subject - confirm against callers. */
DEFINE_COMPILER;
struct sljit_jump *single_unit;
struct sljit_jump *invalid[3];
sljit_s32 k;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

single_unit = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
invalid[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* Build 0x10000 + (high << 10) + low from the two surrogates. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

JUMPHERE(single_unit);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

for (k = 0; k < 3; k++)
  JUMPHERE(invalid[k]);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5118
5119
#endif /* PCRE2_CODE_UNIT_WIDTH == 16 */
5120
5121
/* Constants for splitting a code point in the two-stage UCD lookup: the low
UCD_BLOCK_SHIFT bits (selected by UCD_BLOCK_MASK) index inside a block and
the remaining high bits select the block. Both values assume that
UCD_BLOCK_SIZE is 128, i.e. 1 << UCD_BLOCK_SHIFT (see the assert below). */
5122
#define UCD_BLOCK_MASK 127
5123
#define UCD_BLOCK_SHIFT 7
5124
5125
static void do_getucd(compiler_common *common)
{
/* Emits the helper that finds the UCD record of the character in TMP1 by
walking the two-stage tables. On return TMP2 holds the 16-bit value loaded
from ucd_stage2 and TMP1 holds the index that was used for that load; the
original character value in TMP1 is lost. */
DEFINE_COMPILER;
#if PCRE2_CODE_UNIT_WIDTH == 32
struct sljit_jump *in_range;
#endif

#if defined SLJIT_DEBUG && SLJIT_DEBUG
/* dummy_ucd_record: the record selected for UNASSIGNED_UTF_CHAR must be
completely neutral. */
const ucd_record *dummy = GET_UCD(UNASSIGNED_UTF_CHAR);
SLJIT_ASSERT(dummy->script == ucp_Unknown);
SLJIT_ASSERT(dummy->chartype == ucp_Cn);
SLJIT_ASSERT(dummy->gbprop == ucp_gbOther);
SLJIT_ASSERT(dummy->caseset == 0);
SLJIT_ASSERT(dummy->other_case == 0);
#endif

SLJIT_ASSERT(UCD_BLOCK_SIZE == 128);
SLJIT_ASSERT(sizeof(ucd_record) == 12);

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
if (!common->utf)
  {
  /* Values above MAX_UTF_CODE_POINT are looked up as UNASSIGNED_UTF_CHAR. */
  in_range = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
  JUMPHERE(in_range);
  }
#endif

/* Stage 1: 16-bit entry selected by the block number (TMP1 >> 7). */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));

/* Stage 2: index = stage-1 value * 128 + offset of TMP1 inside its block. */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5164
5165
static void do_getucdtype(compiler_common *common)
{
/* Emits the helper that looks up the character type of the character in
TMP1 through the two-stage UCD tables. On return TMP1 holds the chartype
byte of the selected ucd_record and TMP2 holds the record index multiplied
by 4. */
DEFINE_COMPILER;
#if PCRE2_CODE_UNIT_WIDTH == 32
struct sljit_jump *in_range;
#endif

#if defined SLJIT_DEBUG && SLJIT_DEBUG
/* dummy_ucd_record: the record selected for UNASSIGNED_UTF_CHAR must be
completely neutral. */
const ucd_record *dummy = GET_UCD(UNASSIGNED_UTF_CHAR);
SLJIT_ASSERT(dummy->script == ucp_Unknown);
SLJIT_ASSERT(dummy->chartype == ucp_Cn);
SLJIT_ASSERT(dummy->gbprop == ucp_gbOther);
SLJIT_ASSERT(dummy->caseset == 0);
SLJIT_ASSERT(dummy->other_case == 0);
#endif

SLJIT_ASSERT(UCD_BLOCK_SIZE == 128);
SLJIT_ASSERT(sizeof(ucd_record) == 12);

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
if (!common->utf)
  {
  /* Values above MAX_UTF_CODE_POINT are looked up as UNASSIGNED_UTF_CHAR. */
  in_range = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
  JUMPHERE(in_range);
  }
#endif

/* Stage 1: 16-bit entry selected by the block number (TMP1 >> 7). */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));

/* Stage 2: index = stage-1 value * 128 + offset of TMP1 inside its block. */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);

/* Records are 12 bytes long, so TMP2 is multiplied by 12. Same as
(TMP2 << 2) + ((TMP2 << 2) << 1): the first term is added to the base address
and the second comes from the shifted index of the final load. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 1);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5211
5212
#endif /* SUPPORT_UNICODE */
5213
5214
static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common)
5215
{
/* Emits the code at the head of the main matching loop and returns the label
(mainloop) at which STR_PTR is advanced to the next starting position.
Before that label it optionally emits: the search for the end of the first
line (PCRE2_FIRSTLINE) or the computation of the offset limit
(PCRE2_USE_OFFSET_LIMIT), both stored in the stack slot
common->match_end_ptr; a jump (start) that skips the advance code when the
emitted code is first entered; and, when newlinecheck is set, a side entry
(newlinelabel) that steps over a two-unit newline. */
5216
DEFINE_COMPILER;
5217
struct sljit_label *mainloop;
5218
struct sljit_label *newlinelabel = NULL;
5219
struct sljit_jump *start;
5220
struct sljit_jump *end = NULL;
5221
struct sljit_jump *end2 = NULL;
5222
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5223
struct sljit_label *loop;
5224
struct sljit_jump *jump;
5225
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
5226
jump_list *newline = NULL;
5227
sljit_u32 overall_options = common->re->overall_options;
5228
BOOL hascrorlf = (common->re->flags & PCRE2_HASCRORLF) != 0;
5229
BOOL newlinecheck = FALSE;
5230
BOOL readuchar = FALSE;
5231
/* The extra newline step is emitted only when the PCRE2_HASCRORLF flag and
PCRE2_FIRSTLINE are both absent and the newline type is "any", "anycrlf" or
a newline value above 255. */
5232
if (!(hascrorlf || (overall_options & PCRE2_FIRSTLINE) != 0)
5233
    && (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
5234
  newlinecheck = TRUE;
5235
5236
SLJIT_ASSERT(common->abort_label == NULL);
5237
5238
if ((overall_options & PCRE2_FIRSTLINE) != 0)
5239
  {
5240
  /* Search for the end of the first line. STR_PTR is saved in TMP3 during
  the scan and restored afterwards. */
5241
  SLJIT_ASSERT(common->match_end_ptr != 0);
5242
  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
5243
5244
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5245
    {
    /* Fixed newline above 255: compare each pair of adjacent code units
    with the high and low bytes of common->newline. */
5246
    mainloop = LABEL();
5247
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5248
    end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5249
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5250
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5251
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
5252
    CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
5253
    JUMPHERE(end);
5254
    OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5255
    }
5256
  else
5257
    {
5258
    end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5259
    mainloop = LABEL();
5260
    /* Continual stores do not cause a data dependency. */
5261
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
5262
    read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
5263
    check_newlinechar(common, common->nltype, &newline, TRUE);
5264
    CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
5265
    JUMPHERE(end);
5266
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
5267
    set_jumps(newline, LABEL());
5268
    }
5269
5270
  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
5271
  }
5272
else if ((overall_options & PCRE2_USE_OFFSET_LIMIT) != 0)
5273
  {
5274
  /* Check whether offset limit is set and valid. TMP1 receives
  jit_arguments.offset_limit; TMP2 ends up as the pointer that is stored in
  the match_end_ptr slot. */
5275
  SLJIT_ASSERT(common->match_end_ptr != 0);
5276
5277
  if (HAS_VIRTUAL_REGISTERS)
5278
    {
5279
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5280
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, offset_limit));
5281
    }
5282
  else
5283
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, offset_limit));
5284
  /* An unset limit (PCRE2_UNSET) stores STR_END. */
5285
  OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
5286
  end = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw) PCRE2_UNSET);
5287
  if (HAS_VIRTUAL_REGISTERS)
5288
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5289
  else
5290
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
5291
5292
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
5293
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5294
#endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
5295
  if (HAS_VIRTUAL_REGISTERS)
5296
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5297
  /* TMP2 = begin + limit, clamped to STR_END. A limit that lies below the
  current STR_PTR aborts with PCRE2_ERROR_NOMATCH. */
5298
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
5299
  end2 = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
5300
  OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
5301
  JUMPHERE(end2);
5302
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
5303
  add_jump(compiler, &common->abort, CMP(SLJIT_LESS, TMP2, 0, STR_PTR, 0));
5304
  JUMPHERE(end);
5305
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, TMP2, 0);
5306
  }
5307
/* Falling into this point skips all of the advance code below: the jump is
bound at JUMPHERE(start), after the code that follows the mainloop label. */
5308
start = JUMP(SLJIT_JUMP);
5309
5310
if (newlinecheck)
5311
  {
  /* Reached from the main loop when the current code unit equals the high
  byte of common->newline: step over it, and over the following unit as
  well if that one equals the low byte. */
5312
  newlinelabel = LABEL();
5313
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5314
  end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5315
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5316
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
5317
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
5318
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
5319
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5320
#endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
5321
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5322
  end2 = JUMP(SLJIT_JUMP);
5323
  }
5324
5325
mainloop = LABEL();
5326
5327
/* Increasing the STR_PTR here requires one less jump in the most common case.
The current code unit is loaded into TMP1 first whenever the code below
needs it (valid UTF with 8/16-bit units, or the newline check). */
5328
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5329
if (common->utf && !common->invalid_utf) readuchar = TRUE;
5330
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
5331
if (newlinecheck) readuchar = TRUE;
5332
5333
if (readuchar)
5334
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5335
5336
if (newlinecheck)
5337
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
5338
5339
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5340
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5341
#if PCRE2_CODE_UNIT_WIDTH == 8
5342
if (common->invalid_utf)
5343
  {
5344
  /* Skip continuation code units (0x80-0xbf), stopping at STR_END. The
  first unit that is not a continuation is un-read again. */
5345
  loop = LABEL();
5346
  jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5347
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5348
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5349
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
5350
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x40, loop);
5351
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5352
  JUMPHERE(jump);
5353
  }
5354
else if (common->utf)
5355
  {
  /* For a lead byte (>= 0xc0) add the value found in utf8_table4, indexed
  by (byte - 0xc0), to STR_PTR. */
5356
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5357
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5358
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5359
  JUMPHERE(jump);
5360
  }
5361
#elif PCRE2_CODE_UNIT_WIDTH == 16
5362
if (common->invalid_utf)
5363
  {
5364
  /* Skip continuation code units (low surrogates, 0xdc00-0xdfff), stopping
  at STR_END. The first unit that is not one is un-read again. */
5365
  loop = LABEL();
5366
  jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5367
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5368
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5369
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
5370
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400, loop);
5371
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5372
  JUMPHERE(jump);
5373
  }
5374
else if (common->utf)
5375
  {
  /* Advance one more code unit when the unit just passed was a high
  surrogate (0xd800-0xdbff), with or without a conditional move. */
5376
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5377
5378
  if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
5379
    {
5380
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5381
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x400);
5382
    CMOV(SLJIT_LESS, STR_PTR, TMP2, 0);
5383
    }
5384
  else
5385
    {
5386
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x400);
5387
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
5388
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5389
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5390
    }
5391
  }
5392
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
5393
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
5394
JUMPHERE(start);
5395
/* Both exits of the newline side entry rejoin here as well. */
5396
if (newlinecheck)
5397
  {
5398
  JUMPHERE(end);
5399
  JUMPHERE(end2);
5400
  }
5401
5402
return mainloop;
5403
}
5404
5405
5406
static SLJIT_INLINE void add_prefix_char(PCRE2_UCHAR chr, fast_forward_char_data *chars, BOOL last)
{
/* Adds chr to the set of characters recorded in *chars, unless it is already
there. A count of 255 is the saturated state: it is set once the set would
grow beyond MAX_DIFF_CHARS entries, and nothing is recorded after that.
When "last" is set, last_count is updated together with the new entry. */
sljit_u32 used = chars->count;
sljit_u32 pos;

if (used == 255)
  return;

if (used > 0)
  {
  /* Duplicates are ignored. */
  for (pos = 0; pos < used; pos++)
    {
    if (chars->chars[pos] == chr)
      return;
    }

  /* No room for another distinct character: saturate. */
  if (used >= MAX_DIFF_CHARS)
    {
    chars->count = 255;
    return;
    }
  }

chars->chars[used] = chr;
chars->count = used + 1;

if (!last)
  return;

/* The first entry resets last_count, later entries increment it. */
if (used == 0)
  chars->last_count = 1;
else
  chars->last_count++;
}
5439
5440
static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, fast_forward_char_data *chars, int max_chars, sljit_u32 *rec_count)
5441
{
5442
/* Recursive function, which scans prefix literals. */
5443
BOOL last, any, class, caseless;
5444
int len, repeat, len_save, consumed = 0;
5445
sljit_u32 chr; /* Any unicode character. */
5446
sljit_u8 *bytes, *bytes_end, byte;
5447
PCRE2_SPTR alternative, cc_save, oc;
5448
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5449
PCRE2_UCHAR othercase[4];
5450
#elif defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
5451
PCRE2_UCHAR othercase[2];
5452
#else
5453
PCRE2_UCHAR othercase[1];
5454
#endif
5455
5456
repeat = 1;
5457
while (TRUE)
5458
  {
5459
  if (*rec_count == 0)
5460
    return 0;
5461
  (*rec_count)--;
5462
5463
  last = TRUE;
5464
  any = FALSE;
5465
  class = FALSE;
5466
  caseless = FALSE;
5467
5468
  switch (*cc)
5469
    {
5470
    case OP_CHARI:
5471
    caseless = TRUE;
5472
    /* Fall through */
5473
    case OP_CHAR:
5474
    last = FALSE;
5475
    cc++;
5476
    break;
5477
5478
    case OP_SOD:
5479
    case OP_SOM:
5480
    case OP_SET_SOM:
5481
    case OP_NOT_WORD_BOUNDARY:
5482
    case OP_WORD_BOUNDARY:
5483
    case OP_EODN:
5484
    case OP_EOD:
5485
    case OP_CIRC:
5486
    case OP_CIRCM:
5487
    case OP_DOLL:
5488
    case OP_DOLLM:
5489
    /* Zero width assertions. */
5490
    cc++;
5491
    continue;
5492
5493
    case OP_ASSERT:
5494
    case OP_ASSERT_NOT:
5495
    case OP_ASSERTBACK:
5496
    case OP_ASSERTBACK_NOT:
5497
    case OP_ASSERT_NA:
5498
    case OP_ASSERTBACK_NA:
5499
    cc = bracketend(cc);
5500
    continue;
5501
5502
    case OP_PLUSI:
5503
    case OP_MINPLUSI:
5504
    case OP_POSPLUSI:
5505
    caseless = TRUE;
5506
    /* Fall through */
5507
    case OP_PLUS:
5508
    case OP_MINPLUS:
5509
    case OP_POSPLUS:
5510
    cc++;
5511
    break;
5512
5513
    case OP_EXACTI:
5514
    caseless = TRUE;
5515
    /* Fall through */
5516
    case OP_EXACT:
5517
    repeat = GET2(cc, 1);
5518
    last = FALSE;
5519
    cc += 1 + IMM2_SIZE;
5520
    break;
5521
5522
    case OP_QUERYI:
5523
    case OP_MINQUERYI:
5524
    case OP_POSQUERYI:
5525
    caseless = TRUE;
5526
    /* Fall through */
5527
    case OP_QUERY:
5528
    case OP_MINQUERY:
5529
    case OP_POSQUERY:
5530
    len = 1;
5531
    cc++;
5532
#ifdef SUPPORT_UNICODE
5533
    if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
5534
#endif
5535
    max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count);
5536
    if (max_chars == 0)
5537
      return consumed;
5538
    last = FALSE;
5539
    break;
5540
5541
    case OP_KET:
5542
    cc += 1 + LINK_SIZE;
5543
    continue;
5544
5545
    case OP_ALT:
5546
    cc += GET(cc, 1);
5547
    continue;
5548
5549
    case OP_ONCE:
5550
    case OP_BRA:
5551
    case OP_BRAPOS:
5552
    case OP_CBRA:
5553
    case OP_CBRAPOS:
5554
    alternative = cc + GET(cc, 1);
5555
    while (*alternative == OP_ALT)
5556
      {
5557
      max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count);
5558
      if (max_chars == 0)
5559
        return consumed;
5560
      alternative += GET(alternative, 1);
5561
      }
5562
5563
    if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
5564
      cc += IMM2_SIZE;
5565
    cc += 1 + LINK_SIZE;
5566
    continue;
5567
5568
    case OP_CLASS:
5569
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5570
    if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
5571
      return consumed;
5572
#endif
5573
    class = TRUE;
5574
    break;
5575
5576
    case OP_NCLASS:
5577
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5578
    if (common->utf) return consumed;
5579
#endif
5580
    class = TRUE;
5581
    break;
5582
5583
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
5584
    case OP_XCLASS:
5585
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5586
    if (common->utf) return consumed;
5587
#endif
5588
    any = TRUE;
5589
    cc += GET(cc, 1);
5590
    break;
5591
#endif
5592
5593
    case OP_DIGIT:
5594
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5595
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
5596
      return consumed;
5597
#endif
5598
    any = TRUE;
5599
    cc++;
5600
    break;
5601
5602
    case OP_WHITESPACE:
5603
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5604
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
5605
      return consumed;
5606
#endif
5607
    any = TRUE;
5608
    cc++;
5609
    break;
5610
5611
    case OP_WORDCHAR:
5612
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5613
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
5614
      return consumed;
5615
#endif
5616
    any = TRUE;
5617
    cc++;
5618
    break;
5619
5620
    case OP_NOT:
5621
    case OP_NOTI:
5622
    cc++;
5623
    /* Fall through. */
5624
    case OP_NOT_DIGIT:
5625
    case OP_NOT_WHITESPACE:
5626
    case OP_NOT_WORDCHAR:
5627
    case OP_ANY:
5628
    case OP_ALLANY:
5629
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5630
    if (common->utf) return consumed;
5631
#endif
5632
    any = TRUE;
5633
    cc++;
5634
    break;
5635
5636
#ifdef SUPPORT_UNICODE
5637
    case OP_NOTPROP:
5638
    case OP_PROP:
5639
#if PCRE2_CODE_UNIT_WIDTH != 32
5640
    if (common->utf) return consumed;
5641
#endif
5642
    any = TRUE;
5643
    cc += 1 + 2;
5644
    break;
5645
#endif
5646
5647
    case OP_TYPEEXACT:
5648
    repeat = GET2(cc, 1);
5649
    cc += 1 + IMM2_SIZE;
5650
    continue;
5651
5652
    case OP_NOTEXACT:
5653
    case OP_NOTEXACTI:
5654
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5655
    if (common->utf) return consumed;
5656
#endif
5657
    any = TRUE;
5658
    repeat = GET2(cc, 1);
5659
    cc += 1 + IMM2_SIZE + 1;
5660
    break;
5661
5662
    default:
5663
    return consumed;
5664
    }
5665
5666
  if (any)
5667
    {
5668
    do
5669
      {
5670
      chars->count = 255;
5671
5672
      consumed++;
5673
      if (--max_chars == 0)
5674
        return consumed;
5675
      chars++;
5676
      }
5677
    while (--repeat > 0);
5678
5679
    repeat = 1;
5680
    continue;
5681
    }
5682
5683
  if (class)
5684
    {
5685
    bytes = (sljit_u8*) (cc + 1);
5686
    cc += 1 + 32 / sizeof(PCRE2_UCHAR);
5687
5688
    switch (*cc)
5689
      {
5690
      case OP_CRSTAR:
5691
      case OP_CRMINSTAR:
5692
      case OP_CRPOSSTAR:
5693
      case OP_CRQUERY:
5694
      case OP_CRMINQUERY:
5695
      case OP_CRPOSQUERY:
5696
      max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
5697
      if (max_chars == 0)
5698
        return consumed;
5699
      break;
5700
5701
      default:
5702
      case OP_CRPLUS:
5703
      case OP_CRMINPLUS:
5704
      case OP_CRPOSPLUS:
5705
      break;
5706
5707
      case OP_CRRANGE:
5708
      case OP_CRMINRANGE:
5709
      case OP_CRPOSRANGE:
5710
      repeat = GET2(cc, 1);
5711
      if (repeat <= 0)
5712
        return consumed;
5713
      break;
5714
      }
5715
5716
    do
5717
      {
5718
      if (bytes[31] & 0x80)
5719
        chars->count = 255;
5720
      else if (chars->count != 255)
5721
        {
5722
        bytes_end = bytes + 32;
5723
        chr = 0;
5724
        do
5725
          {
5726
          byte = *bytes++;
5727
          SLJIT_ASSERT((chr & 0x7) == 0);
5728
          if (byte == 0)
5729
            chr += 8;
5730
          else
5731
            {
5732
            do
5733
              {
5734
              if ((byte & 0x1) != 0)
5735
                add_prefix_char(chr, chars, TRUE);
5736
              byte >>= 1;
5737
              chr++;
5738
              }
5739
            while (byte != 0);
5740
            chr = (chr + 7) & ~7;
5741
            }
5742
          }
5743
        while (chars->count != 255 && bytes < bytes_end);
5744
        bytes = bytes_end - 32;
5745
        }
5746
5747
      consumed++;
5748
      if (--max_chars == 0)
5749
        return consumed;
5750
      chars++;
5751
      }
5752
    while (--repeat > 0);
5753
5754
    switch (*cc)
5755
      {
5756
      case OP_CRSTAR:
5757
      case OP_CRMINSTAR:
5758
      case OP_CRPOSSTAR:
5759
      return consumed;
5760
5761
      case OP_CRQUERY:
5762
      case OP_CRMINQUERY:
5763
      case OP_CRPOSQUERY:
5764
      cc++;
5765
      break;
5766
5767
      case OP_CRRANGE:
5768
      case OP_CRMINRANGE:
5769
      case OP_CRPOSRANGE:
5770
      if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
5771
        return consumed;
5772
      cc += 1 + 2 * IMM2_SIZE;
5773
      break;
5774
      }
5775
5776
    repeat = 1;
5777
    continue;
5778
    }
5779
5780
  len = 1;
5781
#ifdef SUPPORT_UNICODE
5782
  if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
5783
#endif
5784
5785
  if (caseless && char_has_othercase(common, cc))
5786
    {
5787
#ifdef SUPPORT_UNICODE
5788
    if (common->utf)
5789
      {
5790
      GETCHAR(chr, cc);
5791
      if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
5792
        return consumed;
5793
      }
5794
    else
5795
#endif
5796
      {
5797
      chr = *cc;
5798
#ifdef SUPPORT_UNICODE
5799
      if (common->ucp && chr > 127)
5800
        othercase[0] = UCD_OTHERCASE(chr);
5801
      else
5802
#endif
5803
        othercase[0] = TABLE_GET(chr, common->fcc, chr);
5804
      }
5805
    }
5806
  else
5807
    {
5808
    caseless = FALSE;
5809
    othercase[0] = 0; /* Stops compiler warning - PH */
5810
    }
5811
5812
  len_save = len;
5813
  cc_save = cc;
5814
  while (TRUE)
5815
    {
5816
    oc = othercase;
5817
    do
5818
      {
5819
      len--;
5820
      consumed++;
5821
5822
      chr = *cc;
5823
      add_prefix_char(*cc, chars, len == 0);
5824
5825
      if (caseless)
5826
        add_prefix_char(*oc, chars, len == 0);
5827
5828
      if (--max_chars == 0)
5829
        return consumed;
5830
      chars++;
5831
      cc++;
5832
      oc++;
5833
      }
5834
    while (len > 0);
5835
5836
    if (--repeat == 0)
5837
      break;
5838
5839
    len = len_save;
5840
    cc = cc_save;
5841
    }
5842
5843
  repeat = 1;
5844
  if (last)
5845
    return consumed;
5846
  }
5847
}
5848
5849
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5850
/* Emits code that masks the code unit held in "reg" and branches to "label"
when the masked value equals the marker of a non-leading unit: the 10xxxxxx
pattern (0x80 under mask 0xc0) for 8-bit units, or the 0xdc00 block (under
mask 0xfc00) for 16-bit units.

  compiler  sljit compiler the instructions are appended to
  reg       register holding the code unit; it is overwritten with the
            masked value
  label     branch target taken when the unit is not a character start
*/
static void jumpto_if_not_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg, struct sljit_label *label)
{
#if PCRE2_CODE_UNIT_WIDTH == 8
const sljit_sw unit_mask = 0xc0;
const sljit_sw trailing_unit = 0x80;
#elif PCRE2_CODE_UNIT_WIDTH == 16
const sljit_sw unit_mask = 0xfc00;
const sljit_sw trailing_unit = 0xdc00;
#else
#error "Unknown code width"
#endif

/* Keep only the bits that classify the unit, then compare and branch. */
OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, unit_mask);
CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, trailing_unit, label);
}
5862
#endif
5863
5864
#include "pcre2_jit_simd_inc.h"
5865
5866
#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
5867
5868
/* Chooses two prefix positions for the SIMD character-pair search and, when
a usable pair exists, emits that search.

A position is a candidate when its priority (last_count) is above 2. The
higher position is paired only with lower positions that lie at most
max_fast_forward_char_pair_offset() entries before it, and the two positions
must not share any character. Among valid pairs the one with the largest
priority sum is kept; on a tie the pair examined later replaces the earlier.

  common  compiler state passed through to fast_forward_char_pair_simd()
  chars   per-position prefix data (chars[0..1] and last_count are read)
  max     number of valid entries in chars

Returns TRUE when the pair search was emitted, FALSE when no pair qualified.
*/
static BOOL check_fast_forward_char_pair_simd(compiler_common *common, fast_forward_char_data *chars, int max)
{
  sljit_s32 hi, lo, best_hi = 0, best_lo = 0;
  sljit_u32 best_score = 0;
  PCRE2_UCHAR hi_c0, hi_c1, hi_score, lo_c0, lo_c1, lo_score;

  for (hi = max - 1; hi >= 1; hi--)
    {
    if (chars[hi].last_count <= 2)
      continue;

    hi_c0 = chars[hi].chars[0];
    hi_c1 = chars[hi].chars[1];
    hi_score = chars[hi].last_count;

    /* Lowest partner index allowed for this position, clamped to zero. */
    lo = hi - max_fast_forward_char_pair_offset();
    if (lo < 0)
      lo = 0;

    for (; lo < hi; lo++)
      {
      lo_score = chars[lo].last_count;
      if (lo_score <= 2 || (sljit_u32)hi_score + (sljit_u32)lo_score < best_score)
        continue;

      lo_c0 = chars[lo].chars[0];
      lo_c1 = chars[lo].chars[1];

      /* The two positions must have disjoint character sets. */
      if (hi_c0 == lo_c0 || hi_c0 == lo_c1 || hi_c1 == lo_c0 || hi_c1 == lo_c1)
        continue;

      best_score = hi_score + lo_score;
      best_hi = hi;
      best_lo = lo;
      }
    }

  if (best_score == 0)
    return FALSE;

  fast_forward_char_pair_simd(common, best_hi, chars[best_hi].chars[0], chars[best_hi].chars[1], best_lo, chars[best_lo].chars[0], chars[best_lo].chars[1]);
  return TRUE;
}
5912
5913
#endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
5914
5915
/* Emits a forward scan that looks for a code unit equal to char1 or char2.

  common  compiler state (mode, utf flag, match_end_ptr, failed_match list)
  char1   first accepted code unit
  char2   second accepted code unit; may be equal to char1
  offset  distance, in code units, of the searched unit from the match start;
          must be 0 unless the mode is PCRE2_JIT_COMPLETE (asserted below)

The generated code examines the unit at STR_PTR + offset and, on success,
leaves STR_PTR "offset" units before the unit that matched. When
common->match_end_ptr is set, STR_END is temporarily lowered for the scan and
restored from TMP3 at the end. TMP1 is used as scratch. */
static void fast_forward_first_char2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
5916
{
5917
DEFINE_COMPILER;
5918
struct sljit_label *start;
5919
struct sljit_jump *match;
5920
struct sljit_jump *partial_quit;
5921
PCRE2_UCHAR mask;
5922
BOOL has_match_end = (common->match_end_ptr != 0);
5923
5924
SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE || offset == 0);
5925
5926
/* TMP1 = value stored in the match_end_ptr stack slot. */
if (has_match_end)
5927
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
5928
5929
/* Scan at the position of the searched unit rather than at the match start. */
if (offset > 0)
5930
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
5931
5932
if (has_match_end)
5933
  {
5934
  /* Save the real STR_END in TMP3, then replace STR_END by TMP1 + offset + 1
  when STR_END is the greater of the two. */
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
5935
5936
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offset + 1));
5937
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
5938
  CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
5939
  }
5940
5941
#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD
5942
5943
/* SIMD variant: the search itself is delegated to fast_forward_char_simd();
only the STR_PTR and STR_END adjustments made above are undone here. */
if (JIT_HAS_FAST_FORWARD_CHAR_SIMD)
5944
  {
5945
  fast_forward_char_simd(common, char1, char2, offset);
5946
5947
  if (offset > 0)
5948
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
5949
5950
  if (has_match_end)
5951
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
5952
  return;
5953
  }
5954
5955
#endif
5956
5957
/* Scalar loop head. */
start = LABEL();
5958
5959
/* Subject exhausted: in complete mode this is a failed match; in the other
modes the jump is bound after the STR_PTR restore near the end. */
partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5960
if (common->mode == PCRE2_JIT_COMPLETE)
5961
  add_jump(compiler, &common->failed_match, partial_quit);
5962
5963
/* Load the current unit and step past it. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5964
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5965
5966
if (char1 == char2)
5967
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1, start);
5968
else
5969
  {
5970
  mask = char1 ^ char2;
5971
  /* When the two units differ in a single bit, forcing that bit on lets one
  comparison cover both. */
  if (is_powerof2(mask))
5972
    {
5973
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
5974
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | mask, start);
5975
    }
5976
  else
5977
    {
5978
    match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
5979
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char2, start);
5980
    JUMPHERE(match);
5981
    }
5982
  }
5983
5984
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5985
/* STR_PTR is now offset + 1 units past the would-be match start; reject the
hit when the unit at that start is not the first unit of a character. */
if (common->utf && offset > 0)
5986
  {
5987
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-(offset + 1)));
5988
  jumpto_if_not_utf_char_start(compiler, TMP1, start);
5989
  }
5990
#endif
5991
5992
/* Undo both the initial offset and the increment made after the load. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset + 1));
5993
5994
if (common->mode != PCRE2_JIT_COMPLETE)
5995
  JUMPHERE(partial_quit);
5996
5997
if (has_match_end)
5998
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
5999
}
6000
6001
/* Builds a start-of-match accelerator from the first characters of the
pattern, as collected by scan_prefix().

Steps performed:
  1. Collect up to MAX_N_CHARS prefix positions.
  2. Turn every position's last_count into a priority (higher is better).
  3. Try the SIMD character-pair search when it is available.
  4. Otherwise look for a run of more than three consecutive positions whose
     character sets are restricted (count < 255) and build a 256 entry shift
     table for it, and/or pick one extra position with at most two
     characters to be compared directly.
  5. Emit the corresponding scanning code.

Returns TRUE when code was emitted (and also when the table allocation
failed), FALSE when nothing useful could be derived from the prefix. */
static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
6002
{
6003
DEFINE_COMPILER;
6004
struct sljit_label *start;
6005
struct sljit_jump *match;
6006
fast_forward_char_data chars[MAX_N_CHARS];
6007
sljit_s32 offset;
6008
PCRE2_UCHAR mask;
6009
PCRE2_UCHAR *char_set, *char_set_end;
6010
int i, max, from;
6011
int range_right = -1, range_len;
6012
sljit_u8 *update_table = NULL;
6013
BOOL in_range;
6014
sljit_u32 rec_count;
6015
6016
for (i = 0; i < MAX_N_CHARS; i++)
6017
  {
6018
  chars[i].count = 0;
6019
  chars[i].last_count = 0;
6020
  }
6021
6022
/* rec_count is a work budget handed to scan_prefix() by pointer. */
rec_count = 10000;
6023
max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);
6024
6025
if (max < 1)
6026
  return FALSE;
6027
6028
/* Convert last_count to priority. */
6029
/* Resulting priorities: 7 or 5 for a single character, 6 or 4 for two
characters that differ in one bit, 3 or 2 for two other characters, 1 for
larger sets, 0 when count is 255. */
for (i = 0; i < max; i++)
6030
  {
6031
  SLJIT_ASSERT(chars[i].count > 0 && chars[i].last_count <= chars[i].count);
6032
6033
  if (chars[i].count == 1)
6034
    {
6035
    chars[i].last_count = (chars[i].last_count == 1) ? 7 : 5;
6036
    /* Simplifies algorithms later. */
6037
    chars[i].chars[1] = chars[i].chars[0];
6038
    }
6039
  else if (chars[i].count == 2)
6040
    {
6041
    SLJIT_ASSERT(chars[i].chars[0] != chars[i].chars[1]);
6042
6043
    if (is_powerof2(chars[i].chars[0] ^ chars[i].chars[1]))
6044
      chars[i].last_count = (chars[i].last_count == 2) ? 6 : 4;
6045
    else
6046
      chars[i].last_count = (chars[i].last_count == 2) ? 3 : 2;
6047
    }
6048
  else
6049
    chars[i].last_count = (chars[i].count == 255) ? 0 : 1;
6050
  }
6051
6052
#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
6053
if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && check_fast_forward_char_pair_simd(common, chars, max))
6054
  return TRUE;
6055
#endif
6056
6057
in_range = FALSE;
6058
/* Prevent compiler "uninitialized" warning */
6059
from = 0;
6060
range_len = 4 /* minimum length */ - 1;
6061
/* Find the longest run of positions with count < 255. The loop runs one step
past the last entry so that a run reaching the end is recorded as well.
range_right is the index of the last position of the chosen run. */
for (i = 0; i <= max; i++)
6062
  {
6063
  if (in_range && (i - from) > range_len && (chars[i - 1].count < 255))
6064
    {
6065
    range_len = i - from;
6066
    range_right = i - 1;
6067
    }
6068
6069
  if (i < max && chars[i].count < 255)
6070
    {
6071
    SLJIT_ASSERT(chars[i].count > 0);
6072
    if (!in_range)
6073
      {
6074
      in_range = TRUE;
6075
      from = i;
6076
      }
6077
    }
6078
  else
6079
    in_range = FALSE;
6080
  }
6081
6082
if (range_right >= 0)
6083
  {
6084
  update_table = (sljit_u8 *)allocate_read_only_data(common, 256);
6085
  /* NOTE(review): TRUE is returned on allocation failure; presumably the
  failure is reported through the compiler state - confirm against
  allocate_read_only_data(). */
  if (update_table == NULL)
6086
    return TRUE;
6087
  /* Default shift: the whole run length, for bytes that occur nowhere in it. */
  memset(update_table, IN_UCHARS(range_len), 256);
6088
6089
  /* For every byte keep the smallest distance (counted from the right end of
  the run) at which a character with that low byte may appear. */
  for (i = 0; i < range_len; i++)
6090
    {
6091
    SLJIT_ASSERT(chars[range_right - i].count > 0 && chars[range_right - i].count < 255);
6092
6093
    char_set = chars[range_right - i].chars;
6094
    char_set_end = char_set + chars[range_right - i].count;
6095
    do
6096
      {
6097
      if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))
6098
        update_table[(*char_set) & 0xff] = IN_UCHARS(i);
6099
      char_set++;
6100
      }
6101
    while (char_set < char_set_end);
6102
    }
6103
  }
6104
6105
offset = -1;
6106
/* Scan forward. */
6107
/* Pick the position with the highest priority, skipping range_right; the
first candidate needs a priority of at least 2, later ones must be strictly
better than the current choice. */
for (i = 0; i < max; i++)
6108
  {
6109
  if (range_right == i)
6110
    continue;
6111
6112
  if (offset == -1)
6113
    {
6114
    if (chars[i].last_count >= 2)
6115
      offset = i;
6116
    }
6117
  else if (chars[offset].last_count < chars[i].last_count)
6118
    offset = i;
6119
  }
6120
6121
SLJIT_ASSERT(offset == -1 || (chars[offset].count >= 1 && chars[offset].count <= 2));
6122
6123
/* No usable run: fall back to a one or two character search, if possible. */
if (range_right < 0)
6124
  {
6125
  if (offset < 0)
6126
    return FALSE;
6127
  /* Works regardless the value is 1 or 2. */
6128
  fast_forward_first_char2(common, chars[offset].chars[0], chars[offset].chars[1], offset);
6129
  return TRUE;
6130
  }
6131
6132
SLJIT_ASSERT(range_right != offset);
6133
6134
/* Lower STR_END by max units for the duration of the scan (failing the match
when the result is below zero). With match_end_ptr set, the original STR_END
is saved in TMP3 and the lowered value is further limited to the value stored
in the match_end_ptr slot. */
if (common->match_end_ptr != 0)
6135
  {
6136
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6137
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
6138
  OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6139
  add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
6140
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
6141
  CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
6142
  }
6143
else
6144
  {
6145
  OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6146
  add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
6147
  }
6148
6149
SLJIT_ASSERT(range_right >= 0);
6150
6151
/* Without virtual registers the table address is kept in RETURN_ADDR. */
if (!HAS_VIRTUAL_REGISTERS)
6152
  OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
6153
6154
start = LABEL();
6155
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
6156
6157
/* Load one byte of the code unit at position range_right: the unit's first
byte in the first variant, its last byte in the second. */
#if PCRE2_CODE_UNIT_WIDTH == 8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
6158
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
6159
#else
6160
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
6161
#endif
6162
6163
/* TMP1 = update_table[TMP1]. */
if (!HAS_VIRTUAL_REGISTERS)
6164
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
6165
else
6166
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
6167
6168
/* Advance by the shift; a non-zero shift means the position was rejected. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6169
CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
6170
6171
/* Additional exact check of the selected position. STR_PTR is incremented
here, so a failed check resumes the scan one unit further. */
if (offset >= 0)
6172
  {
6173
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
6174
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6175
6176
  if (chars[offset].count == 1)
6177
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0], start);
6178
  else
6179
    {
6180
    mask = chars[offset].chars[0] ^ chars[offset].chars[1];
6181
    if (is_powerof2(mask))
6182
      {
6183
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
6184
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0] | mask, start);
6185
      }
6186
    else
6187
      {
6188
      match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0]);
6189
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[1], start);
6190
      JUMPHERE(match);
6191
      }
6192
    }
6193
  }
6194
6195
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
6196
/* In UTF mode the candidate start must be the first unit of a character.
With offset < 0 no increment has happened yet, so the unit is read at STR_PTR
and STR_PTR is stepped temporarily; otherwise the candidate start is the unit
just before the already incremented STR_PTR. */
if (common->utf && offset != 0)
6197
  {
6198
  if (offset < 0)
6199
    {
6200
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6201
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6202
    }
6203
  else
6204
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
6205
6206
  jumpto_if_not_utf_char_start(compiler, TMP1, start);
6207
6208
  if (offset < 0)
6209
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6210
  }
6211
#endif
6212
6213
/* Undo the increment made by the exact check above. */
if (offset >= 0)
6214
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6215
6216
/* Restore the original STR_END. */
if (common->match_end_ptr != 0)
6217
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6218
else
6219
  OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6220
return TRUE;
6221
}
6222
6223
/* Emits the first code unit search for a pattern with a known first code
unit (common->re->first_codeunit).

When the PCRE2_FIRSTCASELESS flag is set, the other-case form of the unit is
accepted as well: it comes from the common->fcc table, and for units above
127 in UTF or UCP mode from UCD_OTHERCASE(). The search itself is emitted by
fast_forward_first_char2() with offset 0. */
static SLJIT_INLINE void fast_forward_first_char(compiler_common *common)
{
const PCRE2_UCHAR lead_unit = (PCRE2_UCHAR)(common->re->first_codeunit);
const BOOL caseless = (common->re->flags & PCRE2_FIRSTCASELESS) != 0;
PCRE2_UCHAR alt_unit = lead_unit;

if (caseless)
  {
  alt_unit = TABLE_GET(lead_unit, common->fcc, lead_unit);
#if defined SUPPORT_UNICODE
  /* The Unicode data replaces the table result for non-ASCII units. */
  if (lead_unit > 127 && (common->utf || common->ucp))
    alt_unit = UCD_OTHERCASE(lead_unit);
#endif
  }

fast_forward_first_char2(common, lead_unit, alt_unit, 0);
}
6240
6241
/* Emits code that moves STR_PTR forward to the position following the next
newline sequence. NOTE(review): presumably used for patterns that can only
match at the start of a line - confirm against the caller.

Nothing is scanned when STR_PTR is not above the "str" field of the
jit_arguments block (the "firstchar" jump). When common->match_end_ptr is
set, STR_END is replaced by the value in that stack slot during the scan and
restored from TMP3 afterwards. TMP1 and TMP2 are used as scratch. */
static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
6242
{
6243
DEFINE_COMPILER;
6244
struct sljit_label *loop;
6245
struct sljit_jump *lastchar = NULL;
6246
struct sljit_jump *firstchar;
6247
struct sljit_jump *quit = NULL;
6248
struct sljit_jump *foundcr = NULL;
6249
struct sljit_jump *notfoundnl;
6250
jump_list *newline = NULL;
6251
6252
if (common->match_end_ptr != 0)
6253
  {
6254
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
6255
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6256
  }
6257
6258
/* Fixed newline made of two code units: the first unit is kept in bits 8-15
of common->newline, the second in bits 0-7 (see the masks used below). */
if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6259
  {
6260
#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
6261
  if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && common->mode == PCRE2_JIT_COMPLETE)
6262
    {
6263
    /* TMP2 = arguments->str, TMP1 = arguments->begin. */
    if (HAS_VIRTUAL_REGISTERS)
6264
      {
6265
      OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6266
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6267
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6268
      }
6269
    else
6270
      {
6271
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
6272
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
6273
      }
6274
    firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6275
6276
    /* Step back one unit, and a second one unless that reaches "begin". */
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6277
    OP2U(SLJIT_SUB | SLJIT_SET_Z, STR_PTR, 0, TMP1, 0);
6278
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_NOT_EQUAL);
6279
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6280
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
6281
#endif
6282
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6283
6284
    /* Search for the second newline unit at offset 1 and the first one at
    offset 0, then skip both units. */
    fast_forward_char_pair_simd(common, 1, common->newline & 0xff, common->newline & 0xff, 0, (common->newline >> 8) & 0xff, (common->newline >> 8) & 0xff);
6285
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6286
    }
6287
  else
6288
#endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
6289
    {
6290
    lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6291
    if (HAS_VIRTUAL_REGISTERS)
6292
      {
6293
      OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6294
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6295
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6296
      }
6297
    else
6298
      {
6299
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
6300
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
6301
      }
6302
    firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6303
6304
    /* Step back one unit when STR_PTR is at least two units past "begin". */
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
6305
    OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, STR_PTR, 0, TMP1, 0);
6306
    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);
6307
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6308
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
6309
#endif
6310
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
6311
6312
    /* Advance until the two units before STR_PTR form the newline, or the
    end of the subject is reached. */
    loop = LABEL();
6313
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6314
    quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6315
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
6316
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
6317
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
6318
    CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
6319
6320
    JUMPHERE(quit);
6321
    JUMPHERE(lastchar);
6322
    }
6323
6324
  JUMPHERE(firstchar);
6325
6326
  if (common->match_end_ptr != 0)
6327
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6328
  return;
6329
  }
6330
6331
/* All other newline types. TMP2 = arguments->str. */
if (HAS_VIRTUAL_REGISTERS)
6332
  {
6333
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6334
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6335
  }
6336
else
6337
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
6338
6339
/* Example: match /^/ to \r\n from offset 1. */
6340
firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6341
6342
/* Step back so that the unit before the current position is examined first. */
if (common->nltype == NLTYPE_ANY)
6343
  move_back(common, NULL, FALSE);
6344
else
6345
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6346
6347
/* The loop head is also published in common->ff_newline_shortcut. */
loop = LABEL();
6348
common->ff_newline_shortcut = loop;
6349
6350
#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD
6351
if (JIT_HAS_FAST_FORWARD_CHAR_SIMD && (common->nltype == NLTYPE_FIXED || common->nltype == NLTYPE_ANYCRLF))
6352
  {
6353
  if (common->nltype == NLTYPE_ANYCRLF)
6354
    {
6355
    /* Find CR or LF, step past it; anything but CR ends the search, CR
    continues with the optional LF check further below. */
    fast_forward_char_simd(common, CHAR_CR, CHAR_LF, 0);
6356
    if (common->mode != PCRE2_JIT_COMPLETE)
6357
      lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6358
6359
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6360
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6361
    quit = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6362
    }
6363
   else
6364
    {
6365
    fast_forward_char_simd(common, common->newline, common->newline, 0);
6366
6367
    /* Step past the newline; outside complete mode STR_PTR is limited to
    STR_END. */
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6368
    if (common->mode != PCRE2_JIT_COMPLETE)
6369
      {
6370
      OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
6371
      CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
6372
      }
6373
    }
6374
  }
6375
else
6376
#endif /* JIT_HAS_FAST_FORWARD_CHAR_SIMD */
6377
  {
6378
  /* Scalar path: read a character, stop at the end of the subject, and send
  the jumps collected by check_newlinechar() back to the loop head. */
  read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
6379
  lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6380
  if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
6381
    foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6382
  check_newlinechar(common, common->nltype, &newline, FALSE);
6383
  set_jumps(newline, loop);
6384
  }
6385
6386
if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
6387
  {
6388
  /* quit is still NULL only when the scalar path above was emitted. */
  if (quit == NULL)
6389
    {
6390
    quit = JUMP(SLJIT_JUMP);
6391
    JUMPHERE(foundcr);
6392
    }
6393
6394
  /* After a CR: when the next unit exists and is NL, skip it as well. */
  notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6395
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6396
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_NL);
6397
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
6398
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6399
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
6400
#endif
6401
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6402
  JUMPHERE(notfoundnl);
6403
  JUMPHERE(quit);
6404
  }
6405
6406
if (lastchar)
6407
  JUMPHERE(lastchar);
6408
JUMPHERE(firstchar);
6409
6410
if (common->match_end_ptr != 0)
6411
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6412
}
6413
6414
static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
6415
6416
/* Emits a scan that advances STR_PTR to the first code unit accepted by the
start bitmap of the pattern (common->re->start_bitmap).

The character test is generated by optimize_class() when it succeeds;
otherwise a generic "load byte, test bit" sequence on the bitmap is emitted.
When common->match_end_ptr is set, STR_END is temporarily limited and the
original value is kept in RETURN_ADDR until the end. TMP1, TMP2 (and TMP3
without virtual registers) are used as scratch. */
static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common)
6417
{
6418
DEFINE_COMPILER;
6419
const sljit_u8 *start_bits = common->re->start_bitmap;
6420
struct sljit_label *start;
6421
struct sljit_jump *partial_quit;
6422
#if PCRE2_CODE_UNIT_WIDTH != 8
6423
struct sljit_jump *found = NULL;
6424
#endif
6425
jump_list *matches = NULL;
6426
6427
/* Save STR_END in RETURN_ADDR and replace it by (match end + 1 unit) when
STR_END is the greater of the two. */
if (common->match_end_ptr != 0)
6428
  {
6429
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6430
  OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
6431
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
6432
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
6433
  CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
6434
  }
6435
6436
start = LABEL();
6437
6438
/* Subject exhausted: a failed match in complete mode; in the other modes the
jump is bound after the STR_PTR restore near the end. */
partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6439
if (common->mode == PCRE2_JIT_COMPLETE)
6440
  add_jump(compiler, &common->failed_match, partial_quit);
6441
6442
/* Load the current unit and step past it. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6443
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6444
6445
if (!optimize_class(common, start_bits, (start_bits[31] & 0x80) != 0, FALSE, &matches))
6446
  {
6447
#if PCRE2_CODE_UNIT_WIDTH != 8
6448
  /* Units of 255 and above share the last bit of the bitmap: they are all
  accepted when it is set and all rejected otherwise. */
  if ((start_bits[31] & 0x80) != 0)
6449
    found = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255);
6450
  else
6451
    CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255, start);
6452
#elif defined SUPPORT_UNICODE
6453
  if (common->utf && is_char7_bitset(start_bits, FALSE))
6454
    CMPTO(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 127, start);
6455
#endif
6456
  /* TMP2 = bit index within the byte, TMP1 = bitmap byte for the unit. */
  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
6457
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
6458
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
6459
  if (!HAS_VIRTUAL_REGISTERS)
6460
    {
6461
    OP2(SLJIT_SHL, TMP3, 0, SLJIT_IMM, 1, TMP2, 0);
6462
    OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP3, 0);
6463
    }
6464
  else
6465
    {
6466
    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
6467
    OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
6468
    }
6469
  /* Bit clear: the unit cannot start a match, keep scanning. */
  JUMPTO(SLJIT_ZERO, start);
6470
  }
6471
else
6472
  /* The jumps collected by optimize_class() continue the scan. */
  set_jumps(matches, start);
6473
6474
#if PCRE2_CODE_UNIT_WIDTH != 8
6475
if (found != NULL)
6476
  JUMPHERE(found);
6477
#endif
6478
6479
/* Undo the increment so that STR_PTR points at the accepted unit. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6480
6481
if (common->mode != PCRE2_JIT_COMPLETE)
6482
  JUMPHERE(partial_quit);
6483
6484
if (common->match_end_ptr != 0)
6485
  OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
6486
}
6487
6488
/* Emits a forward search for a code unit that has to occur in the subject
for the pattern to match.

  common         compiler state; common->req_char_ptr is the stack slot that
                 holds the position where the unit was found last time
  req_char       the required code unit
  caseless       TRUE when the other-case form of req_char is accepted too
  has_firstchar  TRUE to start the search one unit after STR_PTR

Returns the list of jumps that are taken when the unit is not found; binding
them is left to the caller.

The search is skipped when the remaining subject is longer than
REQ_CU_MAX * 100 code units, or when STR_PTR is still below the position
stored in the slot. TMP1 and TMP2 are used as scratch. */
static SLJIT_INLINE jump_list *search_requested_char(compiler_common *common, PCRE2_UCHAR req_char, BOOL caseless, BOOL has_firstchar)
6489
{
6490
DEFINE_COMPILER;
6491
struct sljit_label *loop;
6492
struct sljit_jump *toolong;
6493
struct sljit_jump *already_found;
6494
struct sljit_jump *found;
6495
struct sljit_jump *found_oc = NULL;
6496
jump_list *not_found = NULL;
6497
sljit_u32 oc, bit;
6498
6499
SLJIT_ASSERT(common->req_char_ptr != 0);
6500
/* TMP2 = STR_PTR + search limit, TMP1 = previously stored position. */
OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(REQ_CU_MAX) * 100);
6501
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
6502
toolong = CMP(SLJIT_LESS, TMP2, 0, STR_END, 0);
6503
already_found = CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0);
6504
6505
/* TMP1 becomes the search cursor. */
if (has_firstchar)
6506
  OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6507
else
6508
  OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
6509
6510
/* oc is the other-case form of req_char, or req_char itself. */
oc = req_char;
6511
if (caseless)
6512
  {
6513
  oc = TABLE_GET(req_char, common->fcc, req_char);
6514
#if defined SUPPORT_UNICODE
6515
  if (req_char > 127 && (common->utf || common->ucp))
6516
    oc = UCD_OTHERCASE(req_char);
6517
#endif
6518
  }
6519
6520
#ifdef JIT_HAS_FAST_REQUESTED_CHAR_SIMD
6521
if (JIT_HAS_FAST_REQUESTED_CHAR_SIMD)
6522
  {
6523
  not_found = fast_requested_char_simd(common, req_char, oc);
6524
  }
6525
else
6526
#endif
6527
  {
6528
  /* Scalar loop: give up at STR_END, otherwise compare the unit at TMP1. */
  loop = LABEL();
6529
  add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
6530
6531
  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
6532
6533
  if (req_char == oc)
6534
    found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
6535
  else
6536
    {
6537
    bit = req_char ^ oc;
6538
    /* A single differing bit allows one comparison for both cases. */
    if (is_powerof2(bit))
6539
      {
6540
       OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
6541
      found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
6542
      }
6543
    else
6544
      {
6545
      found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
6546
      found_oc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
6547
      }
6548
    }
6549
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
6550
  JUMPTO(SLJIT_JUMP, loop);
6551
6552
  JUMPHERE(found);
6553
  if (found_oc)
6554
    JUMPHERE(found_oc);
6555
  }
6556
6557
/* Remember where the unit was found for later calls. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
6558
6559
JUMPHERE(already_found);
6560
JUMPHERE(toolong);
6561
return not_found;
6562
}
6563
6564
/* Emits the fast-call helper that restores saved local values from the
backtracking stack.

The generated loop reads the machine word just below STACK_TOP as a signed
tag and acts on it:
  tag > 0   the tag is the offset of a local slot; two words are copied from
            the stack into that slot and the following word, and three words
            are popped
  tag == 0  end of the saved values; the helper returns
  tag < 0   the negated tag is the offset of a local slot; one word is copied
            from the stack into it, and two words are popped

The return address is kept in RETURN_ADDR; TMP1 holds the base address of the
locals, TMP2 and TMP3 are scratch. */
static void do_revertframes(compiler_common *common)
6565
{
6566
DEFINE_COMPILER;
6567
struct sljit_jump *jump;
6568
struct sljit_label *mainloop;
6569
6570
sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
6571
GET_LOCAL_BASE(TMP1, 0, 0);
6572
6573
/* Drop frames until we reach STACK_TOP. */
6574
mainloop = LABEL();
6575
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -SSIZE_OF(sw));
6576
jump = CMP(SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0);
6577
6578
/* Positive tag: TMP2 = address of the local slot to restore. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
6579
if (HAS_VIRTUAL_REGISTERS)
6580
  {
6581
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
6582
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -(3 * SSIZE_OF(sw)));
6583
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * SSIZE_OF(sw));
6584
  }
6585
else
6586
  {
6587
  /* TMP1 is borrowed as a temporary here, so the local base is reloaded
  before the loop continues. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
6588
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(3 * SSIZE_OF(sw)));
6589
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * SSIZE_OF(sw));
6590
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
6591
  GET_LOCAL_BASE(TMP1, 0, 0);
6592
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP3, 0);
6593
  }
6594
JUMPTO(SLJIT_JUMP, mainloop);
6595
6596
/* Tag <= 0: a non-zero value here can only be negative. */
JUMPHERE(jump);
6597
jump = CMP(SLJIT_NOT_ZERO /* SIG_LESS */, TMP2, 0, SLJIT_IMM, 0);
6598
/* End of reverting values. */
6599
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
6600
6601
/* Negative tag: TMP2 = local base - tag, the address of a single word. */
JUMPHERE(jump);
6602
OP2(SLJIT_SUB, TMP2, 0, SLJIT_IMM, 0, TMP2, 0);
6603
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
6604
if (HAS_VIRTUAL_REGISTERS)
6605
  {
6606
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
6607
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
6608
  }
6609
else
6610
  {
6611
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
6612
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
6613
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP3, 0);
6614
  }
6615
JUMPTO(SLJIT_JUMP, mainloop);
6616
}
6617
6618
/* Emits the shared fast-call routine behind the word boundary tests.

The routine classifies the character before STR_PTR (result in TMP3) and the
character at STR_PTR (result in TMP2) as word (1) or non-word (0), then
returns with TMP2 = TMP2 ^ TMP3 and the zero flag set from that XOR. The
return address is parked in LOCALS0 for the duration of the routine.

When common->invalid_utf is set, two additional exits are emitted at the end
of the function: one returns TMP2 = -1, the other returns TMP2 = TMP3. */
static void check_wordboundary(compiler_common *common)
6619
{
6620
DEFINE_COMPILER;
6621
struct sljit_jump *skipread;
6622
jump_list *skipread_list = NULL;
6623
#ifdef SUPPORT_UNICODE
6624
struct sljit_label *valid_utf;
6625
jump_list *invalid_utf1 = NULL;
6626
#endif /* SUPPORT_UNICODE */
6627
jump_list *invalid_utf2 = NULL;
6628
#if PCRE2_CODE_UNIT_WIDTH != 8 || defined SUPPORT_UNICODE
6629
struct sljit_jump *jump;
6630
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 || SUPPORT_UNICODE */
6631
6632
/* The ctypes lookups below shift right by 4 and mask with 1, which is only
correct while ctype_word is bit 4. */
SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
6633
6634
sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6635
/* Get type of the previous char, and put it to TMP3. */
6636
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6637
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6638
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
6639
/* STR_PTR at (or before) the subject begin: there is no previous character,
so the read is skipped and TMP3 keeps the 0 stored above. */
skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6640
6641
#ifdef SUPPORT_UNICODE
6642
if (common->invalid_utf)
6643
  {
6644
  /* Jumps collected in invalid_utf1 land on the handler emitted near the end
  of this function. */
  peek_char_back(common, READ_CHAR_MAX, &invalid_utf1);
6645
6646
  if (common->mode != PCRE2_JIT_COMPLETE)
6647
    {
6648
    /* Partial matching: TMP1 (the character just peeked) and STR_PTR are
    preserved in RETURN_ADDR / TMP2 around the start_used_ptr update. */
    OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
6649
    OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
6650
    move_back(common, NULL, TRUE);
6651
    check_start_used_ptr(common);
6652
    OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
6653
    OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
6654
    }
6655
  }
6656
else
6657
#endif /* SUPPORT_UNICODE */
6658
  {
6659
  if (common->mode == PCRE2_JIT_COMPLETE)
6660
    peek_char_back(common, READ_CHAR_MAX, NULL);
6661
  else
6662
    {
6663
    /* Partial matching: step back, record the position, then read the
    character forward again so that STR_PTR is advanced back by read_char. */
    move_back(common, NULL, TRUE);
6664
    check_start_used_ptr(common);
6665
    read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR);
6666
    }
6667
  }
6668
6669
/* Testing char type. */
6670
#ifdef SUPPORT_UNICODE
6671
if (common->ucp)
6672
  {
6673
  /* TMP2 is preset to 1 so that an underscore is reported as a word
  character without consulting the UCD type. For any other character the
  type returned by the getucdtype routine is tested against the ranges
  ucp_Ll..ucp_Lu and ucp_Nd..ucp_No. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
6674
  jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
6675
  add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
6676
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
6677
  OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
6678
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
6679
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
6680
  OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
6681
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
6682
  JUMPHERE(jump);
6683
  OP1(SLJIT_MOV, TMP3, 0, TMP2, 0);
6684
  }
6685
else
6686
#endif /* SUPPORT_UNICODE */
6687
  {
6688
  /* Characters above 255 are not looked up in the ctypes table: the jump
  bypasses the lookup and TMP3 keeps its earlier 0. */
#if PCRE2_CODE_UNIT_WIDTH != 8
6689
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
6690
#elif defined SUPPORT_UNICODE
6691
  /* Here TMP3 has already been zeroed. */
6692
  jump = NULL;
6693
  if (common->utf)
6694
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
6695
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
6696
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
6697
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
6698
  OP2(SLJIT_AND, TMP3, 0, TMP1, 0, SLJIT_IMM, 1);
6699
#if PCRE2_CODE_UNIT_WIDTH != 8
6700
  JUMPHERE(jump);
6701
#elif defined SUPPORT_UNICODE
6702
  if (jump != NULL)
6703
    JUMPHERE(jump);
6704
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
6705
  }
6706
JUMPHERE(skipread);
6707
6708
/* Now the character at STR_PTR. TMP2 defaults to 0; jumps added to
skipread_list by check_str_end bypass the classification below (presumably
taken when STR_PTR is at the subject end - confirm in check_str_end). */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
6709
check_str_end(common, &skipread_list);
6710
peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, &invalid_utf2);
6711
6712
/* Testing char type. This is a code duplication. */
6713
#ifdef SUPPORT_UNICODE
6714
6715
/* Re-entry point used by the invalid_utf1 handler below once it has peeked
a usable next character into TMP1. */
valid_utf = LABEL();
6716
6717
if (common->ucp)
6718
  {
6719
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
6720
  jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
6721
  add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
6722
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
6723
  OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
6724
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
6725
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
6726
  OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
6727
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
6728
  JUMPHERE(jump);
6729
  }
6730
else
6731
#endif /* SUPPORT_UNICODE */
6732
  {
6733
#if PCRE2_CODE_UNIT_WIDTH != 8
6734
  /* TMP2 may be destroyed by peek_char. */
6735
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
6736
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
6737
#elif defined SUPPORT_UNICODE
6738
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
6739
  jump = NULL;
6740
  if (common->utf)
6741
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
6742
#endif
6743
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
6744
  OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
6745
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
6746
#if PCRE2_CODE_UNIT_WIDTH != 8
6747
  JUMPHERE(jump);
6748
#elif defined SUPPORT_UNICODE
6749
  if (jump != NULL)
6750
    JUMPHERE(jump);
6751
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
6752
  }
6753
set_jumps(skipread_list, LABEL());
6754
6755
/* Normal exit: reload the return address, then combine both classifications.
TMP2 is non-zero (zero flag clear) exactly when the two types differ. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6756
OP2(SLJIT_XOR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, TMP3, 0);
6757
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
6758
6759
#ifdef SUPPORT_UNICODE
6760
if (common->invalid_utf)
6761
  {
6762
  /* Handler for jumps collected by the backward peek. The next character is
  peeked; unless it is INVALID_UTF_CHAR, classification resumes at valid_utf
  (TMP3 is still 0 on this path). Otherwise the routine returns TMP2 = -1. */
  set_jumps(invalid_utf1, LABEL());
6763
6764
  peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, NULL);
6765
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR, valid_utf);
6766
6767
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6768
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, -1);
6769
  OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
6770
6771
  /* Handler for jumps collected by the forward peek: the routine returns
  with TMP2 set to the type of the previous character (TMP3). No XOR is
  performed on this path. */
  set_jumps(invalid_utf2, LABEL());
6772
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6773
  OP1(SLJIT_MOV, TMP2, 0, TMP3, 0);
6774
  OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
6775
  }
6776
#endif /* SUPPORT_UNICODE */
6777
}
6778
6779
/* Tries to compile a 256 bit class bitmap as a handful of range comparisons
on TMP1 instead of a bitmap lookup.

The bitmap is scanned once and every code point at which the bit value flips
is recorded in ranges[]. When at most four such boundaries exist, compare and
jump sequences are emitted that add a jump to *backtracks for characters that
must be rejected, and TRUE is returned. FALSE is returned before anything is
emitted when the class has too many boundaries.

  bits        the 32 byte class bitmap
  nclass      decides whether a closing boundary at 256 is appended
  invert      flips the meaning of the first bit when emitting comparisons
  backtracks  receives the jumps taken on a failed match */
static BOOL optimize_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
6780
{
6781
/* May destroy TMP1. */
6782
DEFINE_COMPILER;
6783
int ranges[MAX_CLASS_RANGE_SIZE];
6784
sljit_u8 bit, cbit, all;
6785
int i, byte, length = 0;
6786
6787
bit = bits[0] & 0x1;
6788
/* All bits will be zero or one (since bit is zero or one). */
6789
all = -bit;
6790
6791
for (i = 0; i < 256; )
6792
  {
6793
  byte = i >> 3;
6794
  /* Fast path: on a byte boundary, a byte whose eight bits all equal the
  current bit value cannot contain a boundary and is skipped at once. */
  if ((i & 0x7) == 0 && bits[byte] == all)
6795
    i += 8;
6796
  else
6797
    {
6798
    cbit = (bits[byte] >> (i & 0x7)) & 0x1;
6799
    if (cbit != bit)
6800
      {
6801
      /* The bit value flips at i: record the boundary. */
      if (length >= MAX_CLASS_RANGE_SIZE)
6802
        return FALSE;
6803
      ranges[length] = i;
6804
      length++;
6805
      bit = cbit;
6806
      all = -cbit;
6807
      }
6808
    i++;
6809
    }
6810
  }
6811
6812
/* Depending on the value of the last bit and on nclass, a closing boundary
at 256 is appended. */
if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
6813
  {
6814
  if (length >= MAX_CLASS_RANGE_SIZE)
6815
    return FALSE;
6816
  ranges[length] = 256;
6817
  length++;
6818
  }
6819
6820
/* Only up to four boundaries are handled by the switch below. length starts
at 0 and is only ever incremented, so the negative test is purely defensive. */
if (length < 0 || length > 4)
6821
  return FALSE;
6822
6823
bit = bits[0] & 0x1;
6824
if (invert) bit ^= 0x1;
6825
6826
/* No character is accepted. */
6827
if (length == 0 && bit == 0)
6828
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6829
6830
switch(length)
6831
  {
6832
  case 0:
6833
  /* When bit != 0, all characters are accepted. */
6834
  return TRUE;
6835
6836
  case 1:
6837
  add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
6838
  return TRUE;
6839
6840
  case 2:
6841
  /* One interval [ranges[0], ranges[1]). A one element interval is tested
  with a plain equality; otherwise TMP1 is rebased to the interval start so
  that a single unsigned compare covers both ends. */
  if (ranges[0] + 1 != ranges[1])
6842
    {
6843
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
6844
    add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
6845
    }
6846
  else
6847
    add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
6848
  return TRUE;
6849
6850
  case 3:
6851
  if (bit != 0)
6852
    {
6853
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
6854
    if (ranges[0] + 1 != ranges[1])
6855
      {
6856
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
6857
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
6858
      }
6859
    else
6860
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
6861
    return TRUE;
6862
    }
6863
6864
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
6865
  if (ranges[1] + 1 != ranges[2])
6866
    {
6867
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
6868
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
6869
    }
6870
  else
6871
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
6872
  return TRUE;
6873
6874
  case 4:
6875
  /* Two intervals of equal length whose starts differ in exactly one bit
  (and whose first interval has that bit clear throughout) are folded into
  one by OR-ing that bit into TMP1; a single interval test then suffices. */
  if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
6876
      && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
6877
      && (ranges[1] & (ranges[2] - ranges[0])) == 0
6878
      && is_powerof2(ranges[2] - ranges[0]))
6879
    {
6880
    SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);
6881
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
6882
    if (ranges[2] + 1 != ranges[3])
6883
      {
6884
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
6885
      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
6886
      }
6887
    else
6888
      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
6889
    return TRUE;
6890
    }
6891
6892
  if (bit != 0)
6893
    {
6894
    /* i remembers how much has already been subtracted from TMP1 by the
    first test, so that the second test can rebase relative to it. */
    i = 0;
6895
    if (ranges[0] + 1 != ranges[1])
6896
      {
6897
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
6898
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
6899
      i = ranges[0];
6900
      }
6901
    else
6902
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
6903
6904
    if (ranges[2] + 1 != ranges[3])
6905
      {
6906
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
6907
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
6908
      }
6909
    else
6910
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
6911
    return TRUE;
6912
    }
6913
6914
  /* bit == 0: reject everything outside [ranges[0], ranges[3]) first, then
  reject the gap [ranges[1], ranges[2]) using the already rebased TMP1. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
6915
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
6916
  if (ranges[1] + 1 != ranges[2])
6917
    {
6918
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
6919
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
6920
    }
6921
  else
6922
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
6923
  return TRUE;
6924
6925
  default:
6926
  SLJIT_UNREACHABLE();
6927
  return FALSE;
6928
  }
6929
}
6930
6931
static BOOL optimize_class_chars(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* Compiles a class with only a few members as a chain of compare plus
conditional move instructions. TMP2 accumulates the "matched" state and TMP1
may be destroyed.

Returns FALSE without emitting anything when the CPU has no conditional move,
or when the (possibly complemented) bitmap has more members than fit into
MAX_CLASS_CHARS_SIZE. Otherwise the test is emitted, a jump is added to
*backtracks for the failing case, and TRUE is returned. */
DEFINE_COMPILER;
uint16_t members[MAX_CLASS_CHARS_SIZE];
uint8_t pending;
sljit_s32 cond;
int idx, bitpos, slot, ch;
int count = 0;
int paired = 0;
BOOL merged;

if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
  return FALSE;

/* Collect the members. A character that has bit 0x20 set and whose partner
without that bit is already listed is not stored separately: the partner's
entry is tagged with 0x120 instead, so one test on (TMP1 | 0x20) can cover
both of them later. */
for (idx = 0; idx < 32; idx++)
  {
  pending = nclass ? (uint8_t) ~bits[idx] : bits[idx];

  for (bitpos = 0; pending != 0; pending >>= 1, bitpos++)
    {
    if ((pending & 0x1) == 0)
      continue;

    ch = idx * 8 + bitpos;
    merged = FALSE;

    if ((ch & 0x20) != 0)
      {
      for (slot = 0; slot < count; slot++)
        {
        if (members[slot] == ch - 0x20)
          {
          members[slot] |= 0x120;
          merged = TRUE;
          break;
          }
        }
      }

    if (merged)
      continue;

    if (count >= MAX_CLASS_CHARS_SIZE)
      return FALSE;

    members[count] = (uint16_t) ch;
    count++;
    }
  }

if (count == 0) return FALSE;  /* Should never occur, but stops analyzers complaining. */

/* Initialise TMP2. When character zero is a member, its comparison doubles
as the initialisation. */
if (members[0] == 0)
  {
  idx = 1;
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_ZERO);
  }
else
  {
  idx = 0;
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  }

/* First pass: untagged members are compared against the unmodified TMP1.
Tagged entries are only counted here. */
for (; idx < count; idx++)
  {
  if ((members[idx] & 0x100) != 0)
    {
    paired++;
    continue;
    }

  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, members[idx]);
  CMOV(SLJIT_ZERO, TMP2, TMP1, 0);
  }

/* Second pass: tagged entries are compared after forcing bit 0x20 in TMP1. */
if (paired != 0)
  {
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x20);

  for (idx = 0; idx < count; idx++)
    {
    if ((members[idx] & 0x100) == 0)
      continue;

    OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, members[idx] & 0xff);
    CMOV(SLJIT_ZERO, TMP2, TMP1, 0);
    }
  }

/* invert swaps which outcome of the accumulated test counts as a failure. */
cond = (invert ? !nclass : nclass) ? SLJIT_NOT_EQUAL : SLJIT_EQUAL;
add_jump(compiler, backtracks, CMP(cond, TMP2, 0, SLJIT_IMM, 0));
return TRUE;
}
7031
7032
static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* Dispatcher for the two class shortcuts. The range based strategy is tried
first; the character list strategy is consulted only when the former declines.
Returns TRUE when either of them emitted the test. May destroy TMP1. */
return optimize_class_ranges(common, bits, nclass, invert, backtracks)
  ? TRUE
  : optimize_class_chars(common, bits, nclass, invert, backtracks);
}
7039
7040
/* Emits the shared fast-call routine that tests TMP1 for "any newline".

Accepted values are 0x0a..0x0d and 0x85; when the UTF / wide code unit part is
emitted, 0x2028 and 0x2029 are accepted as well. The result is accumulated in
TMP2 (non-zero on a match) and the final OP_FLAGS also sets the zero flag from
TMP2. TMP1 is modified by the routine (0x0a is subtracted, and bit 0 may be
forced to 1). */
static void check_anynewline(compiler_common *common)
7041
{
7042
/* Check whether TMP1 contains a newline character. TMP2 destroyed. */
7043
DEFINE_COMPILER;
7044
7045
sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
7046
7047
/* Rebase TMP1 so that 0x0a..0x0d becomes 0..3 and a single unsigned compare
covers the whole run. All later constants are relative to 0x0a as well. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
7048
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
7049
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
7050
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
7051
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
7052
#if PCRE2_CODE_UNIT_WIDTH == 8
7053
if (common->utf)
7054
  {
7055
#endif
7056
  /* Fold in the 0x85 result, then test 0x2028 and 0x2029 together: after
  forcing bit 0 both rebased values equal 0x2029 - 0x0a. */
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7057
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
7058
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
7059
#if PCRE2_CODE_UNIT_WIDTH == 8
7060
  }
7061
#endif
7062
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
7063
/* Fold in the result of the last comparison and publish the zero flag. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
7064
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
7065
}
7066
7067
/* Emits the shared fast-call routine that tests TMP1 for horizontal white
space.

Accepted values are 0x09, 0x20 and 0xa0; when the UTF / wide code unit part is
emitted, 0x1680, 0x180e, 0x2000..0x200a, 0x202f, 0x205f and 0x3000 are accepted
as well. The result is accumulated in TMP2 (non-zero on a match) and the final
OP_FLAGS also sets the zero flag from TMP2. In the UTF / wide part TMP1 is
modified (0x2000 is subtracted from it). */
static void check_hspace(compiler_common *common)
7068
{
7069
/* Check whether TMP1 contains a horizontal white space character. TMP2 destroyed. */
7070
DEFINE_COMPILER;
7071
7072
sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
7073
7074
/* Each comparison result is OR-ed into TMP2 by the OP_FLAGS that follows the
next comparison's setup; the last one is folded in just before returning. */
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x09);
7075
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
7076
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x20);
7077
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7078
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xa0);
7079
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
7080
#if PCRE2_CODE_UNIT_WIDTH == 8
7081
if (common->utf)
7082
  {
7083
#endif
7084
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7085
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x1680);
7086
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7087
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e);
7088
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7089
  /* Rebase TMP1 to 0x2000 so that 0x2000..0x200a is one unsigned compare;
  the remaining constants are expressed relative to 0x2000. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
7090
  OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
7091
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
7092
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
7093
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7094
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
7095
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7096
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
7097
#if PCRE2_CODE_UNIT_WIDTH == 8
7098
  }
7099
#endif
7100
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
7101
/* Fold in the result of the last comparison and publish the zero flag. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
7102
7103
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
7104
}
7105
7106
/* Emits the shared fast-call routine that tests TMP1 for vertical white
space.

Accepted values are 0x0a..0x0d and 0x85; when the UTF / wide code unit part is
emitted, 0x2028 and 0x2029 are accepted as well. The emitted sequence is the
same as the one in check_anynewline. The result is accumulated in TMP2
(non-zero on a match) and the final OP_FLAGS also sets the zero flag from
TMP2. TMP1 is modified by the routine. */
static void check_vspace(compiler_common *common)
7107
{
7108
/* Check whether TMP1 contains a vertical white space character. TMP2 destroyed. */
7109
DEFINE_COMPILER;
7110
7111
sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
7112
7113
/* Rebase TMP1 so that 0x0a..0x0d becomes 0..3 and a single unsigned compare
covers the whole run. All later constants are relative to 0x0a as well. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
7114
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
7115
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
7116
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
7117
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
7118
#if PCRE2_CODE_UNIT_WIDTH == 8
7119
if (common->utf)
7120
  {
7121
#endif
7122
  /* Fold in the 0x85 result, then test 0x2028 and 0x2029 together: after
  forcing bit 0 both rebased values equal 0x2029 - 0x0a. */
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7123
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
7124
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
7125
#if PCRE2_CODE_UNIT_WIDTH == 8
7126
  }
7127
#endif
7128
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
7129
/* Fold in the result of the last comparison and publish the zero flag. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
7130
7131
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
7132
}
7133
7134
/* Emits the shared fast-call routine for a case sensitive comparison of two
code unit sequences.

On entry TMP1 addresses the first sequence and TMP2 holds the length; STR_PTR
is moved back by TMP2 to address the second sequence. The loop compares one
code unit per iteration, decrementing TMP2 by IN_UCHARS(1), and leaves either
on the first mismatch or when TMP2 reaches zero. On a mismatch the decrement
is skipped, so TMP2 is still non-zero on exit (assuming it was non-zero on
entry). The return address is kept in LOCALS0 and reloaded into TMP1 before
returning, so TMP1 is destroyed. */
static void do_casefulcmp(compiler_common *common)
7135
{
7136
DEFINE_COMPILER;
7137
struct sljit_jump *jump;
7138
struct sljit_label *label;
7139
int char1_reg;
7140
int char2_reg;
7141
7142
/* Pick two scratch registers for the loaded code units. With virtual
registers, STR_END and STACK_TOP are borrowed and saved / restored below. */
if (HAS_VIRTUAL_REGISTERS)
7143
  {
7144
  char1_reg = STR_END;
7145
  char2_reg = STACK_TOP;
7146
  }
7147
else
7148
  {
7149
  char1_reg = TMP3;
7150
  char2_reg = RETURN_ADDR;
7151
  }
7152
7153
sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
7154
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
7155
7156
if (char1_reg == STR_END)
7157
  {
7158
  /* Save the borrowed registers. */
  OP1(SLJIT_MOV, TMP3, 0, char1_reg, 0);
7159
  OP1(SLJIT_MOV, RETURN_ADDR, 0, char2_reg, 0);
7160
  }
7161
7162
/* Three loop shapes follow. The calls carrying SLJIT_MEM_SUPP are used as a
query: their result selects post-update addressing, pre-update addressing, or
plain loads followed by explicit pointer increments. */
if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
7163
  {
7164
  label = LABEL();
7165
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
7166
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
7167
  jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
7168
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
7169
  JUMPTO(SLJIT_NOT_ZERO, label);
7170
7171
  JUMPHERE(jump);
7172
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
7173
  }
7174
else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
7175
  {
7176
  /* Pre-update form: both pointers are biased back by one code unit before
  the loop, and STR_PTR is moved forward by one code unit again after it. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
7177
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7178
7179
  label = LABEL();
7180
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
7181
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
7182
  jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
7183
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
7184
  JUMPTO(SLJIT_NOT_ZERO, label);
7185
7186
  JUMPHERE(jump);
7187
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
7188
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7189
  }
7190
else
7191
  {
7192
  label = LABEL();
7193
  OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
7194
  OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
7195
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
7196
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7197
  jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
7198
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
7199
  JUMPTO(SLJIT_NOT_ZERO, label);
7200
7201
  JUMPHERE(jump);
7202
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
7203
  }
7204
7205
if (char1_reg == STR_END)
7206
  {
7207
  /* Restore the borrowed registers. */
  OP1(SLJIT_MOV, char1_reg, 0, TMP3, 0);
7208
  OP1(SLJIT_MOV, char2_reg, 0, RETURN_ADDR, 0);
7209
  }
7210
7211
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
7212
}
7213
7214
/* Emits the shared fast-call routine for a caseless comparison of two code
unit sequences.

The register protocol matches do_casefulcmp: TMP1 addresses the first
sequence, TMP2 holds the length, STR_PTR is moved back by TMP2 to address the
second sequence, and the loop leaves on the first mismatch or when TMP2
reaches zero. Before being compared, each loaded code unit is mapped through
the common->lcc table; when the code unit width is not 8, values above 255
skip the table and are compared unchanged. STR_END is always used as a scratch
register and is preserved in LOCALS1. */
static void do_caselesscmp(compiler_common *common)
7215
{
7216
DEFINE_COMPILER;
7217
struct sljit_jump *jump;
7218
struct sljit_label *label;
7219
int char1_reg = STR_END;
7220
int char2_reg;
7221
int lcc_table;
7222
int opt_type = 0;
7223
7224
/* With virtual registers, STACK_TOP and STACK_LIMIT are borrowed and saved /
restored below. */
if (HAS_VIRTUAL_REGISTERS)
7225
  {
7226
  char2_reg = STACK_TOP;
7227
  lcc_table = STACK_LIMIT;
7228
  }
7229
else
7230
  {
7231
  char2_reg = RETURN_ADDR;
7232
  lcc_table = TMP3;
7233
  }
7234
7235
/* Select the load form: 1 = post-update addressing, 2 = pre-update
addressing, 0 = plain loads with explicit pointer increments. The calls
carrying SLJIT_MEM_SUPP are used as a query only. */
if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
7236
  opt_type = 1;
7237
else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
7238
  opt_type = 2;
7239
7240
sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
7241
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
7242
7243
/* Save STR_END, which serves as char1_reg. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, char1_reg, 0);
7244
7245
if (char2_reg == STACK_TOP)
7246
  {
7247
  /* Save the borrowed registers. */
  OP1(SLJIT_MOV, TMP3, 0, char2_reg, 0);
7248
  OP1(SLJIT_MOV, RETURN_ADDR, 0, lcc_table, 0);
7249
  }
7250
7251
OP1(SLJIT_MOV, lcc_table, 0, SLJIT_IMM, common->lcc);
7252
7253
if (opt_type == 1)
7254
  {
7255
  label = LABEL();
7256
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
7257
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
7258
  }
7259
else if (opt_type == 2)
7260
  {
7261
  /* Pre-update form: bias both pointers back by one code unit first. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
7262
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7263
7264
  label = LABEL();
7265
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
7266
  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
7267
  }
7268
else
7269
  {
7270
  /* Plain loads: TMP1 is advanced here, STR_PTR after the table lookups. */
  label = LABEL();
7271
  OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
7272
  OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
7273
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
7274
  }
7275
7276
/* Map both code units through the lcc table. The table is only indexed for
values up to 255. */
#if PCRE2_CODE_UNIT_WIDTH != 8
7277
jump = CMP(SLJIT_GREATER, char1_reg, 0, SLJIT_IMM, 255);
7278
#endif
7279
OP1(SLJIT_MOV_U8, char1_reg, 0, SLJIT_MEM2(lcc_table, char1_reg), 0);
7280
#if PCRE2_CODE_UNIT_WIDTH != 8
7281
JUMPHERE(jump);
7282
jump = CMP(SLJIT_GREATER, char2_reg, 0, SLJIT_IMM, 255);
7283
#endif
7284
OP1(SLJIT_MOV_U8, char2_reg, 0, SLJIT_MEM2(lcc_table, char2_reg), 0);
7285
#if PCRE2_CODE_UNIT_WIDTH != 8
7286
JUMPHERE(jump);
7287
#endif
7288
7289
if (opt_type == 0)
7290
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7291
7292
jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
7293
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
7294
JUMPTO(SLJIT_NOT_ZERO, label);
7295
7296
JUMPHERE(jump);
7297
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
7298
7299
/* Undo the one code unit bias applied for the pre-update form. */
if (opt_type == 2)
7300
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7301
7302
if (char2_reg == STACK_TOP)
7303
  {
7304
  /* Restore the borrowed registers. */
  OP1(SLJIT_MOV, char2_reg, 0, TMP3, 0);
7305
  OP1(SLJIT_MOV, lcc_table, 0, RETURN_ADDR, 0);
7306
  }
7307
7308
OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
7309
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
7310
}
7311
7312
/* Emits the comparison of one pattern character against the subject as part
of a fixed character sequence.

The pattern character starts at cc; in UTF mode all of its code units are
processed. Subject data is read relative to STR_PTR at offset
-context->length, and context->length is reduced by IN_UCHARS(1) per code
unit. A mismatch adds a jump to *backtracks.

When caseless is set and the character has an other case, the case difference
is handled by OR-ing a single bit into the loaded value before the compare.

context carries state between successive calls: sourcereg is -1 before the
first call and afterwards alternates between TMP1 and TMP2; on the unaligned
read path ucharptr, c and oc accumulate the code units that are compared
together by one wider load.

Returns cc advanced past the character that was processed. */
static PCRE2_SPTR byte_sequence_compare(compiler_common *common, BOOL caseless, PCRE2_SPTR cc,
7313
    compare_context *context, jump_list **backtracks)
7314
{
7315
DEFINE_COMPILER;
7316
unsigned int othercasebit = 0;
7317
PCRE2_SPTR othercasechar = NULL;
7318
#ifdef SUPPORT_UNICODE
7319
int utflength;
7320
#endif
7321
7322
if (caseless && char_has_othercase(common, cc))
7323
  {
7324
  othercasebit = char_get_othercase_bit(common, cc);
7325
  SLJIT_ASSERT(othercasebit);
7326
  /* Extracting bit difference info. */
7327
  /* The value is decoded here as: upper bits = index of the code unit that
  differs (othercasechar points at it), low 8 bits = the differing bit. In
  the 16 / 32 bit case, bit 0x100 selects the high byte of the code unit. */
#if PCRE2_CODE_UNIT_WIDTH == 8
7328
  othercasechar = cc + (othercasebit >> 8);
7329
  othercasebit &= 0xff;
7330
#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
7331
  /* Note that this code only handles characters in the BMP. If there
7332
  ever are characters outside the BMP whose othercase differs in only one
7333
  bit from itself (there currently are none), this code will need to be
7334
  revised for PCRE2_CODE_UNIT_WIDTH == 32. */
7335
  othercasechar = cc + (othercasebit >> 9);
7336
  if ((othercasebit & 0x100) != 0)
7337
    othercasebit = (othercasebit & 0xff) << 8;
7338
  else
7339
    othercasebit &= 0xff;
7340
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
7341
  }
7342
7343
/* First call for this sequence: load the first chunk into TMP1. The widest
load that the remaining length permits is used when unaligned access is
available. The following load will then go to TMP2. */
if (context->sourcereg == -1)
7344
  {
7345
#if PCRE2_CODE_UNIT_WIDTH == 8
7346
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
7347
  if (context->length >= 4)
7348
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
7349
  else if (context->length >= 2)
7350
    OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
7351
  else
7352
#endif
7353
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
7354
#elif PCRE2_CODE_UNIT_WIDTH == 16
7355
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
7356
  if (context->length >= 4)
7357
    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
7358
  else
7359
#endif
7360
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
7361
#elif PCRE2_CODE_UNIT_WIDTH == 32
7362
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
7363
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
7364
  context->sourcereg = TMP2;
7365
  }
7366
7367
#ifdef SUPPORT_UNICODE
7368
utflength = 1;
7369
if (common->utf && HAS_EXTRALEN(*cc))
7370
  utflength += GET_EXTRALEN(*cc);
7371
7372
do
7373
  {
7374
#endif
7375
7376
  context->length -= IN_UCHARS(1);
7377
#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
7378
7379
  /* Unaligned read is supported. */
7380
  /* Accumulate this code unit into the pending chunk: c holds the expected
  value (with the case bit forced on), oc holds the mask to OR into the
  subject data. */
  if (othercasebit != 0 && othercasechar == cc)
7381
    {
7382
    context->c.asuchars[context->ucharptr] = *cc | othercasebit;
7383
    context->oc.asuchars[context->ucharptr] = othercasebit;
7384
    }
7385
  else
7386
    {
7387
    context->c.asuchars[context->ucharptr] = *cc;
7388
    context->oc.asuchars[context->ucharptr] = 0;
7389
    }
7390
  context->ucharptr++;
7391
7392
  /* Emit the compare once the pending chunk matches the width of the load
  that fetched it, or when the sequence ends. */
#if PCRE2_CODE_UNIT_WIDTH == 8
7393
  if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
7394
#else
7395
  if (context->ucharptr >= 2 || context->length == 0)
7396
#endif
7397
    {
7398
    /* Issue the load for the next chunk into the currently free register
    before comparing the chunk that was loaded earlier. */
    if (context->length >= 4)
7399
      OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
7400
    else if (context->length >= 2)
7401
      OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
7402
#if PCRE2_CODE_UNIT_WIDTH == 8
7403
    else if (context->length >= 1)
7404
      OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
7405
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
7406
    context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
7407
7408
    switch(context->ucharptr)
7409
      {
7410
      case 4 / sizeof(PCRE2_UCHAR):
7411
      if (context->oc.asint != 0)
7412
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
7413
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
7414
      break;
7415
7416
      case 2 / sizeof(PCRE2_UCHAR):
7417
      if (context->oc.asushort != 0)
7418
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
7419
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
7420
      break;
7421
7422
#if PCRE2_CODE_UNIT_WIDTH == 8
7423
      case 1:
7424
      if (context->oc.asbyte != 0)
7425
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
7426
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
7427
      break;
7428
#endif
7429
7430
      default:
7431
      SLJIT_UNREACHABLE();
7432
      break;
7433
      }
7434
    context->ucharptr = 0;
7435
    }
7436
7437
#else
7438
7439
  /* Unaligned read is unsupported or in 32 bit mode. */
7440
  /* One code unit per compare: load the next code unit (if any) into the
  free register, then compare the one that was loaded previously. */
  if (context->length >= 1)
7441
    OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
7442
7443
  context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
7444
7445
  if (othercasebit != 0 && othercasechar == cc)
7446
    {
7447
    OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
7448
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
7449
    }
7450
  else
7451
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
7452
7453
#endif
7454
7455
  cc++;
7456
#ifdef SUPPORT_UNICODE
7457
  utflength--;
7458
  }
7459
while (utflength > 0);
7460
#endif
7461
7462
return cc;
7463
}
7464
7465
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
7466
7467
/* Rebases the register named typereg so that it holds (type - value).
typeoffset tracks the bias currently subtracted from the register; only the
difference to the new bias is added or subtracted, and nothing is emitted
when the bias is unchanged.

The macro expands to several statements and evaluates its argument more than
once. It must therefore be used as a complete statement (not as the unbraced
body of an if / else / loop) with a side-effect-free argument. typereg and
typeoffset must be in scope at the expansion site. */
#define SET_TYPE_OFFSET(value) \
7468
  if ((value) != typeoffset) \
7469
    { \
7470
    if ((value) < typeoffset) \
7471
      OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
7472
    else \
7473
      OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
7474
    } \
7475
  typeoffset = (value);
7476
7477
/* Same scheme as SET_TYPE_OFFSET, applied to the character held in TMP1 with
charoffset as the tracked bias. The same usage restrictions apply: it expands
to several statements and evaluates its argument more than once. */
#define SET_CHAR_OFFSET(value) \
7478
  if ((value) != charoffset) \
7479
    { \
7480
    if ((value) < charoffset) \
7481
      OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
7482
    else \
7483
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
7484
    } \
7485
  charoffset = (value);
7486
7487
static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr);
7488
7489
#ifdef SUPPORT_UNICODE
7490
/* Bit flags collected in the unicode_status variable of
compile_xclass_matchingpath. Each flag occupies its own bit. */
#define XCLASS_SAVE_CHAR 0x001
7491
#define XCLASS_CHAR_SAVED 0x002
7492
#define XCLASS_HAS_TYPE 0x004
7493
#define XCLASS_HAS_SCRIPT 0x008
7494
#define XCLASS_HAS_SCRIPT_EXTENSION 0x010
7495
#define XCLASS_HAS_BOOL 0x020
7496
#define XCLASS_HAS_BIDICL 0x040
7497
/* Union of all XCLASS_HAS_* flags. */
#define XCLASS_NEEDS_UCD (XCLASS_HAS_TYPE | XCLASS_HAS_SCRIPT | XCLASS_HAS_SCRIPT_EXTENSION | XCLASS_HAS_BOOL | XCLASS_HAS_BIDICL)
7498
#define XCLASS_SCRIPT_EXTENSION_NOTPROP 0x080
7499
#define XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR 0x100
7500
#define XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0 0x200
7501
7502
#endif /* SUPPORT_UNICODE */
7503
7504
/* Emits the matching path for an extended character class (XCLASS).

   Arguments:
     common      the JIT compiler state
     cc          points at the XCLASS flag unit (XCL_NOT, XCL_MAP and
                 XCL_HASPROP bits); an optional 32 byte bitmap follows, then a
                 list of XCL_SINGLE, XCL_RANGE, XCL_PROP and XCL_NOTPROP items
                 terminated by XCL_END
     backtracks  receives the jumps taken when the class does not match

   The work is done in several stages:
     1. The item list is scanned to count the comparisons, to compute the
        [min, max] range passed to read_char(), and (Unicode builds) to
        collect the XCLASS_* status bits.
     2. The character is read and tested against the bitmap, if there is one.
     3. Unicode builds: the UCD record is located and the bidi class, boolean
        property, script and script extension items are tested.
     4. The remaining single character, range and property items are tested.

   For a positive class a successful item jumps to the local "found" list,
   which is bound to the end of the emitted code; for a negated class (XCL_NOT)
   a successful item jumps to backtracks instead. The last comparison is
   inverted where needed so that falling through has the right meaning. */
static void compile_xclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
jump_list *found = NULL;
jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
struct sljit_jump *jump = NULL;
PCRE2_SPTR ccbegin;
int compares, invertcmp, numberofcmps;
#if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
BOOL utf = common->utf;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */

#ifdef SUPPORT_UNICODE
sljit_u32 unicode_status = 0;
int typereg = TMP1;
const sljit_u32 *other_cases;
sljit_uw typeoffset;
#endif /* SUPPORT_UNICODE */

/* Scanning the necessary info. */
cc++;
ccbegin = cc;
compares = 0;

if (cc[-1] & XCL_MAP)
  {
  min = 0;
  cc += 32 / sizeof(PCRE2_UCHAR);
  }

while (*cc != XCL_END)
  {
  compares++;
  if (*cc == XCL_SINGLE)
    {
    cc ++;
    GETCHARINCTEST(c, cc);
    if (c > max) max = c;
    if (c < min) min = c;
#ifdef SUPPORT_UNICODE
    unicode_status |= XCLASS_SAVE_CHAR;
#endif /* SUPPORT_UNICODE */
    }
  else if (*cc == XCL_RANGE)
    {
    cc ++;
    GETCHARINCTEST(c, cc);
    if (c < min) min = c;
    GETCHARINCTEST(c, cc);
    if (c > max) max = c;
#ifdef SUPPORT_UNICODE
    unicode_status |= XCLASS_SAVE_CHAR;
#endif /* SUPPORT_UNICODE */
    }
#ifdef SUPPORT_UNICODE
  else
    {
    SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
    cc++;
    if (*cc == PT_CLIST && cc[-1] == XCL_PROP)
      {
      /* A positive caseless set only extends the range by its members. */
      other_cases = PRIV(ucd_caseless_sets) + cc[1];
      while (*other_cases != NOTACHAR)
        {
        if (*other_cases > max) max = *other_cases;
        if (*other_cases < min) min = *other_cases;
        other_cases++;
        }
      }
    else
      {
      /* Any other property item may match anywhere in the character range. */
      max = READ_CHAR_MAX;
      min = 0;
      }

    switch(*cc)
      {
      case PT_ANY:
      /* Any either accepts everything or is ignored. */
      if (cc[-1] == XCL_PROP)
        {
        compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
        if (list == backtracks)
          add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
        return;
        }
      break;

      case PT_LAMP:
      case PT_GC:
      case PT_PC:
      case PT_ALNUM:
      unicode_status |= XCLASS_HAS_TYPE;
      break;

      case PT_SCX:
      unicode_status |= XCLASS_HAS_SCRIPT_EXTENSION;
      if (cc[-1] == XCL_NOTPROP)
        {
        unicode_status |= XCLASS_SCRIPT_EXTENSION_NOTPROP;
        break;
        }
      /* A positive PT_SCX item is tested twice below: once in the script pass
         and once in the script extension pass. */
      compares++;
      /* Fall through */

      case PT_SC:
      unicode_status |= XCLASS_HAS_SCRIPT;
      break;

      case PT_SPACE:
      case PT_PXSPACE:
      case PT_WORD:
      case PT_PXGRAPH:
      case PT_PXPRINT:
      case PT_PXPUNCT:
      unicode_status |= XCLASS_SAVE_CHAR | XCLASS_HAS_TYPE;
      break;

      case PT_CLIST:
      case PT_UCNC:
      unicode_status |= XCLASS_SAVE_CHAR;
      break;

      case PT_BOOL:
      unicode_status |= XCLASS_HAS_BOOL;
      break;

      case PT_BIDICL:
      unicode_status |= XCLASS_HAS_BIDICL;
      break;

      default:
      SLJIT_UNREACHABLE();
      break;
      }
    cc += 2;
    }
#endif /* SUPPORT_UNICODE */
  }
SLJIT_ASSERT(compares > 0);

/* We are not necessarily in UTF mode, even in 8-bit mode. */
cc = ccbegin;
if ((cc[-1] & XCL_NOT) != 0)
  read_char(common, min, max, backtracks, READ_CHAR_UPDATE_STR_PTR);
else
  {
#ifdef SUPPORT_UNICODE
  read_char(common, min, max, (unicode_status & XCLASS_NEEDS_UCD) ? backtracks : NULL, 0);
#else /* !SUPPORT_UNICODE */
  read_char(common, min, max, NULL, 0);
#endif /* SUPPORT_UNICODE */
  }

if ((cc[-1] & XCL_HASPROP) == 0)
  {
  if ((cc[-1] & XCL_MAP) != 0)
    {
    /* Characters above 255 skip the bitmap and go on to the item list. */
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
    if (!optimize_class(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found))
      {
      /* Generic bitmap test: byte index is c >> 3, bit index is c & 7. */
      OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
      OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
      OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
      OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
      OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
      add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
      }

    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(jump);

    cc += 32 / sizeof(PCRE2_UCHAR);
    }
  else
    {
    /* No bitmap: reject early anything outside [min, max]. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
    add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
    }
  }
else if ((cc[-1] & XCL_MAP) != 0)
  {
  /* The bitmap test clobbers TMP1, so keep the character in RETURN_ADDR. */
  OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
#ifdef SUPPORT_UNICODE
  unicode_status |= XCLASS_CHAR_SAVED;
#endif /* SUPPORT_UNICODE */
  if (!optimize_class(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
    {
#if PCRE2_CODE_UNIT_WIDTH == 8
    jump = NULL;
    if (common->utf)
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
      jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);

    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
    OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
    add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));

#if PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf)
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
      JUMPHERE(jump);
    }

  OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
  cc += 32 / sizeof(PCRE2_UCHAR);
  }

#ifdef SUPPORT_UNICODE
if (unicode_status & XCLASS_NEEDS_UCD)
  {
  /* Save the character unless the bitmap path has already done so. */
  if ((unicode_status & (XCLASS_SAVE_CHAR | XCLASS_CHAR_SAVED)) == XCLASS_SAVE_CHAR)
    OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
  if (!common->utf)
    {
    /* Values above MAX_UTF_CODE_POINT are looked up as UNASSIGNED_UTF_CHAR. */
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
    JUMPHERE(jump);
    }
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */

  /* Two stage table lookup. Afterwards TMP2 holds the record index multiplied
     by 12 ((index << 3) + (index << 2)), which is used below as the offset
     added to the address of a ucd_record field. */
  OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
  OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
  OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);

  /* Each pass below walks the item list again, starting from here. */
  ccbegin = cc;

  if (unicode_status & XCLASS_HAS_BIDICL)
    {
    OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass));
    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BIDICLASS_SHIFT);

    while (*cc != XCL_END)
      {
      if (*cc == XCL_SINGLE)
        {
        cc ++;
        GETCHARINCTEST(c, cc);
        }
      else if (*cc == XCL_RANGE)
        {
        cc ++;
        GETCHARINCTEST(c, cc);
        GETCHARINCTEST(c, cc);
        }
      else
        {
        SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
        cc++;
        if (*cc == PT_BIDICL)
          {
          compares--;
          invertcmp = (compares == 0 && list != backtracks);
          if (cc[-1] == XCL_NOTPROP)
            invertcmp ^= 0x1;
          jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
          add_jump(compiler, compares > 0 ? list : backtracks, jump);
          }
        cc += 2;
        }
      }

    cc = ccbegin;
    }

  if (unicode_status & XCLASS_HAS_BOOL)
    {
    OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, bprops));
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BPROPS_MASK);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);

    while (*cc != XCL_END)
      {
      if (*cc == XCL_SINGLE)
        {
        cc ++;
        GETCHARINCTEST(c, cc);
        }
      else if (*cc == XCL_RANGE)
        {
        cc ++;
        GETCHARINCTEST(c, cc);
        GETCHARINCTEST(c, cc);
        }
      else
        {
        SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
        cc++;
        if (*cc == PT_BOOL)
          {
          compares--;
          invertcmp = (compares == 0 && list != backtracks);
          if (cc[-1] == XCL_NOTPROP)
            invertcmp ^= 0x1;

          OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_boolprop_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)1 << (cc[1] & 0x1f));
          add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp));
          }
        cc += 2;
        }
      }

    cc = ccbegin;
    }

  if (unicode_status & XCLASS_HAS_SCRIPT)
    {
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));

    while (*cc != XCL_END)
      {
      if (*cc == XCL_SINGLE)
        {
        cc ++;
        GETCHARINCTEST(c, cc);
        }
      else if (*cc == XCL_RANGE)
        {
        cc ++;
        GETCHARINCTEST(c, cc);
        GETCHARINCTEST(c, cc);
        }
      else
        {
        SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
        cc++;
        switch (*cc)
          {
          case PT_SCX:
          /* Negated PT_SCX items are handled only in the script extension
             pass. */
          if (cc[-1] == XCL_NOTPROP)
            break;
          /* Fall through */

          case PT_SC:
          compares--;
          invertcmp = (compares == 0 && list != backtracks);
          if (cc[-1] == XCL_NOTPROP)
            invertcmp ^= 0x1;

          add_jump(compiler, compares > 0 ? list : backtracks, CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]));
          }
        cc += 2;
        }
      }

    cc = ccbegin;
    }

  if (unicode_status & XCLASS_HAS_SCRIPT_EXTENSION)
    {
    OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass));
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_SCRIPTX_MASK);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);

    if (unicode_status & XCLASS_SCRIPT_EXTENSION_NOTPROP)
      {
      /* TMP2 is about to be overwritten with the script value. When the
         chartype is still needed afterwards, stash the record offset first:
         in LOCALS0 if RETURN_ADDR is holding the character, otherwise in
         RETURN_ADDR. */
      if (unicode_status & XCLASS_HAS_TYPE)
        {
        if (unicode_status & XCLASS_SAVE_CHAR)
          {
          OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP2, 0);
          unicode_status |= XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0;
          }
        else
          {
          OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP2, 0);
          unicode_status |= XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR;
          }
        }
      OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
      }

    while (*cc != XCL_END)
      {
      if (*cc == XCL_SINGLE)
        {
        cc ++;
        GETCHARINCTEST(c, cc);
        }
      else if (*cc == XCL_RANGE)
        {
        cc ++;
        GETCHARINCTEST(c, cc);
        GETCHARINCTEST(c, cc);
        }
      else
        {
        SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
        cc++;
        if (*cc == PT_SCX)
          {
          compares--;
          invertcmp = (compares == 0 && list != backtracks);

          jump = NULL;
          if (cc[-1] == XCL_NOTPROP)
            {
            /* For a negated item the script itself is compared first; the
               script extension bit set is tested only when it differs. */
            jump = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, (int)cc[1]);
            if (invertcmp)
              {
              add_jump(compiler, backtracks, jump);
              jump = NULL;
              }
            invertcmp ^= 0x1;
            }

          OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_script_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)1 << (cc[1] & 0x1f));
          add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp));

          if (jump != NULL)
            JUMPHERE(jump);
          }
        cc += 2;
        }
      }

    if (unicode_status & XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
    else if (unicode_status & XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR)
      OP1(SLJIT_MOV, TMP2, 0, RETURN_ADDR, 0);
    cc = ccbegin;
    }

  /* Bring the character back into TMP1 for the value comparisons. */
  if (unicode_status & XCLASS_SAVE_CHAR)
    OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);

  if (unicode_status & XCLASS_HAS_TYPE)
    {
    /* TMP1 holds the character when it was saved, so the chartype goes to
       RETURN_ADDR in that case. */
    if (unicode_status & XCLASS_SAVE_CHAR)
      typereg = RETURN_ADDR;

    OP1(SLJIT_MOV_U8, typereg, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
    }
  }
#endif /* SUPPORT_UNICODE */

/* Generating code. */
charoffset = 0;
numberofcmps = 0;
#ifdef SUPPORT_UNICODE
typeoffset = 0;
#endif /* SUPPORT_UNICODE */

while (*cc != XCL_END)
  {
  compares--;
  invertcmp = (compares == 0 && list != backtracks);
  jump = NULL;

  if (*cc == XCL_SINGLE)
    {
    cc ++;
    GETCHARINCTEST(c, cc);

    /* Up to four consecutive single / range tests are merged: their results
       are OR-ed together in TMP2 and a single jump is emitted at the end. */
    if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
      {
      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
      numberofcmps++;
      }
    else if (numberofcmps > 0)
      {
      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      numberofcmps = 0;
      }
    else
      {
      jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      numberofcmps = 0;
      }
    }
  else if (*cc == XCL_RANGE)
    {
    cc ++;
    GETCHARINCTEST(c, cc);
    /* Rebase TMP1 to the start of the range, so that one unsigned compare
       against (end - start) tests both bounds. */
    SET_CHAR_OFFSET(c);
    GETCHARINCTEST(c, cc);

    if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
      {
      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
      numberofcmps++;
      }
    else if (numberofcmps > 0)
      {
      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      numberofcmps = 0;
      }
    else
      {
      jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      numberofcmps = 0;
      }
    }
#ifdef SUPPORT_UNICODE
  else
    {
    SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
    if (*cc == XCL_NOTPROP)
      invertcmp ^= 0x1;
    cc++;
    switch(*cc)
      {
      case PT_ANY:
      if (!invertcmp)
        jump = JUMP(SLJIT_JUMP);
      break;

      case PT_LAMP:
      OP2U(SLJIT_SUB | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
      OP2U(SLJIT_SUB | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
      OP2U(SLJIT_SUB | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_GC:
      c = PRIV(ucp_typerange)[(int)cc[1] * 2];
      SET_TYPE_OFFSET(c);
      jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
      break;

      case PT_PC:
      jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
      break;

      case PT_SC:
      case PT_SCX:
      case PT_BOOL:
      case PT_BIDICL:
      /* Already tested (and counted) in the UCD passes above, so undo the
         decrement done at the top of the loop. */
      compares++;
      /* Do nothing. */
      break;

      case PT_SPACE:
      case PT_PXSPACE:
      SET_CHAR_OFFSET(9);
      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      SET_TYPE_OFFSET(ucp_Zl);
      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_WORD:
      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
      /* Fall through. */

      case PT_ALNUM:
      SET_TYPE_OFFSET(ucp_Ll);
      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
      /* For PT_WORD the underscore result is already in TMP2, so OR into it. */
      OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
      SET_TYPE_OFFSET(ucp_Nd);
      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_CLIST:
      other_cases = PRIV(ucd_caseless_sets) + cc[1];

      /* At least three characters are required.
         Otherwise this case would be handled by the normal code path. */
      SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
      SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);

      /* Optimizing character pairs, if their difference is power of 2. */
      if (is_powerof2(other_cases[1] ^ other_cases[0]))
        {
        if (charoffset == 0)
          OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
        else
          {
          OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
          OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
          }
        OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, other_cases[1]);
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
        other_cases += 2;
        }
      else if (is_powerof2(other_cases[2] ^ other_cases[1]))
        {
        if (charoffset == 0)
          OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
        else
          {
          OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
          /* The mask must be the single bit in which other_cases[1] and
             other_cases[2] differ, exactly as in the charoffset == 0 case. */
          OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
          }
        OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, other_cases[2]);
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);

        OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
        OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);

        other_cases += 3;
        }
      else
        {
        OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
        }

      /* The remaining members are compared one by one; only the last OR
         sets the zero flag that the jump below tests. */
      while (*other_cases != NOTACHAR)
        {
        OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
        OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
        }
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_UCNC:
      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      SET_CHAR_OFFSET(0xa0);
      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
      SET_CHAR_OFFSET(0);
      OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_PXGRAPH:
      /* C and Z groups are the farthest two groups. */
      SET_TYPE_OFFSET(ucp_Ll);
      OP2U(SLJIT_SUB | SLJIT_SET_GREATER, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);

      jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);

      /* In case of ucp_Cf, we overwrite the result. */
      SET_CHAR_OFFSET(0x2066);
      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      JUMPHERE(jump);
      jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
      break;

      case PT_PXPRINT:
      /* C and Z groups are the farthest two groups. */
      SET_TYPE_OFFSET(ucp_Ll);
      OP2U(SLJIT_SUB | SLJIT_SET_GREATER, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);

      OP2U(SLJIT_SUB | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
      OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_NOT_EQUAL);

      jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);

      /* In case of ucp_Cf, we overwrite the result. */
      SET_CHAR_OFFSET(0x2066);
      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      JUMPHERE(jump);
      jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
      break;

      case PT_PXPUNCT:
      SET_TYPE_OFFSET(ucp_Sc);
      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

      SET_CHAR_OFFSET(0);
      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x7f);
      OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL);

      SET_TYPE_OFFSET(ucp_Pc);
      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      default:
      SLJIT_UNREACHABLE();
      break;
      }
    cc += 2;
    }
#endif /* SUPPORT_UNICODE */

  /* While further comparisons remain, the jump goes to "list"; the jump of
     the last comparison always goes to backtracks. */
  if (jump != NULL)
    add_jump(compiler, compares > 0 ? list : backtracks, jump);
  }

if (found != NULL)
  set_jumps(found, LABEL());
}
8238
8239
#undef SET_TYPE_OFFSET
8240
#undef SET_CHAR_OFFSET
8241
8242
#endif
8243
8244
/* Emits the matching-path code for one simple assertion opcode: OP_SOD,
OP_SOM, OP_[NOT_]WORD_BOUNDARY, OP_EODN, OP_EOD, OP_DOLL, OP_DOLLM, OP_CIRC,
OP_CIRCM, or the OP_REVERSE step.

Arguments:
  common      the compiler state (newline settings, mode, utf flags, ...)
  type        the opcode to compile
  cc          pointer into the compiled pattern; only read for OP_REVERSE,
              where GET(cc, 0) supplies the length
  backtracks  jump list that collects every "assertion failed" jump

Returns: cc unchanged, except for OP_REVERSE which returns cc + LINK_SIZE.
Any other opcode reaches SLJIT_UNREACHABLE(). */
static PCRE2_SPTR compile_simple_assertion_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks)
8245
{
8246
DEFINE_COMPILER;
8247
int length;
8248
struct sljit_jump *jump[4];
8249
#ifdef SUPPORT_UNICODE
8250
struct sljit_label *label;
8251
#endif /* SUPPORT_UNICODE */
8252
8253
switch(type)
8254
  {
8255
  /* Fail unless STR_PTR equals the "begin" field of jit_arguments. */
  case OP_SOD:
8256
  if (HAS_VIRTUAL_REGISTERS)
8257
    {
8258
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8259
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
8260
    }
8261
  else
8262
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
8263
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
8264
  return cc;
8265
8266
  /* Same as OP_SOD, but compares against the "str" field of jit_arguments. */
  case OP_SOM:
8267
  if (HAS_VIRTUAL_REGISTERS)
8268
    {
8269
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8270
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
8271
    }
8272
  else
8273
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
8274
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
8275
  return cc;
8276
8277
  /* Both word boundary opcodes fast-call the shared routine collected in
  common->wordboundary and then test its result. */
  case OP_NOT_WORD_BOUNDARY:
8278
  case OP_WORD_BOUNDARY:
8279
  add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
8280
#ifdef SUPPORT_UNICODE
8281
  if (common->invalid_utf)
8282
    {
    /* Here the result is tested as a signed value in TMP2: \B fails when it
    is non-zero, \b fails when it is less than or equal to zero. */
8283
    add_jump(compiler, backtracks, CMP((type == OP_NOT_WORD_BOUNDARY) ? SLJIT_NOT_EQUAL : SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0));
8284
    return cc;
8285
    }
8286
#endif /* SUPPORT_UNICODE */
  /* Otherwise the result is taken from the zero flag. */
8287
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
8288
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
8289
  return cc;
8290
8291
  case OP_EODN:
8292
  /* Requires rather complex checks. */
  /* jump[0] is taken when STR_PTR is already at (or past) STR_END; it lands
  after all the newline checks below. */
8293
  jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
8294
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
8295
    {
    /* Fixed newline made of two code units: (newline >> 8) & 0xff followed
    by newline & 0xff. TMP2 = STR_PTR + 2 units, TMP1 = the unit at STR_PTR. */
8296
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
8297
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
8298
    if (common->mode == PCRE2_JIT_COMPLETE)
8299
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
8300
    else
8301
      {
      /* Exactly two units left: go straight to the full comparison. */
8302
      jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
      /* TMP2 = 1 if STR_PTR + 2 units is below STR_END, else 0. */
8303
      OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, STR_END, 0);
8304
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
      /* OR in "first unit differs from the first newline unit"; any non-zero
      result is a plain failure. */
8305
      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
8306
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL);
8307
      add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
      /* Remaining case: the first newline unit is the last unit of the
      subject. Run the partial match check, then fail. */
8308
      check_partial(common, TRUE);
8309
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
8310
      JUMPHERE(jump[1]);
8311
      }
8312
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
8313
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
8314
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
8315
    }
8316
  else if (common->nltype == NLTYPE_FIXED)
8317
    {
    /* Fixed single-unit newline: it must be the very last unit. */
8318
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8319
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
8320
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
8321
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
8322
    }
8323
  else
8324
    {
    /* Non-fixed newline types. A leading CR is handled first: jump[1] skips
    this part when the current unit is not CR. */
8325
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
8326
    jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
8327
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
8328
    OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, TMP2, 0, STR_END, 0);
    /* STR_PTR + 2 units is beyond STR_END: the CR is the last unit, accept. */
8329
    jump[2] = JUMP(SLJIT_GREATER);
8330
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */);
8331
    /* Equal. */
    /* Exactly two units remain: accept only when the second one is NL. */
8332
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
8333
    jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
8334
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
8335
8336
    JUMPHERE(jump[1]);
8337
    if (common->nltype == NLTYPE_ANYCRLF)
8338
      {
      /* Not CR: the unit must be NL and must be the last one. */
8339
      OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8340
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
8341
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
8342
      }
8343
    else
8344
      {
      /* Save STR_PTR in TMP3, read one character, require that it ends at
      STR_END and that the common->anynewline routine reports a non-zero
      result, then restore STR_PTR. */
8345
      OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
8346
      read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
8347
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
8348
      add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
8349
      sljit_set_current_flags(compiler, SLJIT_SET_Z);
8350
      add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
8351
      OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
8352
      }
8353
    JUMPHERE(jump[2]);
8354
    JUMPHERE(jump[3]);
8355
    }
8356
  JUMPHERE(jump[0]);
8357
  if (common->mode != PCRE2_JIT_COMPLETE)
8358
    check_partial(common, TRUE);
8359
  return cc;
8360
8361
  /* Fail when STR_PTR is below STR_END; in the partial matching modes the
  partial match check is emitted as well. */
  case OP_EOD:
8362
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
8363
  if (common->mode != PCRE2_JIT_COMPLETE)
8364
    check_partial(common, TRUE);
8365
  return cc;
8366
8367
  case OP_DOLL:
  /* Fail immediately when the PCRE2_NOTEOL bit is set in the options field
  of jit_arguments. */
8368
  if (HAS_VIRTUAL_REGISTERS)
8369
    {
8370
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
8371
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
8372
    }
8373
  else
8374
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
8375
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
8376
8377
  /* Without common->endonly the OP_EODN code is reused (the return value of
  the recursive call is not needed); with it only the exact end is accepted. */
  if (!common->endonly)
8378
    compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
8379
  else
8380
    {
8381
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
8382
    check_partial(common, FALSE);
8383
    }
8384
  return cc;
8385
8386
  case OP_DOLLM:
  /* jump[1]: not at the end of the subject, so a newline must follow.
  Otherwise (at the end): fail if PCRE2_NOTEOL is set, emit the partial
  match check and skip the newline test through jump[0]. */
8387
  jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
8388
  if (HAS_VIRTUAL_REGISTERS)
8389
    {
8390
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
8391
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
8392
    }
8393
  else
8394
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
8395
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
8396
  check_partial(common, FALSE);
8397
  jump[0] = JUMP(SLJIT_JUMP);
8398
  JUMPHERE(jump[1]);
8399
8400
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
8401
    {
    /* Two-unit fixed newline: both units at STR_PTR must match. */
8402
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
8403
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
8404
    if (common->mode == PCRE2_JIT_COMPLETE)
8405
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
8406
    else
8407
      {
      /* jump[1] is reused: taken when two units are available. */
8408
      jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
8409
      /* STR_PTR = STR_END - IN_UCHARS(1) */
      /* Only one unit is left: if it is the first newline unit, emit the
      partial match check; the assertion fails either way. */
8410
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
8411
      check_partial(common, TRUE);
8412
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
8413
      JUMPHERE(jump[1]);
8414
      }
8415
8416
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
8417
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
8418
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
8419
    }
8420
  else
8421
    {
    /* Other newline types: peek at the next character (STR_PTR is not
    advanced here) and let check_newlinechar() add the failure jumps. */
8422
    peek_char(common, common->nlmax, TMP3, 0, NULL);
8423
    check_newlinechar(common, common->nltype, backtracks, FALSE);
8424
    }
8425
  JUMPHERE(jump[0]);
8426
  return cc;
8427
8428
  /* Fail when STR_PTR is above the "begin" field of jit_arguments, or when
  the PCRE2_NOTBOL bit is set in its options field. */
  case OP_CIRC:
8429
  if (HAS_VIRTUAL_REGISTERS)
8430
    {
8431
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
8432
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
8433
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
8434
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
8435
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
8436
    }
8437
  else
8438
    {
8439
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
8440
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
8441
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
8442
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
8443
    }
8444
  return cc;
8445
8446
  case OP_CIRCM:
8447
  /* TMP2 might be used by peek_char_back. */
  /* TMP2 holds "begin". jump[1]: STR_PTR is above begin, so the preceding
  character(s) must be checked. Otherwise: fail if PCRE2_NOTBOL is set,
  else succeed through jump[0]. */
8448
  if (HAS_VIRTUAL_REGISTERS)
8449
    {
8450
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8451
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
8452
    jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);
8453
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
8454
    }
8455
  else
8456
    {
8457
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
8458
    jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);
8459
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
8460
    }
8461
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
8462
  jump[0] = JUMP(SLJIT_JUMP);
8463
  JUMPHERE(jump[1]);
8464
8465
  /* Unless common->alt_circumflex is set, the assertion fails at the very
  end of the subject. */
  if (!common->alt_circumflex)
8466
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
8467
8468
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
8469
    {
    /* Two-unit fixed newline: STR_PTR - 2 units must not be below begin
    (still in TMP2), and the two preceding units must match. */
8470
    OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
8471
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, TMP2, 0));
8472
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
8473
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
8474
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
8475
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
8476
    }
8477
  else
8478
    {
8479
    peek_char_back(common, common->nlmax, backtracks);
8480
    check_newlinechar(common, common->nltype, backtracks, FALSE);
8481
    }
8482
  JUMPHERE(jump[0]);
8483
  return cc;
8484
8485
  case OP_REVERSE:
  /* Moves STR_PTR back by the length stored at cc; a zero length emits no
  code at all. */
8486
  length = GET(cc, 0);
8487
  if (length == 0)
8488
    return cc + LINK_SIZE;
  /* TMP2 = "begin" field of jit_arguments, used as the lower bound. */
8489
  if (HAS_VIRTUAL_REGISTERS)
8490
    {
8491
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8492
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
8493
    }
8494
  else
8495
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
8496
#ifdef SUPPORT_UNICODE
8497
  if (common->utf)
8498
    {
    /* UTF: a counted loop (TMP3 runs from length down to zero) that calls
    move_back() once per iteration and fails as soon as STR_PTR is not
    above begin. */
8499
    OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, length);
8500
    label = LABEL();
8501
    add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0));
8502
    move_back(common, backtracks, FALSE);
8503
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
8504
    JUMPTO(SLJIT_NOT_ZERO, label);
8505
    }
8506
  else
8507
#endif
8508
    {
    /* Fixed-width units: one subtraction followed by a bounds check. */
8509
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
8510
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0));
8511
    }
8512
  check_start_used_ptr(common);
8513
  return cc + LINK_SIZE;
8514
  }
8515
SLJIT_UNREACHABLE();
8516
return cc;
8517
}
8518
8519
#ifdef SUPPORT_UNICODE
8520
8521
#if PCRE2_CODE_UNIT_WIDTH != 32
8522
8523
/* Helper called from JIT code for OP_EXTUNI on a UTF subject that is read
with the plain (non-validating) GETCHAR macros. Starting at cc, it consumes
one character unconditionally and then keeps consuming characters for as
long as PRIV(ucp_gbtable) does not allow a grapheme break before them.

Arguments:
  args   the JIT arguments; only the begin and end fields are read
  cc     start of the first character of the cluster

Returns: pointer just past the last character that belongs to the cluster. */
static PCRE2_SPTR SLJIT_FUNC do_extuni_utf(jit_arguments *args, PCRE2_SPTR cc)
{
PCRE2_SPTR subject_begin = args->begin;
PCRE2_SPTR subject_end = args->end;
PCRE2_SPTR left_start = cc;    /* Start of the character left of the candidate break. */
PCRE2_SPTR cluster_end;        /* End of the cluster accepted so far. */
PCRE2_SPTR back;
int left_prop, right_prop, ri_before;
uint32_t c;

/* The first character is always part of the cluster. */

GETCHARINC(c, cc);
left_prop = UCD_GRAPHBREAK(c);
cluster_end = cc;

while (cc < subject_end)
  {
  GETCHARINC(c, cc);
  right_prop = UCD_GRAPHBREAK(c);

  /* The table says a break is required between the two properties. */

  if ((PRIV(ucp_gbtable)[left_prop] & (1 << right_prop)) == 0)
    break;

  /* Two Regional Indicators may only stay together when an even number of
  Regional Indicators precedes the left-hand one, so count backwards. */

  if (left_prop == ucp_gbRegional_Indicator && right_prop == ucp_gbRegional_Indicator)
    {
    ri_before = 0;

    for (back = left_start; back > subject_begin; ri_before++)
      {
      back--;
      BACKCHAR(back);
      GETCHAR(c, back);

      if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator)
        break;
      }

    if ((ri_before & 1) != 0)
      break;  /* Odd count: grapheme break required. */
    }

  /* Extend or ZWJ after Extended_Pictographic leaves the left property
  alone, which allows any number of them before a following
  Extended_Pictographic. */

  if (!((right_prop == ucp_gbExtend || right_prop == ucp_gbZWJ) &&
        left_prop == ucp_gbExtended_Pictographic))
    left_prop = right_prop;

  left_start = cluster_end;
  cluster_end = cc;
  }

return cluster_end;
}
8588
8589
#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
8590
8591
/* Variant of the OP_EXTUNI helper that reads characters with the
GETCHARINC_INVALID / GETCHARBACK_INVALID macros. Whenever one of these macros
takes its failure action, scanning simply stops at that point.

Arguments:
  args   the JIT arguments; only the begin and end fields are read
  cc     start of the first character of the cluster

Returns: pointer just past the last accepted character, or NULL when the
very first read already takes the failure action. */
static PCRE2_SPTR SLJIT_FUNC do_extuni_utf_invalid(jit_arguments *args, PCRE2_SPTR cc)
{
PCRE2_SPTR subject_begin = args->begin;
PCRE2_SPTR subject_end = args->end;
PCRE2_SPTR left_start = cc;      /* Start of the character left of the candidate break. */
PCRE2_SPTR cluster_end = NULL;   /* End of the cluster accepted so far. */
PCRE2_SPTR back;
int left_prop, right_prop, ri_before;
BOOL at_first_char = TRUE;
uint32_t c;

for (;;)
  {
  GETCHARINC_INVALID(c, cc, subject_end, break);
  right_prop = UCD_GRAPHBREAK(c);

  if (at_first_char)
    {
    /* The first character is accepted without any table lookup. */
    left_prop = right_prop;
    at_first_char = FALSE;
    }
  else
    {
    /* The table says a break is required between the two properties. */

    if ((PRIV(ucp_gbtable)[left_prop] & (1 << right_prop)) == 0)
      break;

    /* Two Regional Indicators may only stay together when an even number
    of Regional Indicators precedes the left-hand one. */

    if (left_prop == ucp_gbRegional_Indicator && right_prop == ucp_gbRegional_Indicator)
      {
      ri_before = 0;

      for (back = left_start; back > subject_begin; ri_before++)
        {
        GETCHARBACK_INVALID(c, back, subject_begin, break);

        if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator)
          break;
        }

      if ((ri_before & 1) != 0)
        break;  /* Odd count: grapheme break required. */
      }

    /* Extend or ZWJ after Extended_Pictographic leaves the left property
    alone, which allows any number of them before a following
    Extended_Pictographic. */

    if (!((right_prop == ucp_gbExtend || right_prop == ucp_gbZWJ) &&
          left_prop == ucp_gbExtended_Pictographic))
      left_prop = right_prop;

    left_start = cluster_end;
    }

  cluster_end = cc;

  if (cc >= subject_end)
    break;
  }

return cluster_end;
}
8655
8656
/* OP_EXTUNI helper for the case where every code unit is treated as one
character. The first unit is read directly from *cc (not through
GETCHARINC). In the 32-bit library, values of 0x110000 and above are
rejected: such a first unit makes the function return NULL, a later one just
ends the cluster.

Arguments:
  args   the JIT arguments; only the begin and end fields are read
  cc     the first code unit of the cluster

Returns: pointer just past the last code unit of the cluster, or NULL as
described above. */
static PCRE2_SPTR SLJIT_FUNC do_extuni_no_utf(jit_arguments *args, PCRE2_SPTR cc)
{
PCRE2_SPTR subject_begin = args->begin;
PCRE2_SPTR subject_end = args->end;
PCRE2_SPTR back;
int left_prop, right_prop, ri_before;
uint32_t c;

c = *cc++;

#if PCRE2_CODE_UNIT_WIDTH == 32
if (c >= 0x110000)
  return NULL;
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */

left_prop = UCD_GRAPHBREAK(c);

/* cc is only advanced once the unit it points to has been accepted. */

for (; cc < subject_end; cc++)
  {
  c = *cc;
#if PCRE2_CODE_UNIT_WIDTH == 32
  if (c >= 0x110000)
    break;
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
  right_prop = UCD_GRAPHBREAK(c);

  /* The table says a break is required between the two properties. */

  if ((PRIV(ucp_gbtable)[left_prop] & (1 << right_prop)) == 0)
    break;

  /* Two Regional Indicators may only stay together when an even number of
  Regional Indicators precedes the left-hand one (which sits at cc - 1). */

  if (left_prop == ucp_gbRegional_Indicator && right_prop == ucp_gbRegional_Indicator)
    {
    ri_before = 0;

    for (back = cc - 1; back > subject_begin; ri_before++)
      {
      c = *--back;
#if PCRE2_CODE_UNIT_WIDTH == 32
      if (c >= 0x110000)
        break;
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */

      if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator)
        break;
      }

    if ((ri_before & 1) != 0)
      break;  /* Odd count: grapheme break required. */
    }

  /* Extend or ZWJ after Extended_Pictographic leaves the left property
  alone, which allows any number of them before a following
  Extended_Pictographic. */

  if (!((right_prop == ucp_gbExtend || right_prop == ucp_gbZWJ) &&
        left_prop == ucp_gbExtended_Pictographic))
    left_prop = right_prop;
  }

return cc;
}
8725
8726
#endif /* SUPPORT_UNICODE */
8727
8728
/* Emits the matching-path code for one single-character item (character
types, OP_ANY and friends, properties, literal characters, negated
characters and character classes).

Arguments:
  common         the compiler state
  type           the opcode to compile
  cc             pointer to the data that follows the opcode
  backtracks     jump list that collects every "no match" jump
  check_str_ptr  when TRUE, the end-of-subject check is emitted here as well
                 (through detect_partial_match(), or a direct comparison
                 against STR_END in some OP_CHAR[I] paths)

Returns: pointer past the data consumed for this opcode (cc itself when the
opcode has no operand). Any opcode not handled below reaches
SLJIT_UNREACHABLE(). */
static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr)
8729
{
8730
DEFINE_COMPILER;
8731
int length;
8732
unsigned int c, oc, bit;
8733
compare_context context;
8734
struct sljit_jump *jump[3];
8735
jump_list *end_list;
8736
#ifdef SUPPORT_UNICODE
8737
PCRE2_UCHAR propdata[5];
8738
#endif /* SUPPORT_UNICODE */
8739
8740
switch(type)
8741
  {
  /* \d, \s, \w and their negations share one pattern: read the character
  type into TMP1 (the last argument of the reader is TRUE for the negated
  opcode), AND it with the ctype_* bit and fail on the unwanted flag state. */
8742
  case OP_NOT_DIGIT:
8743
  case OP_DIGIT:
8744
  /* Digits are usually 0-9, so it is worth to optimize them. */
8745
  if (check_str_ptr)
8746
    detect_partial_match(common, backtracks);
8747
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8748
  if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_digit, FALSE))
8749
    read_char7_type(common, backtracks, type == OP_NOT_DIGIT);
8750
  else
8751
#endif
8752
    read_char8_type(common, backtracks, type == OP_NOT_DIGIT);
8753
    /* Flip the starting bit in the negative case. */
8754
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_digit);
8755
  add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
8756
  return cc;
8757
8758
  case OP_NOT_WHITESPACE:
8759
  case OP_WHITESPACE:
8760
  if (check_str_ptr)
8761
    detect_partial_match(common, backtracks);
8762
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8763
  if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_space, FALSE))
8764
    read_char7_type(common, backtracks, type == OP_NOT_WHITESPACE);
8765
  else
8766
#endif
8767
    read_char8_type(common, backtracks, type == OP_NOT_WHITESPACE);
8768
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_space);
8769
  add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
8770
  return cc;
8771
8772
  case OP_NOT_WORDCHAR:
8773
  case OP_WORDCHAR:
8774
  if (check_str_ptr)
8775
    detect_partial_match(common, backtracks);
8776
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8777
  if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_word, FALSE))
8778
    read_char7_type(common, backtracks, type == OP_NOT_WORDCHAR);
8779
  else
8780
#endif
8781
    read_char8_type(common, backtracks, type == OP_NOT_WORDCHAR);
8782
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_word);
8783
  add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
8784
  return cc;
8785
8786
  /* Reads one character (advancing STR_PTR) and fails when a newline is
  found at that position. */
  case OP_ANY:
8787
  if (check_str_ptr)
8788
    detect_partial_match(common, backtracks);
8789
  read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
8790
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
8791
    {
    /* Two-unit fixed newline: fail only when the character just read is the
    first newline unit and the unit now at STR_PTR is the second one. Being
    at the end of the subject skips the second test via end_list. */
8792
    jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
8793
    end_list = NULL;
8794
    if (common->mode != PCRE2_JIT_PARTIAL_HARD)
8795
      add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
8796
    else
8797
      check_str_end(common, &end_list);
8798
8799
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
8800
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
8801
    set_jumps(end_list, LABEL());
8802
    JUMPHERE(jump[0]);
8803
    }
8804
  else
8805
    check_newlinechar(common, common->nltype, backtracks, TRUE);
8806
  return cc;
8807
8808
  /* Skips exactly one character, whatever it is. */
  case OP_ALLANY:
8809
  if (check_str_ptr)
8810
    detect_partial_match(common, backtracks);
8811
#ifdef SUPPORT_UNICODE
8812
  if (common->utf)
8813
    {
8814
    if (common->invalid_utf)
8815
      {
      /* With invalid_utf the generic reader is used. */
8816
      read_char(common, 0, READ_CHAR_MAX, backtracks, READ_CHAR_UPDATE_STR_PTR);
8817
      return cc;
8818
      }
8819
8820
#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
    /* Otherwise only the first code unit is inspected to find out how far
    STR_PTR has to be advanced. */
8821
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
8822
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8823
#if PCRE2_CODE_UNIT_WIDTH == 8
    /* First unit >= 0xc0: add the value found in PRIV(utf8_table4),
    indexed by (unit - 0xc0). */
8824
    jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
8825
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
8826
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
8827
#elif PCRE2_CODE_UNIT_WIDTH == 16
    /* First unit >= 0xd800: TMP1 becomes 1 when (unit & 0xfc00) == 0xd800
    and 0 otherwise; shifted left by one, it is added to STR_PTR. */
8828
    jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
8829
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
8830
    OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xd800);
8831
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
8832
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
8833
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
8834
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
8835
    JUMPHERE(jump[0]);
8836
    return cc;
8837
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
8838
    }
8839
#endif /* SUPPORT_UNICODE */
  /* Non-UTF mode (and valid UTF in the 32-bit library): one code unit. */
8840
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8841
  return cc;
8842
8843
  /* Always advances by exactly one code unit. */
  case OP_ANYBYTE:
8844
  if (check_str_ptr)
8845
    detect_partial_match(common, backtracks);
8846
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8847
  return cc;
8848
8849
#ifdef SUPPORT_UNICODE
  /* A property test is compiled by building a five-unit XCL_* sequence in
  propdata[] from the two operand units at cc and handing it to
  compile_xclass_matchingpath(). */
8850
  case OP_NOTPROP:
8851
  case OP_PROP:
8852
  propdata[0] = XCL_HASPROP;
8853
  propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
8854
  propdata[2] = cc[0];
8855
  propdata[3] = cc[1];
8856
  propdata[4] = XCL_END;
8857
  if (check_str_ptr)
8858
    detect_partial_match(common, backtracks);
8859
  compile_xclass_matchingpath(common, propdata, backtracks);
8860
  return cc + 2;
8861
#endif
8862
8863
  case OP_ANYNL:
8864
  if (check_str_ptr)
8865
    detect_partial_match(common, backtracks);
8866
  read_char(common, common->bsr_nlmin, common->bsr_nlmax, NULL, 0);
  /* jump[0]: the character is not CR, so it goes to the generic newline
  test further down. */
8867
  jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
8868
  /* We don't need to handle soft partial matching case. */
8869
  end_list = NULL;
8870
  if (common->mode != PCRE2_JIT_PARTIAL_HARD)
8871
    add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
8872
  else
8873
    check_str_end(common, &end_list);
  /* After CR: when the next unit is NL it is consumed too (jump[2] then
  skips the generic test); otherwise jump[1] leaves STR_PTR after the CR. */
8874
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
8875
  jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
8876
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8877
  jump[2] = JUMP(SLJIT_JUMP);
8878
  JUMPHERE(jump[0]);
8879
  check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
8880
  set_jumps(end_list, LABEL());
8881
  JUMPHERE(jump[1]);
8882
  JUMPHERE(jump[2]);
8883
  return cc;
8884
8885
  /* \h and \H: read a character, fast-call the routine collected in
  common->hspace and branch on the zero flag it leaves behind. Only the
  negated form passes backtracks and READ_CHAR_UPDATE_STR_PTR to read_char(). */
  case OP_NOT_HSPACE:
8886
  case OP_HSPACE:
8887
  if (check_str_ptr)
8888
    detect_partial_match(common, backtracks);
8889
8890
  if (type == OP_NOT_HSPACE)
8891
    read_char(common, 0x9, 0x3000, backtracks, READ_CHAR_UPDATE_STR_PTR);
8892
  else
8893
    read_char(common, 0x9, 0x3000, NULL, 0);
8894
8895
  add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
8896
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
8897
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
8898
  return cc;
8899
8900
  /* \v and \V: same scheme as \h and \H, using common->vspace. */
  case OP_NOT_VSPACE:
8901
  case OP_VSPACE:
8902
  if (check_str_ptr)
8903
    detect_partial_match(common, backtracks);
8904
8905
  if (type == OP_NOT_VSPACE)
8906
    read_char(common, 0xa, 0x2029, backtracks, READ_CHAR_UPDATE_STR_PTR);
8907
  else
8908
    read_char(common, 0xa, 0x2029, NULL, 0);
8909
8910
  add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
8911
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
8912
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
8913
  return cc;
8914
8915
#ifdef SUPPORT_UNICODE
  /* \X is matched by calling one of the do_extuni_* C helpers; the value
  it returns becomes the new STR_PTR. */
8916
  case OP_EXTUNI:
8917
  if (check_str_ptr)
8918
    detect_partial_match(common, backtracks);
8919
8920
  /* The helper takes (arguments, string pointer): the assert documents that
  STR_PTR already lives in SLJIT_R1, so only SLJIT_R0 has to be loaded. */
  SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
8921
  OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
8922
8923
#if PCRE2_CODE_UNIT_WIDTH != 32
8924
  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
8925
    common->utf ? (common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_utf)) : SLJIT_FUNC_ADDR(do_extuni_no_utf));
  /* A zero (NULL) return value means no match. */
8926
  if (common->invalid_utf)
8927
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
8928
#else
8929
  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
8930
    common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_no_utf));
8931
  if (!common->utf || common->invalid_utf)
8932
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
8933
#endif
8934
8935
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
8936
8937
  if (common->mode == PCRE2_JIT_PARTIAL_HARD)
8938
    {
    /* The partial check is skipped when the returned position is still
    below STR_END. */
8939
    jump[0] = CMP(SLJIT_LESS, SLJIT_RETURN_REG, 0, STR_END, 0);
8940
    /* Since we successfully read a char above, partial matching must occur. */
8941
    check_partial(common, TRUE);
8942
    JUMPHERE(jump[0]);
8943
    }
8944
  return cc;
8945
#endif
8946
8947
  case OP_CHAR:
8948
  case OP_CHARI:
  /* length is the number of code units of the literal stored at cc. */
8949
  length = 1;
8950
#ifdef SUPPORT_UNICODE
8951
  if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
8952
#endif
8953
8954
  if (check_str_ptr && common->mode != PCRE2_JIT_COMPLETE)
8955
    detect_partial_match(common, backtracks);
8956
8957
  /* Case-sensitive characters, characters without another case, and
  characters for which char_get_othercase_bit() returns non-zero are
  compared as a fixed sequence of code units by byte_sequence_compare(). */
  if (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0)
8958
    {
    /* STR_PTR is advanced first; the bounds check is emitted only for
    multi-unit characters or when this function is responsible for it in
    complete mode. */
8959
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
8960
    if (length > 1 || (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE))
8961
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
8962
8963
    context.length = IN_UCHARS(length);
8964
    context.sourcereg = -1;
8965
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
8966
    context.ucharptr = 0;
8967
#endif
8968
    return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
8969
    }
8970
8971
  /* Remaining case: caseless match where the character is compared against
  both c and its other case oc. */
#ifdef SUPPORT_UNICODE
8972
  if (common->utf)
8973
    {
8974
    GETCHAR(c, cc);
8975
    }
8976
  else
8977
#endif
8978
    c = *cc;
8979
8980
  SLJIT_ASSERT(type == OP_CHARI && char_has_othercase(common, cc));
8981
8982
  if (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE)
8983
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
8984
8985
  oc = char_othercase(common, c);
  /* The two arguments after common are min(c, oc) and max(c, oc). */
8986
  read_char(common, c < oc ? c : oc, c > oc ? c : oc, NULL, 0);
8987
8988
  SLJIT_ASSERT(!is_powerof2(c ^ oc));
8989
8990
  if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
8991
    {
    /* Branch-free form: replace TMP1 with c when it equals oc, then a
    single comparison against c decides. */
8992
    OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, oc);
8993
    CMOV(SLJIT_EQUAL, TMP1, SLJIT_IMM, c);
8994
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
8995
    }
8996
  else
8997
    {
8998
    jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
8999
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
9000
    JUMPHERE(jump[0]);
9001
    }
9002
  return cc + length;
9003
9004
  /* Negated single character: any one character except c (and, for
  OP_NOTI, except its other case). */
  case OP_NOT:
9005
  case OP_NOTI:
9006
  if (check_str_ptr)
9007
    detect_partial_match(common, backtracks);
9008
9009
  length = 1;
9010
#ifdef SUPPORT_UNICODE
9011
  if (common->utf)
9012
    {
9013
#if PCRE2_CODE_UNIT_WIDTH == 8
9014
    c = *cc;
9015
    if (c < 128 && !common->invalid_utf)
9016
      {
      /* Fast path for a pattern character below 128 (invalid_utf off): only
      the first subject code unit needs to be compared. */
9017
      OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
9018
      if (type == OP_NOT || !char_has_othercase(common, cc))
9019
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
9020
      else
9021
        {
9022
        /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
9023
        OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
9024
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
9025
        }
9026
      /* Skip the variable-length character. */
9027
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9028
      jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
9029
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
9030
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
9031
      JUMPHERE(jump[0]);
9032
      return cc + 1;
9033
      }
9034
    else
9035
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
9036
      {
9037
      GETCHARLEN(c, cc, length);
9038
      }
9039
    }
9040
  else
9041
#endif /* SUPPORT_UNICODE */
9042
    c = *cc;
9043
9044
  /* General path: read a full character and fail when it equals c (or oc). */
  if (type == OP_NOT || !char_has_othercase(common, cc))
9045
    {
9046
    read_char(common, c, c, backtracks, READ_CHAR_UPDATE_STR_PTR);
9047
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
9048
    }
9049
  else
9050
    {
9051
    oc = char_othercase(common, c);
9052
    read_char(common, c < oc ? c : oc, c > oc ? c : oc, backtracks, READ_CHAR_UPDATE_STR_PTR);
9053
    bit = c ^ oc;
9054
    if (is_powerof2(bit))
9055
      {
      /* c and oc differ in one bit only: force that bit on and compare once. */
9056
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
9057
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
9058
      }
9059
    else
9060
      {
9061
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
9062
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
9063
      }
9064
    }
9065
  return cc + length;
9066
9067
  /* Character class stored as a 32-byte bitmap at cc. */
  case OP_CLASS:
9068
  case OP_NCLASS:
9069
  if (check_str_ptr)
9070
    detect_partial_match(common, backtracks);
9071
9072
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  /* bit is the upper limit handed to read_char(): 127 when in UTF mode
  is_char7_bitset() accepts the bitmap, 255 otherwise. */
9073
  bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255;
9074
  if (type == OP_NCLASS)
9075
    read_char(common, 0, bit, backtracks, READ_CHAR_UPDATE_STR_PTR);
9076
  else
9077
    read_char(common, 0, bit, NULL, 0);
9078
#else
9079
  if (type == OP_NCLASS)
9080
    read_char(common, 0, 255, backtracks, READ_CHAR_UPDATE_STR_PTR);
9081
  else
9082
    read_char(common, 0, 255, NULL, 0);
9083
#endif
9084
9085
  /* When optimize_class() returns TRUE nothing more is emitted here. */
  if (optimize_class(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
9086
    return cc + 32 / sizeof(PCRE2_UCHAR);
9087
9088
  /* Characters above the bitmap range: OP_CLASS fails for them, OP_NCLASS
  matches them by jumping (jump[0]) over the bitmap test. */
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
9089
  jump[0] = NULL;
9090
  if (common->utf)
9091
    {
9092
    jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
9093
    if (type == OP_CLASS)
9094
      {
9095
      add_jump(compiler, backtracks, jump[0]);
9096
      jump[0] = NULL;
9097
      }
9098
    }
9099
#elif PCRE2_CODE_UNIT_WIDTH != 8
9100
  jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
9101
  if (type == OP_CLASS)
9102
    {
9103
    add_jump(compiler, backtracks, jump[0]);
9104
    jump[0] = NULL;
9105
    }
9106
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
9107
9108
  /* Bitmap test: byte index = c >> 3, mask = 1 << (c & 7); fail when the
  selected bit is clear. */
  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
9109
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
9110
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
9111
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
9112
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
9113
  add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
9114
9115
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
9116
  if (jump[0] != NULL)
9117
    JUMPHERE(jump[0]);
9118
#endif
9119
  return cc + 32 / sizeof(PCRE2_UCHAR);
9120
9121
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  /* Extended class: the data after the LINK_SIZE field is handed to
  compile_xclass_matchingpath(); GET(cc, 0) supplies the value used to step
  over the whole item. */
9122
  case OP_XCLASS:
9123
  if (check_str_ptr)
9124
    detect_partial_match(common, backtracks);
9125
  compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
9126
  return cc + GET(cc, 0) - 1;
9127
#endif
9128
  }
9129
SLJIT_UNREACHABLE();
9130
return cc;
9131
}
9132
9133
/* Emits the matching-path code for the item at cc, merging a run of
consecutive OP_CHAR / OP_CHARI items into one fixed-length comparison so
that a single length check covers the whole run. The emitted code consumes
at least one input character.

Arguments:
  common      the compiler state
  cc          pointer to the first opcode
  ccend       end of the opcode range that may be merged
  backtracks  jump list that collects every "no match" jump

Returns: pointer to the first opcode that has not been compiled. */
static SLJIT_INLINE PCRE2_SPTR compile_charn_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, jump_list **backtracks)
{
DEFINE_COMPILER;
PCRE2_SPTR scan = cc;
compare_context context;
int size;

/* First pass: add up the length of the leading run of characters that can
be compared as plain code unit sequences. The scan stops at the first item
that cannot be merged, or once the run has grown beyond 128. */

context.length = 0;
while (scan < ccend)
  {
  size = 0;

  if (*scan == OP_CHAR || *scan == OP_CHARI)
    {
    /* A caseless character that has another case, and for which
    char_get_othercase_bit() returns zero, cannot be merged. */
    if (*scan != OP_CHARI || !char_has_othercase(common, scan + 1) || char_get_othercase_bit(common, scan + 1) != 0)
      {
      size = 1;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(scan[1]))
        size += GET_EXTRALEN(scan[1]);
#endif
      }
    }

  scan += 1 + size;
  context.length += IN_UCHARS(size);

  if (size <= 0 || context.length > 128)
    break;
  }

/* A non-fixed length character will be checked if length == 0. */

if (!(context.length > 0))
  return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);

/* We have a fixed-length sequence: advance STR_PTR over all of it, check
the subject end once, then compare item by item until the context is used
up. */

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));

context.sourcereg = -1;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
context.ucharptr = 0;
#endif

do
  {
  cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks);
  }
while (context.length > 0);

return cc;
}
9198
9199
/* Forward declarations. compile_matchingpath() is already needed by the
opcode helpers that follow (for example compile_recurse_matchingpath()). */
9200
static void compile_matchingpath(compiler_common *, PCRE2_SPTR, PCRE2_SPTR, backtrack_common *);
9201
static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
9202
9203
/* Allocates a backtrack record of (size) bytes with sljit_alloc_memory(),
clears it, and pushes it on top of the parent's backtrack chain.

The expansion site must have `compiler`, `parent` and `backtrack` in scope;
`backtrack` is assigned the new record. If the sljit compiler reports an
error after the allocation, the enclosing function returns `error`.

  size     size of the record in bytes (evaluated twice)
  ccstart  pattern position stored in backtrack->cc
  error    value returned from the enclosing function on failure */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
9215
9216
/* Variant of the backtrack push for functions returning void: allocates a
backtrack record of (size) bytes with sljit_alloc_memory(), clears it, and
pushes it on top of the parent's backtrack chain.

The expansion site must have `compiler`, `parent` and `backtrack` in scope;
`backtrack` is assigned the new record. If the sljit compiler reports an
error after the allocation, the enclosing function returns immediately.

  size     size of the record in bytes (evaluated twice)
  ccstart  pattern position stored in backtrack->cc */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
9228
9229
/* Views the local variable `backtrack` as a pointer to the given, more
specific backtrack structure type. `backtrack` must be in scope. */
#define BACKTRACK_AS(type) ((type *)backtrack)
9230
9231
/* Emits the code that selects one capture group out of the groups listed
for a named reference (OP_DNREF / OP_DNREFI).

The name table entries belonging to the reference are tried in order. For
every entry except the last one, TMP2 is loaded with the address of the
group's OVECTOR pair and the search ends as soon as the group's start value
differs from OVECTOR(1). The last entry is always left in TMP2.

  common      compiler state
  cc          the OP_DNREF / OP_DNREFI opcode
  backtracks  when not NULL (and common->unset_backref is not set), receives
              a jump taken if the last entry's start equals OVECTOR(1)

The OVECTOR offset goes to TMP2. TMP1 is overwritten. */
static void compile_dnref_search(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
PCRE2_SPTR entry = common->name_table + GET2(cc, 1) * common->name_entry_size;
int remaining = (int)GET2(cc, 1 + IMM2_SIZE) - 1;
unsigned int ovector_index;
jump_list *found = NULL;

SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));

/* All entries but the last one: leave the search when the group differs
from the value held in OVECTOR(1). */
for (; remaining > 0; remaining--, entry += common->name_entry_size)
  {
  ovector_index = GET2(entry, 0) << 1;
  GET_LOCAL_BASE(TMP2, 0, OVECTOR(ovector_index));
  add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovector_index), TMP1, 0));
  }

/* Last entry: nothing is left to fall back to. */
ovector_index = GET2(entry, 0) << 1;
GET_LOCAL_BASE(TMP2, 0, OVECTOR(ovector_index));
if (backtracks != NULL && !common->unset_backref)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovector_index), TMP1, 0));

set_jumps(found, LABEL());
}
9260
9261
/* Emits the code that matches the subject at STR_PTR against the text of a
previously captured group (a back reference).

  common      compiler state
  cc          the back reference opcode. For OP_REF / OP_REFI the group
              number is read from the pattern and the capture is loaded from
              OVECTOR. For any other opcode TMP2 must already hold the
              address of the group's OVECTOR pair (compile_dnref_search()
              leaves it there).
  backtracks  jump list that receives every failure exit
  withchecks  when TRUE, the unset group check (unless common->unset_backref
              is set) and the empty capture check are emitted as well
  emptyfail   when TRUE an empty capture is a failure (its jump is added to
              backtracks); otherwise the empty case continues after the
              emitted code

TMP1, TMP2 (and TMP3 in the UTF caseless path) are overwritten. */
static void compile_ref_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
9262
{
9263
DEFINE_COMPILER;
9264
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
9265
int offset = 0;
9266
struct sljit_jump *jump = NULL;
9267
struct sljit_jump *partial;
9268
struct sljit_jump *nopartial;
9269
#if defined SUPPORT_UNICODE
9270
struct sljit_label *loop;
9271
struct sljit_label *caseless_loop;
9272
jump_list *no_match = NULL;
/* Registers borrowed by the UTF caseless loop. Their previous contents are
saved in the common->iref_ptr slots and reloaded on every exit. */
9273
int source_reg = COUNT_MATCH;
9274
int source_end_reg = ARGUMENTS;
9275
int char1_reg = STACK_LIMIT;
9276
#endif /* SUPPORT_UNICODE */
9277
9278
/* TMP1 = start of the captured text. */
if (ref)
9279
  {
9280
  offset = GET2(cc, 1) << 1;
9281
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9282
  /* OVECTOR(1) contains the "string begin - 1" constant. */
9283
  if (withchecks && !common->unset_backref)
9284
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9285
  }
9286
else
9287
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9288
9289
#if defined SUPPORT_UNICODE
/* NOTE(review): the condition tests OP_REFI only, so `ref` is always TRUE in
this branch and the `else` arm of the `if (ref)` below looks unreachable;
other caseless opcodes take the generic caselesscmp path in UTF mode.
Confirm that this is intended. */
9290
if (common->utf && *cc == OP_REFI)
9291
  {
9292
  SLJIT_ASSERT(common->iref_ptr != 0);
9293
9294
  /* TMP2 = end of the captured text. */
  if (ref)
9295
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9296
  else
9297
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9298
9299
  /* Empty capture (start == end). */
  if (withchecks && emptyfail)
9300
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0));
9301
9302
  /* Save the three borrowed registers. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr, source_reg, 0);
9303
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw), source_end_reg, 0);
9304
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2, char1_reg, 0);
9305
9306
  OP1(SLJIT_MOV, source_reg, 0, TMP1, 0);
9307
  OP1(SLJIT_MOV, source_end_reg, 0, TMP2, 0);
9308
9309
  /* One iteration per character of the captured text. `jump` is the
  successful exit (captured text exhausted), `partial` is taken when the
  subject ends first. */
  loop = LABEL();
9310
  jump = CMP(SLJIT_GREATER_EQUAL, source_reg, 0, source_end_reg, 0);
9311
  partial = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
9312
9313
  /* Read original character. It must be a valid UTF character. */
  /* STR_PTR is parked in TMP3 while read_char() reads from source_reg. */
9314
  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
9315
  OP1(SLJIT_MOV, STR_PTR, 0, source_reg, 0);
9316
9317
  read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR | READ_CHAR_VALID_UTF);
9318
9319
  OP1(SLJIT_MOV, source_reg, 0, STR_PTR, 0);
9320
  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
9321
  OP1(SLJIT_MOV, char1_reg, 0, TMP1, 0);
9322
9323
  /* Read second character. */
9324
  read_char(common, 0, READ_CHAR_MAX, &no_match, READ_CHAR_UPDATE_STR_PTR);
9325
9326
  /* Identical characters: continue with the next pair. */
  CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);
9327
9328
  OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
9329
9330
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
9331
9332
  /* TMP2 = TMP2 * 12, computed as (TMP2 << 2) + (TMP2 << 3); presumably
  sizeof(ucd_record) - TODO confirm. Then rebase onto PRIV(ucd_records). */
  OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
9333
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
9334
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
9335
9336
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records));
9337
9338
  /* Subject character (kept in TMP3) plus its other_case field must equal
  the captured character. */
  OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, other_case));
9339
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, caseset));
9340
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);
9341
  CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);
9342
9343
  /* A caseset value of 0 ends the comparison with a mismatch; otherwise
  TMP2 becomes the address of the 32 bit entry with that index in
  PRIV(ucd_caseless_sets). */
  add_jump(compiler, &no_match, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
9344
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
9345
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_caseless_sets));
9346
9347
  /* Walk the set: an entry equal to the captured character is a match, a
  smaller entry continues the walk, a larger one falls through to no_match
  (presumably the sets are sorted ascending - TODO confirm). */
  caseless_loop = LABEL();
9348
  OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9349
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(uint32_t));
9350
  OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, TMP1, 0, char1_reg, 0);
9351
  JUMPTO(SLJIT_EQUAL, loop);
9352
  JUMPTO(SLJIT_LESS, caseless_loop);
9353
9354
  /* Failure exit: restore the borrowed registers and backtrack. In
  PCRE2_JIT_COMPLETE mode running out of subject lands here as well. */
  set_jumps(no_match, LABEL());
9355
  if (common->mode == PCRE2_JIT_COMPLETE)
9356
    JUMPHERE(partial);
9357
9358
  OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
9359
  OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
9360
  OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
9361
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
9362
9363
  /* Partial matching modes: separate exit for "subject ended", which calls
  check_partial() before backtracking. */
  if (common->mode != PCRE2_JIT_COMPLETE)
9364
    {
9365
    JUMPHERE(partial);
9366
    OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
9367
    OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
9368
    OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
9369
9370
    check_partial(common, FALSE);
9371
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
9372
    }
9373
9374
  /* Success exit: the whole captured text was matched. The function returns
  here, so the generic `jump` handling at the end is not used for this path. */
  JUMPHERE(jump);
9375
  OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
9376
  OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
9377
  OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
9378
  return;
9379
  }
9380
else
9381
#endif /* SUPPORT_UNICODE */
9382
  {
9383
  /* TMP2 = end - start, the length of the captured text; the zero flag is
  set when it is empty. */
  if (ref)
9384
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
9385
  else
9386
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
9387
9388
  if (withchecks)
9389
    jump = JUMP(SLJIT_ZERO);
9390
9391
  /* Advance STR_PTR over the whole length first; `partial` is taken when
  that runs past the end of the subject. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
9392
  partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
9393
  if (common->mode == PCRE2_JIT_COMPLETE)
9394
    add_jump(compiler, backtracks, partial);
9395
9396
  /* Shared compare routine; a non-zero TMP2 afterwards is treated as a
  mismatch. */
  add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
9397
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
9398
9399
  if (common->mode != PCRE2_JIT_COMPLETE)
9400
    {
9401
    nopartial = JUMP(SLJIT_JUMP);
9402
    JUMPHERE(partial);
9403
    /* TMP2 += STR_END - STR_PTR: shrink the length to the part of the
    subject that is actually available. */
9404
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
9405
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
9406
    partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
9407
    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
9408
    add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
9409
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
9410
    JUMPHERE(partial);
9411
    check_partial(common, FALSE);
9412
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
9413
    JUMPHERE(nopartial);
9414
    }
9415
  }
9416
9417
/* Empty capture: either a failure or simply the end of the emitted code. */
if (jump != NULL)
9418
  {
9419
  if (emptyfail)
9420
    add_jump(compiler, backtracks, jump);
9421
  else
9422
    JUMPHERE(jump);
9423
  }
9424
}
9425
9426
/* Emits the matching path of a back reference that is followed by a repeat
opcode (OP_CRSTAR ... OP_CRMINRANGE).

  common  compiler state
  cc      the back reference opcode
  parent  backtrack record whose chain receives the new
          ref_iterator_backtrack

Returns the pattern position after the reference and its repeat opcode, or
NULL when PUSH_BACKTRACK detects an sljit compiler error.

Within this function max == 0 stands for "no upper limit". */
static SLJIT_INLINE PCRE2_SPTR compile_ref_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9427
{
9428
DEFINE_COMPILER;
9429
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
9430
backtrack_common *backtrack;
9431
PCRE2_UCHAR type;
9432
int offset = 0;
9433
struct sljit_label *label;
9434
struct sljit_jump *zerolength;
9435
struct sljit_jump *jump = NULL;
9436
PCRE2_SPTR ccbegin = cc;
9437
int min = 0, max = 0;
9438
BOOL minimize;
9439
9440
PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);
9441
9442
/* The non-ref opcodes carry one more IMM2 operand (compile_dnref_search()
reads two of them); skipping it makes cc[1 + IMM2_SIZE] the repeat opcode
in both cases. */
if (ref)
9443
  offset = GET2(cc, 1) << 1;
9444
else
9445
  cc += IMM2_SIZE;
9446
type = cc[1 + IMM2_SIZE];
9447
9448
/* OP_CRSTAR is even, so the lowest bit of the repeat opcode selects the
minimizing variant (presumably the CRMIN* opcodes are the odd ones - the
assert only checks OP_CRSTAR). */
SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
9449
minimize = (type & 0x1) != 0;
9450
switch(type)
9451
  {
9452
  case OP_CRSTAR:
9453
  case OP_CRMINSTAR:
9454
  min = 0;
9455
  max = 0;
9456
  cc += 1 + IMM2_SIZE + 1;
9457
  break;
9458
  case OP_CRPLUS:
9459
  case OP_CRMINPLUS:
9460
  min = 1;
9461
  max = 0;
9462
  cc += 1 + IMM2_SIZE + 1;
9463
  break;
9464
  case OP_CRQUERY:
9465
  case OP_CRMINQUERY:
9466
  min = 0;
9467
  max = 1;
9468
  cc += 1 + IMM2_SIZE + 1;
9469
  break;
9470
  case OP_CRRANGE:
9471
  case OP_CRMINRANGE:
9472
  min = GET2(cc, 1 + IMM2_SIZE + 1);
9473
  max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
9474
  cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
9475
  break;
9476
  default:
9477
  SLJIT_UNREACHABLE();
9478
  break;
9479
  }
9480
9481
/* Maximizing repeat: match as often as allowed, pushing a STR_PTR value on
the stack for the iterations at which matching may stop. */
if (!minimize)
9482
  {
9483
  if (min == 0)
9484
    {
9485
    allocate_stack(common, 2);
9486
    if (ref)
9487
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9488
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9489
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
9490
    /* Temporary release of STR_PTR. */
9491
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9492
    /* Handles both invalid and empty cases. Since the minimum repeat
    is zero, the invalid case is basically the same as an empty case. */
9494
    if (ref)
9495
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9496
    else
9497
      {
9498
      compile_dnref_search(common, ccbegin, NULL);
9499
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
      /* Keep the selected OVECTOR address for the iterations below. */
9500
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
9501
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9502
      }
9503
    /* Restore if not zero length. */
9504
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9505
    }
9506
  else
9507
    {
9508
    allocate_stack(common, 1);
9509
    if (ref)
9510
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9511
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9512
    if (ref)
9513
      {
      /* With min > 0 an unset group (start == OVECTOR(1)) is a failure. */
9514
      add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9515
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9516
      }
9517
    else
9518
      {
9519
      compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
9520
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9521
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
9522
      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9523
      }
9524
    }
9525
9526
  /* POSSESSIVE0 is the iteration counter; it is only needed when a limit
  greater than one has to be checked. */
  if (min > 1 || max > 1)
9527
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
9528
9529
  label = LABEL();
9530
  if (!ref)
9531
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
9532
  compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
9533
9534
  if (min > 1 || max > 1)
9535
    {
9536
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
9537
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
9538
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
    /* Below min: repeat without pushing a STR_PTR value. */
9539
    if (min > 1)
9540
      CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
9541
    if (max > 1)
9542
      {
      /* Below max: push STR_PTR and try one more iteration. */
9543
      jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
9544
      allocate_stack(common, 1);
9545
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9546
      JUMPTO(SLJIT_JUMP, label);
9547
      JUMPHERE(jump);
9548
      }
9549
    }
9550
9551
  if (max == 0)
9552
    {
9553
    /* Includes min > 1 case as well. */
9554
    allocate_stack(common, 1);
9555
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9556
    JUMPTO(SLJIT_JUMP, label);
9557
    }
9558
9559
  JUMPHERE(zerolength);
9560
  BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
9561
9562
  count_match(common);
9563
  return cc;
9564
  }
9565
9566
/* Minimizing repeat. STACK(0) receives STR_PTR, STACK(1) is the iteration
counter, and for the non-ref opcodes STACK(2) keeps the OVECTOR address
found by compile_dnref_search(). */
allocate_stack(common, ref ? 2 : 3);
9567
if (ref)
9568
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9569
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9570
if (type != OP_CRMINSTAR)
9571
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
9572
9573
if (min == 0)
9574
  {
9575
  /* Handles both invalid and empty cases. Since the minimum repeat
  is zero, the invalid case is basically the same as an empty case. */
9577
  if (ref)
9578
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9579
  else
9580
    {
9581
    compile_dnref_search(common, ccbegin, NULL);
9582
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9583
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
9584
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9585
    }
9586
  /* Length is non-zero, we can match real repeats. */
9587
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  /* With min == 0 the first pass skips the iteration code that follows. */
9588
  jump = JUMP(SLJIT_JUMP);
9589
  }
9590
else
9591
  {
9592
  if (ref)
9593
    {
9594
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9595
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9596
    }
9597
  else
9598
    {
9599
    compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
9600
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9601
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
9602
    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9603
    }
9604
  }
9605
9606
/* Entry point of one more iteration; it is recorded in the backtrack
record and is also the target of the "below min" loop further down. */
BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
9607
if (max > 0)
9608
  add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
9609
9610
if (!ref)
9611
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
9612
compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
9613
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9614
9615
if (min > 1)
9616
  {
9617
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9618
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
9619
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9620
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath);
9621
  }
9622
else if (max > 0)
9623
  OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
9624
9625
if (jump != NULL)
9626
  JUMPHERE(jump);
9627
JUMPHERE(zerolength);
9628
9629
count_match(common);
9630
return cc;
9631
}
9632
9633
/* Emits the matching path of a recursion opcode.

The opcode at cc stores the offset (relative to common->start) of the called
group. When get_framesize() reports no_stack for that group, its body is
compiled in place. Otherwise a fast call to a shared routine is emitted; the
routine is described by a recurse_entry that is looked up in, or appended
to, common->entries.

  common  compiler state
  cc      the recursion opcode
  parent  backtrack record whose chain receives the new recurse_backtrack

Returns the pattern position after the opcode, or NULL when the sljit
compiler reports an error after an allocation. */
static SLJIT_INLINE PCRE2_SPTR compile_recurse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
recurse_entry *entry = common->entries;
recurse_entry *last = NULL;
sljit_sw start = GET(cc, 1);
PCRE2_SPTR target;
BOOL needs_control_head;

PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);

target = common->start + start;

/* Inlining simple patterns. */
if (get_framesize(common, target, NULL, TRUE, &needs_control_head) == no_stack)
  {
  compile_matchingpath(common, next_opcode(common, target), bracketend(target) - (1 + LINK_SIZE), backtrack);
  BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
  return cc + 1 + LINK_SIZE;
  }

/* Look for an existing entry of this group; `last` ends up as the tail of
the list when there is none. */
for (; entry != NULL; last = entry, entry = entry->next)
  {
  if (entry->start == start)
    break;
  }

if (entry == NULL)
  {
  entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;

  entry->start = start;
  entry->next = NULL;
  entry->entry_label = NULL;
  entry->backtrack_label = NULL;
  entry->entry_calls = NULL;
  entry->backtrack_calls = NULL;

  if (last == NULL)
    common->entries = entry;
  else
    last->next = entry;
  }

BACKTRACK_AS(recurse_backtrack)->entry = entry;

/* Call the routine directly when its label already exists, otherwise queue
the call on entry->entry_calls. */
if (entry->entry_label != NULL)
  JUMPTO(SLJIT_FAST_CALL, entry->entry_label);
else
  add_jump(compiler, &entry->entry_calls, JUMP(SLJIT_FAST_CALL));

/* Leave if the match is failed. */
add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
BACKTRACK_AS(recurse_backtrack)->matchingpath = LABEL();
return cc + 1 + LINK_SIZE;
}
9691
9692
/* Helper called from JIT code: completes the callout block and invokes the
callout function stored in the arguments.

  arguments      match arguments (subject bounds, callout function and data)
  callout_block  partially filled block. On entry capture_top holds the
                 number of offset pairs to convert and offset_vector holds
                 the current subject pointer; both are replaced here. The
                 converted offset vector is written to the memory that
                 directly follows the block.
  jit_ovector    the JIT offset vector, which stores subject pointers

Returns 0 when no callout function is set, otherwise the value returned by
the callout function. */
static sljit_s32 SLJIT_FUNC do_callout_jit(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector)
{
PCRE2_SPTR subject_start;
PCRE2_SIZE *dst;
PCRE2_SPTR *src;
sljit_u32 remaining, group;

if (arguments->callout == NULL)
  return 0;

SLJIT_COMPILE_ASSERT(sizeof (PCRE2_SIZE) <= sizeof (sljit_sw), pcre2_size_must_be_lower_than_sljit_sw_size);

subject_start = arguments->begin;
dst = (PCRE2_SIZE*)(callout_block + 1);
remaining = callout_block->capture_top;

SLJIT_ASSERT(remaining >= 1);

callout_block->version = 2;
callout_block->callout_flags = 0;

/* Offsets in subject. The current position must be computed before
offset_vector is overwritten below. */
callout_block->subject_length = arguments->end - arguments->begin;
callout_block->start_match = jit_ovector[0] - subject_start;
callout_block->current_position = (PCRE2_SPTR)callout_block->offset_vector - subject_start;
callout_block->subject = subject_start;

/* Convert and copy the JIT offset vector to the ovector array. */
callout_block->capture_top = 1;
callout_block->offset_vector = dst;

/* The first pair is always reported as unset. */
dst[0] = PCRE2_UNSET;
dst[1] = PCRE2_UNSET;

/* Convert pointers to sizes. capture_top ends up as one more than the
number of the highest pair whose start is set. */
src = jit_ovector + 2;
dst += 2;
for (group = 2, remaining--; remaining != 0; remaining--, group++)
  {
  dst[0] = (PCRE2_SIZE)(src[0] - subject_start);
  dst[1] = (PCRE2_SIZE)(src[1] - subject_start);

  if (dst[0] != PCRE2_UNSET)
    callout_block->capture_top = group;

  dst += 2;
  src += 2;
  }

return (arguments->callout)(callout_block, arguments->callout_data);
}
9745
9746
/* Byte offset of the field `arg` inside pcre2_callout_block. */
#define CALLOUT_ARG_OFFSET(arg) \
9747
    SLJIT_OFFSETOF(pcre2_callout_block, arg)
9748
9749
/* Emits the matching path of a callout (OP_CALLOUT or the string form).

A pcre2_callout_block followed by room for the converted offset vector is
allocated on the JIT stack, the fields known at compile time are stored
into it, and do_callout_jit() is called. A positive return value
backtracks, any other non-zero value jumps to the abort handling, zero
continues matching.

  common  compiler state
  cc      the callout opcode
  parent  backtrack record whose chain receives the new backtrack_common

Returns the pattern position after the callout, or NULL when PUSH_BACKTRACK
detects an sljit compiler error. */
static SLJIT_INLINE PCRE2_SPTR compile_callout_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
BOOL numbered = (*cc == OP_CALLOUT);
unsigned int callout_length = numbered
    ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2 * LINK_SIZE);
/* Move opcode matching the width of the PCRE2_SIZE fields. */
sljit_s32 size_mov = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
sljit_sw callout_number = numbered ? cc[1 + 2 * LINK_SIZE] : 0;
sljit_sw string_ptr = 0;
sljit_sw string_length = 0;
sljit_sw string_offset = 0;
sljit_uw ovector_bytes = (common->re->top_bracket + 1) * 2 * SSIZE_OF(sw);
sljit_uw callout_arg_size;

PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);

/* Size of the block plus the offset vector, rounded up to whole stack
words. */
callout_arg_size = (sizeof(pcre2_callout_block) + ovector_bytes + sizeof(sljit_sw) - 1) / sizeof(sljit_sw);

if (!numbered)
  {
  string_ptr = (sljit_sw) (cc + (1 + 4*LINK_SIZE) + 1);
  string_length = (callout_length - (1 + 4*LINK_SIZE + 2));
  string_offset = (sljit_sw) (GET(cc, 1 + 3*LINK_SIZE));
  }

allocate_stack(common, callout_arg_size);

SLJIT_ASSERT(common->capture_last_ptr != 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, callout_number);
OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_top), SLJIT_IMM, common->re->top_bracket + 1);

/* This pointer sized field temporarily stores an internal variable (the
current subject pointer); do_callout_jit() replaces it. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);

if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
OP1(size_mov, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 1));
OP1(size_mov, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 1 + LINK_SIZE));

/* The string fields are all zero for a numbered callout. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string), SLJIT_IMM, string_ptr);
OP1(size_mov, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_length), SLJIT_IMM, string_length);
OP1(size_mov, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_offset), SLJIT_IMM, string_offset);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);

SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);

/* Needed to save important temporary registers. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0);
/* SLJIT_R0 = arguments, SLJIT_R1 = callout block, SLJIT_R2 = JIT ovector */
OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0);
GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(32, W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_callout_jit));
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
free_stack(common, callout_arg_size);

/* Check return value. */
OP2U(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER));
if (common->abort_label != NULL)
  JUMPTO(SLJIT_NOT_EQUAL /* SIG_LESS */, common->abort_label);
else
  add_jump(compiler, &common->abort, JUMP(SLJIT_NOT_EQUAL) /* SIG_LESS */);
return cc + callout_length;
}
9822
9823
/* The callout helper macros are not needed past this point.
NOTE(review): no definition of CALLOUT_ARG_SIZE is visible near here; the
#undef is harmless if the name is undefined - confirm it is still wanted. */
#undef CALLOUT_ARG_SIZE
9824
#undef CALLOUT_ARG_OFFSET
9825
9826
/* Scans the opcodes starting at cc and decides whether an assertion has to
save STR_PTR.

Word boundary tests, OP_CIRC / OP_CIRCM, OP_DOLL / OP_DOLLM, callouts and
OP_ALT are skipped. Returns FALSE when OP_KET is reached after skipping only
such opcodes, and TRUE as soon as any other opcode is found. */
static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(PCRE2_SPTR cc)
{
for (;;)
  {
  PCRE2_UCHAR op = *cc;

  if (op == OP_KET)
    return FALSE;

  if (op == OP_CALLOUT_STR)
    {
    /* Variable length opcode: its length is stored in the pattern. */
    cc += GET(cc, 1 + 2*LINK_SIZE);
    continue;
    }

  switch (op)
    {
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    case OP_CALLOUT:
    case OP_ALT:
    cc += PRIV(OP_lengths)[op];
    continue;

    default:
    return TRUE;
    }
  }
}
9855
9856
/* Generates the matching path of an assertion bracket.

On entry cc points to the assertion opcode (the assert below requires it to
be in the OP_ASSERT .. OP_ASSERTBACK_NOT range), optionally preceded by
OP_BRAZERO or OP_BRAMINZERO. Every alternative of the assertion is compiled
in turn with compile_matchingpath() / compile_backtrackingpath().

Arguments:
  common       the shared compiler state; several of its fields (accept and
               quit lists/labels, then_trap, positive assertion state) are
               saved on entry and restored before returning
  cc           the current position in the compiled pattern
  backtrack    receives framesize, private_data_ptr and the jump lists
  conditional  when TRUE, failure jumps are collected in
               backtrack->condfailed instead of
               backtrack->common.topbacktracks

Returns: the position after the closing bracket (cc + 1 + LINK_SIZE), or
         NULL when the sljit compiler reports an error. */
static PCRE2_SPTR compile_assert_matchingpath(compiler_common *common, PCRE2_SPTR cc, assert_backtrack *backtrack, BOOL conditional)
9857
{
9858
DEFINE_COMPILER;
9859
int framesize;
9860
int extrasize;
9861
BOOL local_quit_available = FALSE;
9862
BOOL needs_control_head;
9863
int private_data_ptr;
9864
backtrack_common altbacktrack;
9865
PCRE2_SPTR ccbegin;
9866
PCRE2_UCHAR opcode;
9867
PCRE2_UCHAR bra = OP_BRA;
9868
jump_list *tmp = NULL;
9869
jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
9870
jump_list **found;
9871
/* Saving previous accept variables. */
9872
BOOL save_local_quit_available = common->local_quit_available;
9873
BOOL save_in_positive_assertion = common->in_positive_assertion;
9874
then_trap_backtrack *save_then_trap = common->then_trap;
9875
struct sljit_label *save_quit_label = common->quit_label;
9876
struct sljit_label *save_accept_label = common->accept_label;
9877
jump_list *save_quit = common->quit;
9878
jump_list *save_positive_assertion_quit = common->positive_assertion_quit;
9879
jump_list *save_accept = common->accept;
9880
struct sljit_jump *jump;
9881
struct sljit_jump *brajump = NULL;
9882
9883
/* Assert captures then. */
9884
common->then_trap = NULL;
9885
9886
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
9887
  {
9888
  SLJIT_ASSERT(!conditional);
9889
  bra = *cc;
9890
  cc++;
9891
  }
9892
private_data_ptr = PRIVATE_DATA(cc);
9893
SLJIT_ASSERT(private_data_ptr != 0);
9894
framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
9895
backtrack->framesize = framesize;
9896
backtrack->private_data_ptr = private_data_ptr;
9897
opcode = *cc;
9898
SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
9899
/* Alternatives of a positive assertion that match jump to the local tmp
list (resolved at "Assert is successful" below); for negative assertions a
matching alternative jumps directly to the failure target. */
found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
9900
ccbegin = cc;
9901
cc += GET(cc, 1);
9902
9903
if (bra == OP_BRAMINZERO)
9904
  {
9905
  /* This is a braminzero backtrack path. */
9906
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9907
  free_stack(common, 1);
9908
  brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
9909
  }
9910
9911
if (framesize < 0)
9912
  {
9913
  extrasize = 1;
9914
  if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
9915
    extrasize = 0;
9916
9917
  if (needs_control_head)
9918
    extrasize++;
9919
9920
  if (framesize == no_frame)
9921
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
9922
9923
  if (extrasize > 0)
9924
    allocate_stack(common, extrasize);
9925
9926
  if (needs_control_head)
9927
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9928
9929
  if (extrasize > 0)
9930
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9931
9932
  if (needs_control_head)
9933
    {
9934
    SLJIT_ASSERT(extrasize == 2);
9935
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
9936
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9937
    }
9938
  }
9939
else
9940
  {
9941
  extrasize = needs_control_head ? 3 : 2;
9942
  allocate_stack(common, framesize + extrasize);
9943
9944
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9945
  OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
9946
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
9947
  if (needs_control_head)
9948
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9949
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9950
9951
  if (needs_control_head)
9952
    {
9953
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
9954
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
9955
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
9956
    }
9957
  else
9958
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9959
9960
  init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize);
9961
  }
9962
9963
memset(&altbacktrack, 0, sizeof(backtrack_common));
9964
if (conditional || (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT))
9965
  {
9966
  /* Control verbs cannot escape from these asserts. */
9967
  local_quit_available = TRUE;
9968
  common->local_quit_available = TRUE;
9969
  common->quit_label = NULL;
9970
  common->quit = NULL;
9971
  }
9972
9973
common->in_positive_assertion = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK);
9974
common->positive_assertion_quit = NULL;
9975
9976
/* One iteration per alternative of the assertion. */
while (1)
9977
  {
9978
  common->accept_label = NULL;
9979
  common->accept = NULL;
9980
  altbacktrack.top = NULL;
9981
  altbacktrack.topbacktracks = NULL;
9982
9983
  if (*ccbegin == OP_ALT && extrasize > 0)
9984
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9985
9986
  altbacktrack.cc = ccbegin;
9987
  compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
9988
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9989
    {
9990
    /* Restore the saved state before reporting the compiler error. */
    if (local_quit_available)
9991
      {
9992
      common->local_quit_available = save_local_quit_available;
9993
      common->quit_label = save_quit_label;
9994
      common->quit = save_quit;
9995
      }
9996
    common->in_positive_assertion = save_in_positive_assertion;
9997
    common->then_trap = save_then_trap;
9998
    common->accept_label = save_accept_label;
9999
    common->positive_assertion_quit = save_positive_assertion_quit;
10000
    common->accept = save_accept;
10001
    return NULL;
10002
    }
10003
  common->accept_label = LABEL();
10004
  if (common->accept != NULL)
10005
    set_jumps(common->accept, common->accept_label);
10006
10007
  /* Reset stack. */
10008
  if (framesize < 0)
10009
    {
10010
    if (framesize == no_frame)
10011
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10012
    else if (extrasize > 0)
10013
      free_stack(common, extrasize);
10014
10015
    if (needs_control_head)
10016
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
10017
    }
10018
  else
10019
    {
10020
    if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
10021
      {
10022
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
10023
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
10024
      if (needs_control_head)
10025
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
10026
      }
10027
    else
10028
      {
10029
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10030
      if (needs_control_head)
10031
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 2));
10032
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10033
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
10034
      }
10035
    }
10036
10037
  if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
10038
    {
10039
    /* We know that STR_PTR was stored on the top of the stack. */
10040
    if (conditional)
10041
      {
10042
      if (extrasize > 0)
10043
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? STACK(-2) : STACK(-1));
10044
      }
10045
    else if (bra == OP_BRAZERO)
10046
      {
10047
      if (framesize < 0)
10048
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
10049
      else
10050
        {
10051
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
10052
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize));
10053
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
10054
        }
10055
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
10056
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10057
      }
10058
    else if (framesize >= 0)
10059
      {
10060
      /* For OP_BRA and OP_BRAMINZERO. */
10061
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
10062
      }
10063
    }
10064
  /* The alternative matched: leave through the "found" list chosen above. */
  add_jump(compiler, found, JUMP(SLJIT_JUMP));
10065
10066
  compile_backtrackingpath(common, altbacktrack.top);
10067
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10068
    {
10069
    /* Restore the saved state before reporting the compiler error. */
    if (local_quit_available)
10070
      {
10071
      common->local_quit_available = save_local_quit_available;
10072
      common->quit_label = save_quit_label;
10073
      common->quit = save_quit;
10074
      }
10075
    common->in_positive_assertion = save_in_positive_assertion;
10076
    common->then_trap = save_then_trap;
10077
    common->accept_label = save_accept_label;
10078
    common->positive_assertion_quit = save_positive_assertion_quit;
10079
    common->accept = save_accept;
10080
    return NULL;
10081
    }
10082
  set_jumps(altbacktrack.topbacktracks, LABEL());
10083
10084
  /* Stop once the last alternative has been compiled. */
  if (*cc != OP_ALT)
10085
    break;
10086
10087
  ccbegin = cc;
10088
  cc += GET(cc, 1);
10089
  }
10090
10091
if (local_quit_available)
10092
  {
10093
  SLJIT_ASSERT(common->positive_assertion_quit == NULL);
10094
  /* Makes the check less complicated below. */
10095
  common->positive_assertion_quit = common->quit;
10096
  }
10097
10098
/* None of them matched. */
10099
if (common->positive_assertion_quit != NULL)
10100
  {
10101
  jump = JUMP(SLJIT_JUMP);
10102
  set_jumps(common->positive_assertion_quit, LABEL());
10103
  SLJIT_ASSERT(framesize != no_stack);
10104
  if (framesize < 0)
10105
    OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
10106
  else
10107
    {
10108
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10109
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10110
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (extrasize + 1) * sizeof(sljit_sw));
10111
    }
10112
  JUMPHERE(jump);
10113
  }
10114
10115
if (needs_control_head)
10116
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
10117
10118
if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
10119
  {
10120
  /* Assert is failed. */
10121
  if ((conditional && extrasize > 0) || bra == OP_BRAZERO)
10122
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10123
10124
  if (framesize < 0)
10125
    {
10126
    /* The topmost item should be 0. */
10127
    if (bra == OP_BRAZERO)
10128
      {
10129
      if (extrasize == 2)
10130
        free_stack(common, 1);
10131
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10132
      }
10133
    else if (extrasize > 0)
10134
      free_stack(common, extrasize);
10135
    }
10136
  else
10137
    {
10138
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
10139
    /* The topmost item should be 0. */
10140
    if (bra == OP_BRAZERO)
10141
      {
10142
      free_stack(common, framesize + extrasize - 1);
10143
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10144
      }
10145
    else
10146
      free_stack(common, framesize + extrasize);
10147
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
10148
    }
10149
  /* With OP_BRAZERO the failure jump is bound to matchingpath further down
  instead of being added to the failure target. */
  jump = JUMP(SLJIT_JUMP);
10150
  if (bra != OP_BRAZERO)
10151
    add_jump(compiler, target, jump);
10152
10153
  /* Assert is successful. */
10154
  set_jumps(tmp, LABEL());
10155
  if (framesize < 0)
10156
    {
10157
    /* We know that STR_PTR was stored on the top of the stack. */
10158
    if (extrasize > 0)
10159
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
10160
10161
    /* Keep the STR_PTR on the top of the stack. */
10162
    if (bra == OP_BRAZERO)
10163
      {
10164
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
10165
      if (extrasize == 2)
10166
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10167
      }
10168
    else if (bra == OP_BRAMINZERO)
10169
      {
10170
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
10171
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10172
      }
10173
    }
10174
  else
10175
    {
10176
    if (bra == OP_BRA)
10177
      {
10178
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
10179
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
10180
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));
10181
      }
10182
    else
10183
      {
10184
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
10185
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
10186
      if (extrasize == 2)
10187
        {
10188
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10189
        if (bra == OP_BRAMINZERO)
10190
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10191
        }
10192
      else
10193
        {
10194
        SLJIT_ASSERT(extrasize == 3);
10195
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
10196
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
10197
        }
10198
      }
10199
    }
10200
10201
  if (bra == OP_BRAZERO)
10202
    {
10203
    backtrack->matchingpath = LABEL();
10204
    SET_LABEL(jump, backtrack->matchingpath);
10205
    }
10206
  else if (bra == OP_BRAMINZERO)
10207
    {
10208
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
10209
    JUMPHERE(brajump);
10210
    if (framesize >= 0)
10211
      {
10212
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10213
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10214
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
10215
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
10216
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
10217
      }
10218
    set_jumps(backtrack->common.topbacktracks, LABEL());
10219
    }
10220
  }
10221
else
10222
  {
10223
  /* AssertNot is successful. */
10224
  if (framesize < 0)
10225
    {
10226
    if (extrasize > 0)
10227
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10228
10229
    if (bra != OP_BRA)
10230
      {
10231
      if (extrasize == 2)
10232
        free_stack(common, 1);
10233
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10234
      }
10235
    else if (extrasize > 0)
10236
      free_stack(common, extrasize);
10237
    }
10238
  else
10239
    {
10240
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10241
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
10242
    /* The topmost item should be 0. */
10243
    if (bra != OP_BRA)
10244
      {
10245
      free_stack(common, framesize + extrasize - 1);
10246
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10247
      }
10248
    else
10249
      free_stack(common, framesize + extrasize);
10250
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
10251
    }
10252
10253
  if (bra == OP_BRAZERO)
10254
    backtrack->matchingpath = LABEL();
10255
  else if (bra == OP_BRAMINZERO)
10256
    {
10257
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
10258
    JUMPHERE(brajump);
10259
    }
10260
10261
  if (bra != OP_BRA)
10262
    {
10263
    SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
10264
    set_jumps(backtrack->common.topbacktracks, LABEL());
10265
    backtrack->common.topbacktracks = NULL;
10266
    }
10267
  }
10268
10269
/* Restoring the state saved at the top of the function. */
if (local_quit_available)
10270
  {
10271
  common->local_quit_available = save_local_quit_available;
10272
  common->quit_label = save_quit_label;
10273
  common->quit = save_quit;
10274
  }
10275
common->in_positive_assertion = save_in_positive_assertion;
10276
common->then_trap = save_then_trap;
10277
common->accept_label = save_accept_label;
10278
common->positive_assertion_quit = save_positive_assertion_quit;
10279
common->accept = save_accept;
10280
return cc + 1 + LINK_SIZE;
10281
}
10282
10283
/* Emits the code that resets STACK_TOP for a bracket described by ket,
framesize and private_data_ptr, and, when needs_control_head is set, reloads
the saved control head from the stack into common->control_head_ptr.

For OP_KETRMAX a stack value is additionally loaded into TMP2, and for
OP_KETRMIN without a frame a stack value is copied into private_data_ptr. */
static SLJIT_INLINE void match_once_common(compiler_common *common, PCRE2_UCHAR ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
{
DEFINE_COMPILER;
/* TRUE when one more stack slot is in use besides the optional control head. */
const BOOL has_extra_slot = (ket != OP_KET || has_alternatives);
int stacksize;

if (framesize >= 0)
  {
  /* A frame was saved: STACK_TOP is derived from the stored private data. */
  stacksize = has_extra_slot ? 2 : 1;
  OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));

  if (needs_control_head)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));

  /* TMP2 is loaded here for later use by the OP_KETRMAX handling. */
  if (ket == OP_KETRMAX)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  }
else
  {
  if (framesize != no_frame)
    {
    stacksize = needs_control_head ? 1 : 0;
    if (has_extra_slot)
      stacksize++;

    if (stacksize > 0)
      free_stack(common, stacksize);
    }
  else
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);

  if (needs_control_head)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), has_extra_slot ? STACK(-2) : STACK(-1));

  switch (ket)
    {
    case OP_KETRMAX:
    /* TMP2 is loaded here for later use by the OP_KETRMAX handling. */
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
    break;

    case OP_KETRMIN:
    /* Move the STR_PTR to the private_data_ptr. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
    break;

    default:
    break;
    }
  }

/* TMP1 was loaded with the saved control head on both paths above. */
if (needs_control_head)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
}
10330
10331
/* Emits the code that saves the previous capture state on the stack and
records the new one.

When common->capture_last_ptr is in use, its old value is pushed and replaced
by offset >> 1. When the bracket is not marked in common->optimized_cbracket,
the old OVECTOR(offset) / OVECTOR(offset + 1) pair is pushed and then
overwritten with the value held in private_data_ptr and the current STR_PTR.

Arguments:
  stacksize  index of the first stack slot that may be written

Returns: the index of the next unused stack slot. */
static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
{
DEFINE_COMPILER;
const int capture_index = offset >> 1;
int next_slot = stacksize;

if (common->capture_last_ptr != 0)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, capture_index);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(next_slot), TMP1, 0);
  next_slot += 1;
  }

if (common->optimized_cbracket[capture_index] != 0)
  return next_slot;

/* Save the old pair, then install the new start / end values. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(next_slot), TMP1, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(next_slot + 1), TMP2, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);

return next_slot + 2;
}
10355
10356
/* Helper called from the generated code: runs PRIV(script_run) on the
ptr / endptr pair with its last argument set to FALSE. Yields endptr when
the check succeeds and NULL when it does not. */
static PCRE2_SPTR SLJIT_FUNC do_script_run(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
{
  return PRIV(script_run)(ptr, endptr, FALSE) ? endptr : NULL;
}
10362
10363
#ifdef SUPPORT_UNICODE
10364
10365
/* Same as do_script_run, but passes TRUE as the last argument of
PRIV(script_run) (presumably the UTF flag -- confirm against its
definition). Yields endptr on success and NULL otherwise. */
static PCRE2_SPTR SLJIT_FUNC do_script_run_utf(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
{
  return PRIV(script_run)(ptr, endptr, TRUE) ? endptr : NULL;
}
10371
10372
#endif /* SUPPORT_UNICODE */
10373
10374
/* Emits a call to the script run helper. The value stored at
private_data_ptr is loaded into TMP1 and passed together with STR_PTR (the
assert below checks that these are the first two argument registers). The
returned pointer becomes the new STR_PTR; a zero result jumps to the
backtrack list of parent. */
static SLJIT_INLINE void match_script_run_common(compiler_common *common, int private_data_ptr, backtrack_common *parent)
{
DEFINE_COMPILER;
sljit_sw helper_addr = SLJIT_FUNC_ADDR(do_script_run);
jump_list **fail_list;
struct sljit_jump *failed;

SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);

#ifdef SUPPORT_UNICODE
if (common->utf)
  helper_addr = SLJIT_FUNC_ADDR(do_script_run_utf);
#endif

OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, helper_addr);

OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);

/* Failures go to the innermost backtrack when one exists, otherwise to
the parent's own list. */
if (parent->top != NULL)
  fail_list = &parent->top->nextbacktracks;
else
  fail_list = &parent->topbacktracks;

failed = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
add_jump(compiler, fail_list, failed);
}
10391
10392
/*
10393
  Handling bracketed expressions is probably the most complex part.
10394
10395
  Stack layout naming characters:
10396
    S - Push the current STR_PTR
10397
    0 - Push a 0 (NULL)
10398
    A - Push the current STR_PTR. Needed for restoring the STR_PTR
10399
        before the next alternative. Not pushed if there are no alternatives.
10400
    M - Any values pushed by the current alternative. Can be empty, or anything.
10401
    C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
10402
    L - Push the previous local (pointed by localptr) to the stack
10403
   () - optional values stored on the stack
10404
  ()* - optional, can be stored multiple times
10405
10406
  The following list shows the regular expression templates, their PCRE byte codes
10407
  and stack layout supported by pcre-sljit.
10408
10409
  (?:)                     OP_BRA     | OP_KET                A M
10410
  ()                       OP_CBRA    | OP_KET                C M
10411
  (?:)+                    OP_BRA     | OP_KETRMAX        0   A M S   ( A M S )*
10412
                           OP_SBRA    | OP_KETRMAX        0   L M S   ( L M S )*
10413
  (?:)+?                   OP_BRA     | OP_KETRMIN        0   A M S   ( A M S )*
10414
                           OP_SBRA    | OP_KETRMIN        0   L M S   ( L M S )*
10415
  ()+                      OP_CBRA    | OP_KETRMAX        0   C M S   ( C M S )*
10416
                           OP_SCBRA   | OP_KETRMAX        0   C M S   ( C M S )*
10417
  ()+?                     OP_CBRA    | OP_KETRMIN        0   C M S   ( C M S )*
10418
                           OP_SCBRA   | OP_KETRMIN        0   C M S   ( C M S )*
10419
  (?:)?    OP_BRAZERO    | OP_BRA     | OP_KET            S ( A M 0 )
10420
  (?:)??   OP_BRAMINZERO | OP_BRA     | OP_KET            S ( A M 0 )
10421
  ()?      OP_BRAZERO    | OP_CBRA    | OP_KET            S ( C M 0 )
10422
  ()??     OP_BRAMINZERO | OP_CBRA    | OP_KET            S ( C M 0 )
10423
  (?:)*    OP_BRAZERO    | OP_BRA     | OP_KETRMAX      S 0 ( A M S )*
10424
           OP_BRAZERO    | OP_SBRA    | OP_KETRMAX      S 0 ( L M S )*
10425
  (?:)*?   OP_BRAMINZERO | OP_BRA     | OP_KETRMIN      S 0 ( A M S )*
10426
           OP_BRAMINZERO | OP_SBRA    | OP_KETRMIN      S 0 ( L M S )*
10427
  ()*      OP_BRAZERO    | OP_CBRA    | OP_KETRMAX      S 0 ( C M S )*
10428
           OP_BRAZERO    | OP_SCBRA   | OP_KETRMAX      S 0 ( C M S )*
10429
  ()*?     OP_BRAMINZERO | OP_CBRA    | OP_KETRMIN      S 0 ( C M S )*
10430
           OP_BRAMINZERO | OP_SCBRA   | OP_KETRMIN      S 0 ( C M S )*
10431
10432
10433
  Stack layout naming characters:
10434
    A - Push the alternative index (starting from 0) on the stack.
10435
        Not pushed if there are no alternatives.
10436
    M - Any values pushed by the current alternative. Can be empty, or anything.
10437
10438
  The next list shows the possible content of a bracket:
10439
  (|)     OP_*BRA    | OP_ALT ...         M A
10440
  (?()|)  OP_*COND   | OP_ALT             M A
10441
  (?>|)   OP_ONCE    | OP_ALT ...         [stack trace] M A
10442
                                          Or nothing, if trace is unnecessary
10443
*/
10444
10445
static PCRE2_SPTR compile_bracket_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
10446
{
10447
DEFINE_COMPILER;
10448
backtrack_common *backtrack;
10449
PCRE2_UCHAR opcode;
10450
int private_data_ptr = 0;
10451
int offset = 0;
10452
int i, stacksize;
10453
int repeat_ptr = 0, repeat_length = 0;
10454
int repeat_type = 0, repeat_count = 0;
10455
PCRE2_SPTR ccbegin;
10456
PCRE2_SPTR matchingpath;
10457
PCRE2_SPTR slot;
10458
PCRE2_UCHAR bra = OP_BRA;
10459
PCRE2_UCHAR ket;
10460
assert_backtrack *assert;
10461
BOOL has_alternatives;
10462
BOOL needs_control_head = FALSE;
10463
struct sljit_jump *jump;
10464
struct sljit_jump *skip;
10465
struct sljit_label *rmax_label = NULL;
10466
struct sljit_jump *braminzero = NULL;
10467
10468
PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
10469
10470
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
10471
  {
10472
  bra = *cc;
10473
  cc++;
10474
  opcode = *cc;
10475
  }
10476
10477
opcode = *cc;
10478
ccbegin = cc;
10479
matchingpath = bracketend(cc) - 1 - LINK_SIZE;
10480
ket = *matchingpath;
10481
if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
10482
  {
10483
  repeat_ptr = PRIVATE_DATA(matchingpath);
10484
  repeat_length = PRIVATE_DATA(matchingpath + 1);
10485
  repeat_type = PRIVATE_DATA(matchingpath + 2);
10486
  repeat_count = PRIVATE_DATA(matchingpath + 3);
10487
  SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
10488
  if (repeat_type == OP_UPTO)
10489
    ket = OP_KETRMAX;
10490
  if (repeat_type == OP_MINUPTO)
10491
    ket = OP_KETRMIN;
10492
  }
10493
10494
matchingpath = ccbegin + 1 + LINK_SIZE;
10495
SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
10496
SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
10497
cc += GET(cc, 1);
10498
10499
has_alternatives = *cc == OP_ALT;
10500
if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
10501
  {
10502
  SLJIT_COMPILE_ASSERT(OP_DNRREF == OP_RREF + 1 && OP_FALSE == OP_RREF + 2 && OP_TRUE == OP_RREF + 3,
10503
    compile_time_checks_must_be_grouped_together);
10504
  has_alternatives = ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL) ? FALSE : TRUE;
10505
  }
10506
10507
if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
10508
  opcode = OP_SCOND;
10509
10510
if (opcode == OP_CBRA || opcode == OP_SCBRA)
10511
  {
10512
  /* Capturing brackets has a pre-allocated space. */
10513
  offset = GET2(ccbegin, 1 + LINK_SIZE);
10514
  if (common->optimized_cbracket[offset] == 0)
10515
    {
10516
    private_data_ptr = OVECTOR_PRIV(offset);
10517
    offset <<= 1;
10518
    }
10519
  else
10520
    {
10521
    offset <<= 1;
10522
    private_data_ptr = OVECTOR(offset);
10523
    }
10524
  BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
10525
  matchingpath += IMM2_SIZE;
10526
  }
10527
else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_ONCE || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
10528
  {
10529
  /* Other brackets simply allocate the next entry. */
10530
  private_data_ptr = PRIVATE_DATA(ccbegin);
10531
  SLJIT_ASSERT(private_data_ptr != 0);
10532
  BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
10533
  if (opcode == OP_ONCE)
10534
    BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
10535
  }
10536
10537
/* Instructions before the first alternative. */
10538
stacksize = 0;
10539
if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
10540
  stacksize++;
10541
if (bra == OP_BRAZERO)
10542
  stacksize++;
10543
10544
if (stacksize > 0)
10545
  allocate_stack(common, stacksize);
10546
10547
stacksize = 0;
10548
if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
10549
  {
10550
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10551
  stacksize++;
10552
  }
10553
10554
if (bra == OP_BRAZERO)
10555
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10556
10557
if (bra == OP_BRAMINZERO)
10558
  {
10559
  /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
10560
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10561
  if (ket != OP_KETRMIN)
10562
    {
10563
    free_stack(common, 1);
10564
    braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10565
    }
10566
  else if (opcode == OP_ONCE || opcode >= OP_SBRA)
10567
    {
10568
    jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10569
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10570
    /* Nothing stored during the first run. */
10571
    skip = JUMP(SLJIT_JUMP);
10572
    JUMPHERE(jump);
10573
    /* Checking zero-length iteration. */
10574
    if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
10575
      {
10576
      /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
10577
      braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10578
      }
10579
    else
10580
      {
10581
      /* Except when the whole stack frame must be saved. */
10582
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10583
      braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-BACKTRACK_AS(bracket_backtrack)->u.framesize - 2));
10584
      }
10585
    JUMPHERE(skip);
10586
    }
10587
  else
10588
    {
10589
    jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10590
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10591
    JUMPHERE(jump);
10592
    }
10593
  }
10594
10595
if (repeat_type != 0)
10596
  {
10597
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
10598
  if (repeat_type == OP_EXACT)
10599
    rmax_label = LABEL();
10600
  }
10601
10602
if (ket == OP_KETRMIN)
10603
  BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
10604
10605
if (ket == OP_KETRMAX)
10606
  {
10607
  rmax_label = LABEL();
10608
  if (has_alternatives && opcode >= OP_BRA && opcode < OP_SBRA && repeat_type == 0)
10609
    BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
10610
  }
10611
10612
/* Handling capturing brackets and alternatives. */
10613
if (opcode == OP_ONCE)
10614
  {
10615
  stacksize = 0;
10616
  if (needs_control_head)
10617
    {
10618
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10619
    stacksize++;
10620
    }
10621
10622
  if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
10623
    {
10624
    /* Neither capturing brackets nor recursions are found in the block. */
10625
    if (ket == OP_KETRMIN)
10626
      {
10627
      stacksize += 2;
10628
      if (!needs_control_head)
10629
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10630
      }
10631
    else
10632
      {
10633
      if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
10634
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
10635
      if (ket == OP_KETRMAX || has_alternatives)
10636
        stacksize++;
10637
      }
10638
10639
    if (stacksize > 0)
10640
      allocate_stack(common, stacksize);
10641
10642
    stacksize = 0;
10643
    if (needs_control_head)
10644
      {
10645
      stacksize++;
10646
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10647
      }
10648
10649
    if (ket == OP_KETRMIN)
10650
      {
10651
      if (needs_control_head)
10652
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10653
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10654
      if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
10655
        OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
10656
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
10657
      }
10658
    else if (ket == OP_KETRMAX || has_alternatives)
10659
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10660
    }
10661
  else
10662
    {
10663
    if (ket != OP_KET || has_alternatives)
10664
      stacksize++;
10665
10666
    stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
10667
    allocate_stack(common, stacksize);
10668
10669
    if (needs_control_head)
10670
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10671
10672
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10673
    OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
10674
10675
    stacksize = needs_control_head ? 1 : 0;
10676
    if (ket != OP_KET || has_alternatives)
10677
      {
10678
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10679
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
10680
      stacksize++;
10681
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10682
      }
10683
    else
10684
      {
10685
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
10686
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10687
      }
10688
    init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1);
10689
    }
10690
  }
10691
else if (opcode == OP_CBRA || opcode == OP_SCBRA)
10692
  {
10693
  /* Saving the previous values. */
10694
  if (common->optimized_cbracket[offset >> 1] != 0)
10695
    {
10696
    SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
10697
    allocate_stack(common, 2);
10698
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10699
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
10700
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
10701
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
10702
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
10703
    }
10704
  else
10705
    {
10706
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10707
    allocate_stack(common, 1);
10708
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
10709
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10710
    }
10711
  }
10712
else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
10713
  {
10714
  /* Saving the previous value. */
10715
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10716
  allocate_stack(common, 1);
10717
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
10718
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10719
  }
10720
else if (has_alternatives)
10721
  {
10722
  /* Pushing the starting string pointer. */
10723
  allocate_stack(common, 1);
10724
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10725
  }
10726
10727
/* Generating code for the first alternative. */
10728
if (opcode == OP_COND || opcode == OP_SCOND)
10729
  {
10730
  if (*matchingpath == OP_CREF)
10731
    {
10732
    SLJIT_ASSERT(has_alternatives);
10733
    add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
10734
      CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
10735
    matchingpath += 1 + IMM2_SIZE;
10736
    }
10737
  else if (*matchingpath == OP_DNCREF)
10738
    {
10739
    SLJIT_ASSERT(has_alternatives);
10740
10741
    i = GET2(matchingpath, 1 + IMM2_SIZE);
10742
    slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
10743
    OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
10744
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
10745
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
10746
    slot += common->name_entry_size;
10747
    i--;
10748
    while (i-- > 0)
10749
      {
10750
      OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
10751
      OP2(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, STR_PTR, 0);
10752
      slot += common->name_entry_size;
10753
      }
10754
    OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
10755
    add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO));
10756
    matchingpath += 1 + 2 * IMM2_SIZE;
10757
    }
10758
  else if ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL)
10759
    {
10760
    /* Never has other case. */
10761
    BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
10762
    SLJIT_ASSERT(!has_alternatives);
10763
10764
    if (*matchingpath == OP_TRUE)
10765
      {
10766
      stacksize = 1;
10767
      matchingpath++;
10768
      }
10769
    else if (*matchingpath == OP_FALSE || *matchingpath == OP_FAIL)
10770
      stacksize = 0;
10771
    else if (*matchingpath == OP_RREF)
10772
      {
10773
      stacksize = GET2(matchingpath, 1);
10774
      if (common->currententry == NULL)
10775
        stacksize = 0;
10776
      else if (stacksize == RREF_ANY)
10777
        stacksize = 1;
10778
      else if (common->currententry->start == 0)
10779
        stacksize = stacksize == 0;
10780
      else
10781
        stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
10782
10783
      if (stacksize != 0)
10784
        matchingpath += 1 + IMM2_SIZE;
10785
      }
10786
    else
10787
      {
10788
      if (common->currententry == NULL || common->currententry->start == 0)
10789
        stacksize = 0;
10790
      else
10791
        {
10792
        stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
10793
        slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
10794
        i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
10795
        while (stacksize > 0)
10796
          {
10797
          if ((int)GET2(slot, 0) == i)
10798
            break;
10799
          slot += common->name_entry_size;
10800
          stacksize--;
10801
          }
10802
        }
10803
10804
      if (stacksize != 0)
10805
        matchingpath += 1 + 2 * IMM2_SIZE;
10806
      }
10807
10808
      /* The stacksize == 0 is a common "else" case. */
10809
      if (stacksize == 0)
10810
        {
10811
        if (*cc == OP_ALT)
10812
          {
10813
          matchingpath = cc + 1 + LINK_SIZE;
10814
          cc += GET(cc, 1);
10815
          }
10816
        else
10817
          matchingpath = cc;
10818
        }
10819
    }
10820
  else
10821
    {
10822
    SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
10823
    /* Similar code as PUSH_BACKTRACK macro. */
10824
    assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
10825
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10826
      return NULL;
10827
    memset(assert, 0, sizeof(assert_backtrack));
10828
    assert->common.cc = matchingpath;
10829
    BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
10830
    matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
10831
    }
10832
  }
10833
10834
compile_matchingpath(common, matchingpath, cc, backtrack);
10835
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10836
  return NULL;
10837
10838
if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA)
10839
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10840
10841
if (opcode == OP_ONCE)
10842
  match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
10843
10844
if (opcode == OP_SCRIPT_RUN)
10845
  match_script_run_common(common, private_data_ptr, backtrack);
10846
10847
stacksize = 0;
10848
if (repeat_type == OP_MINUPTO)
10849
  {
10850
  /* We need to preserve the counter. TMP2 will be used below. */
10851
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
10852
  stacksize++;
10853
  }
10854
if (ket != OP_KET || bra != OP_BRA)
10855
  stacksize++;
10856
if (offset != 0)
10857
  {
10858
  if (common->capture_last_ptr != 0)
10859
    stacksize++;
10860
  if (common->optimized_cbracket[offset >> 1] == 0)
10861
    stacksize += 2;
10862
  }
10863
if (has_alternatives && opcode != OP_ONCE)
10864
  stacksize++;
10865
10866
if (stacksize > 0)
10867
  allocate_stack(common, stacksize);
10868
10869
stacksize = 0;
10870
if (repeat_type == OP_MINUPTO)
10871
  {
10872
  /* TMP2 was set above. */
10873
  OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
10874
  stacksize++;
10875
  }
10876
10877
if (ket != OP_KET || bra != OP_BRA)
10878
  {
10879
  if (ket != OP_KET)
10880
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10881
  else
10882
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10883
  stacksize++;
10884
  }
10885
10886
if (offset != 0)
10887
  stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
10888
10889
/* Skip and count the other alternatives. */
10890
i = 1;
10891
while (*cc == OP_ALT)
10892
  {
10893
  cc += GET(cc, 1);
10894
  i++;
10895
  }
10896
10897
if (has_alternatives)
10898
  {
10899
  if (opcode != OP_ONCE)
10900
    {
10901
    if (i <= 3)
10902
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10903
    else
10904
      BACKTRACK_AS(bracket_backtrack)->u.matching_put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
10905
    }
10906
  if (ket != OP_KETRMAX)
10907
    BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10908
  }
10909
10910
/* Must be after the matchingpath label. */
10911
if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
10912
  {
10913
  SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
10914
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10915
  }
10916
10917
if (ket == OP_KETRMAX)
10918
  {
10919
  if (repeat_type != 0)
10920
    {
10921
    if (has_alternatives)
10922
      BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10923
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
10924
    JUMPTO(SLJIT_NOT_ZERO, rmax_label);
10925
    /* Drop STR_PTR for greedy plus quantifier. */
10926
    if (opcode != OP_ONCE)
10927
      free_stack(common, 1);
10928
    }
10929
  else if (opcode < OP_BRA || opcode >= OP_SBRA)
10930
    {
10931
    if (has_alternatives)
10932
      BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10933
10934
    /* Checking zero-length iteration. */
10935
    if (opcode != OP_ONCE)
10936
      {
10937
      /* This case includes opcodes such as OP_SCRIPT_RUN. */
10938
      CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
10939
      /* Drop STR_PTR for greedy plus quantifier. */
10940
      if (bra != OP_BRAZERO)
10941
        free_stack(common, 1);
10942
      }
10943
    else
10944
      /* TMP2 must contain the starting STR_PTR. */
10945
      CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
10946
    }
10947
  else
10948
    JUMPTO(SLJIT_JUMP, rmax_label);
10949
  BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
10950
  }
10951
10952
if (repeat_type == OP_EXACT)
10953
  {
10954
  count_match(common);
10955
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
10956
  JUMPTO(SLJIT_NOT_ZERO, rmax_label);
10957
  }
10958
else if (repeat_type == OP_UPTO)
10959
  {
10960
  /* We need to preserve the counter. */
10961
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
10962
  allocate_stack(common, 1);
10963
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10964
  }
10965
10966
if (bra == OP_BRAZERO)
10967
  BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
10968
10969
if (bra == OP_BRAMINZERO)
10970
  {
10971
  /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
10972
  JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
10973
  if (braminzero != NULL)
10974
    {
10975
    JUMPHERE(braminzero);
10976
    /* We need to release the end pointer to perform the
10977
    backtrack for the zero-length iteration. When
10978
    framesize is < 0, OP_ONCE will do the release itself. */
10979
    if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
10980
      {
10981
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10982
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10983
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
10984
      }
10985
    else if (ket == OP_KETRMIN && opcode != OP_ONCE)
10986
      free_stack(common, 1);
10987
    }
10988
  /* Continue to the normal backtrack. */
10989
  }
10990
10991
if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
10992
  count_match(common);
10993
10994
cc += 1 + LINK_SIZE;
10995
10996
if (opcode == OP_ONCE)
10997
  {
10998
  /* We temporarily encode the needs_control_head in the lowest bit.
10999
     Note: on the target architectures of SLJIT the ((x << 1) >> 1) returns
11000
     the same value for small signed numbers (including negative numbers). */
11001
  BACKTRACK_AS(bracket_backtrack)->u.framesize = (int)((unsigned)BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
11002
  }
11003
return cc + repeat_length;
11004
}
11005
11006
/* Generates the matching path for a bracket of the OP_BRAPOS family
(OP_BRAPOS, OP_SBRAPOS, OP_CBRAPOS, OP_SCBRAPOS), optionally preceded by
OP_BRAPOSZERO.

Arguments:
  common   the JIT compiler state
  cc       points to the OP_BRAPOSZERO or to the bracket opcode itself
  parent   the parent backtrack descriptor (not referenced directly in this
           body; presumably consumed by PUSH_BACKTRACK - confirm against the
           macro definition)

Returns:   pointer to the byte code following the closing OP_KETRPOS,
           or NULL when sljit reports a compiler error

Layout of the generated code:
  1. Stack slots are allocated and the values that must survive the loop are
     saved there. Two layouts exist: one for framesize < 0 (no frame is
     built) and one for framesize >= 0 (init_frame() builds a frame).
  2. Unless OP_BRAPOSZERO was present, a flag slot is initialized to 1. It is
     cleared to 0 each time an alternative matches, and tested after the
     loop: a non-zero flag means that no iteration matched, and the code
     jumps to the backtrack list.
  3. Every alternative gets its own matching path followed by a jump back to
     the loop label, and its own backtracking path that falls through to the
     next alternative.
  4. For OP_SBRAPOS / OP_SCBRAPOS an iteration which did not move STR_PTR
     leaves the loop through the emptymatch jump list. */

static PCRE2_SPTR compile_bracketpos_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
PCRE2_UCHAR opcode;
int private_data_ptr;
int cbraprivptr = 0;
BOOL needs_control_head;
int framesize;
int stacksize;
int offset = 0;
BOOL zero = FALSE;
PCRE2_SPTR ccbegin = NULL;
int stack; /* Also contains the offset of control head. */
struct sljit_label *loop = NULL;
struct jump_list *emptymatch = NULL;

PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
if (*cc == OP_BRAPOSZERO)
  {
  zero = TRUE;
  cc++;
  }

opcode = *cc;
private_data_ptr = PRIVATE_DATA(cc);
SLJIT_ASSERT(private_data_ptr != 0);
BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
switch(opcode)
  {
  case OP_BRAPOS:
  case OP_SBRAPOS:
  ccbegin = cc + 1 + LINK_SIZE;
  break;

  case OP_CBRAPOS:
  case OP_SCBRAPOS:
  offset = GET2(cc, 1 + LINK_SIZE);
  /* This case cannot be optimized in the same way as
  normal capturing brackets. */
  SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
  cbraprivptr = OVECTOR_PRIV(offset);
  /* From here on offset is the index of the ovector pair. */
  offset <<= 1;
  ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
  break;

  default:
  SLJIT_UNREACHABLE();
  break;
  }

framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
if (framesize < 0)
  {
  /* No frame: count the slots for the saved capture pair (plus the
  capture_last value) or the saved STR_PTR, the control head and the
  "no iteration matched yet" flag. */
  if (offset != 0)
    {
    stacksize = 2;
    if (common->capture_last_ptr != 0)
      stacksize++;
    }
  else
    stacksize = 1;

  if (needs_control_head)
    stacksize++;
  if (!zero)
    stacksize++;

  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
  allocate_stack(common, stacksize);
  if (framesize == no_frame)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);

  stack = 0;
  if (offset != 0)
    {
    stack = 2;
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
    if (common->capture_last_ptr != 0)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
    if (needs_control_head)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    if (common->capture_last_ptr != 0)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
      stack = 3;
      }
    }
  else
    {
    if (needs_control_head)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    stack = 1;
    }

  /* The flag is stored in the last slot (index stacksize - 1); when a
  control head is saved it occupies the slot just before the flag, and
  stack is left pointing at that control head slot. */
  if (needs_control_head)
    stack++;
  if (!zero)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
  if (needs_control_head)
    {
    stack--;
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
    }
  }
else
  {
  /* A frame is needed: the flag (if any) is stored in slot 0, followed by
  the control head, the starting STR_PTR (non-capturing case only) and the
  previous value of the private data slot. */
  stacksize = framesize + 1;
  if (!zero)
    stacksize++;
  if (needs_control_head)
    stacksize++;
  if (offset == 0)
    stacksize++;
  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;

  allocate_stack(common, stacksize);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));

  stack = 0;
  if (!zero)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
    stack = 1;
    }
  if (needs_control_head)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
    stack++;
    }
  if (offset == 0)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
    stack++;
    }
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
  init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize);
  /* Step back over the slots stored after the control head slot. */
  stack -= 1 + (offset == 0);
  }

if (offset != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);

loop = LABEL();
while (*cc != OP_KETRPOS)
  {
  backtrack->top = NULL;
  backtrack->topbacktracks = NULL;
  cc += GET(cc, 1);

  compile_matchingpath(common, ccbegin, cc, backtrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;

  if (framesize < 0)
    {
    if (framesize == no_frame)
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);

    if (offset != 0)
      {
      /* TMP1 keeps the start of this iteration for the empty match check. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
      if (common->capture_last_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
      }
    else
      {
      if (opcode == OP_SBRAPOS)
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }

    /* Even if the match is empty, we need to reset the control head. */
    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));

    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
      add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));

    /* An iteration matched: clear the flag stored in the last slot. */
    if (!zero)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
    }
  else
    {
    if (offset != 0)
      {
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
      if (common->capture_last_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
      }
    else
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP2(SLJIT_SUB, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
      if (opcode == OP_SBRAPOS)
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
      OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(-framesize - 2), STR_PTR, 0);
      }

    /* Even if the match is empty, we need to reset the control head. */
    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));

    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
      add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));

    /* An iteration matched: clear the flag. In this branch framesize is
    never negative, so the flag is always the one stored in slot 0. */
    if (!zero)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    }

  JUMPTO(SLJIT_JUMP, loop);
  flush_stubs(common);

  compile_backtrackingpath(common, backtrack->top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;
  set_jumps(backtrack->topbacktracks, LABEL());

  /* The alternative failed: reload STR_PTR from the value saved by the
  last successful iteration (or by the initial setup). */
  if (framesize < 0)
    {
    if (offset != 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
    else
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    }
  else
    {
    if (offset != 0)
      {
      /* Last alternative. */
      if (*cc == OP_KETRPOS)
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      }
    else
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
      }
    }

  if (*cc == OP_KETRPOS)
    break;
  ccbegin = cc + 1 + LINK_SIZE;
  }

/* We don't have to restore the control head in case of a failed match. */

backtrack->topbacktracks = NULL;
if (!zero)
  {
  /* A flag which is still non-zero means that no iteration matched. */
  if (framesize < 0)
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
  else /* TMP2 is set to [private_data_ptr] above. */
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0));
  }

/* None of them matched. */
set_jumps(emptymatch, LABEL());
count_match(common);
return cc + 1 + LINK_SIZE;
}
11288
11289
/* Decodes the single item repeat which starts at cc into a normalized form.

Arguments:
  common   the JIT compiler state (used for next_opcode() and the UTF check)
  cc       points to the repeat opcode, or to OP_CLASS / OP_NCLASS / OP_XCLASS
  opcode   receives the repeat kind, rebased onto the OP_STAR .. OP_POSUPTO
           group (OP_EXACT is also used for a range whose min equals max)
  type     receives the kind of the repeated item: OP_CHAR, OP_CHARI, OP_NOT,
           OP_NOTI, the class opcode, or the character type opcode read
           from the byte code
  max      written only for the bounded forms (the UPTO opcodes and class
           ranges); every other form leaves it untouched. For class ranges
           the stored value is the maximum minus the minimum
  exact    receives the mandatory repeat count (0 when there is none); the
           "plus" forms are turned into exact = 1 followed by a "star"
  end      receives the position computed as the end of the repeated item

Returns:   pointer to the repeated item (for classes: the code unit which
           follows the class opcode; for character types: the code unit which
           follows the type opcode) */

static SLJIT_INLINE PCRE2_SPTR get_iterator_parameters(compiler_common *common, PCRE2_SPTR cc, PCRE2_UCHAR *opcode, PCRE2_UCHAR *type, sljit_u32 *max, sljit_u32 *exact, PCRE2_SPTR *end)
{
int len;
BOOL possessive;

*opcode = *cc;
*exact = 0;

/* Every supported form begins with a single opcode code unit. */
cc++;

if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
  *type = OP_CHAR;
else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
  {
  *type = OP_CHARI;
  *opcode -= OP_STARI - OP_STAR;
  }
else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
  {
  *type = OP_NOT;
  *opcode -= OP_NOTSTAR - OP_STAR;
  }
else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
  {
  *type = OP_NOTI;
  *opcode -= OP_NOTSTARI - OP_STAR;
  }
else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
  {
  *opcode -= OP_TYPESTAR - OP_STAR;
  /* OP_END marks that the real type still has to be read, see below. */
  *type = OP_END;
  }
else
  {
  /* Character class: the repeat opcode is stored after the class data,
  and this branch always returns. */
  SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
  *type = *opcode;

  if (*type < OP_XCLASS)
    len = (int)(1 + (32 / sizeof(PCRE2_UCHAR)));
  else
    len = GET(cc, 0);

  *opcode = cc[len - 1];

  if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
    {
    *end = cc + len;
    *opcode -= OP_CRSTAR - OP_STAR;

    if (*opcode == OP_PLUS || *opcode == OP_MINPLUS)
      {
      *opcode -= OP_PLUS - OP_STAR;
      *exact = 1;
      }
    return cc;
    }

  if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
    {
    *end = cc + len;
    *opcode -= OP_CRPOSSTAR - OP_POSSTAR;

    if (*opcode == OP_POSPLUS)
      {
      *opcode = OP_POSSTAR;
      *exact = 1;
      }
    return cc;
    }

  /* Only the {min,max} forms are left. */
  SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
  possessive = (*opcode == OP_CRPOSRANGE);

  *exact = GET2(cc, len);
  *max = GET2(cc, (len + IMM2_SIZE));
  *end = cc + len + 2 * IMM2_SIZE;

  if (*max == 0)
    {
    /* A zero maximum is mapped to the "star" opcodes. */
    *opcode = (PCRE2_UCHAR)(possessive ? OP_POSSTAR : *opcode - (OP_CRRANGE - OP_STAR));
    return cc;
    }

  /* Only the optional part remains after the mandatory count. */
  *max -= *exact;

  if (*max == 0)
    *opcode = OP_EXACT;
  else if (*max == 1)
    *opcode = (PCRE2_UCHAR)(possessive ? OP_POSQUERY : *opcode - (OP_CRRANGE - OP_QUERY));
  else
    *opcode = (PCRE2_UCHAR)(possessive ? OP_POSUPTO : *opcode - (OP_CRRANGE - OP_UPTO));

  return cc;
  }

/* Read the count argument, or convert the "plus" forms. */
if (*opcode == OP_EXACT)
  {
  *exact = GET2(cc, 0);
  cc += IMM2_SIZE;
  }
else if (*opcode == OP_PLUS || *opcode == OP_MINPLUS)
  {
  *exact = 1;
  *opcode -= OP_PLUS - OP_STAR;
  }
else if (*opcode == OP_POSPLUS)
  {
  *exact = 1;
  *opcode = OP_POSSTAR;
  }
else if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_POSUPTO)
  {
  *max = GET2(cc, 0);
  cc += IMM2_SIZE;
  }

if (*type != OP_END)
  {
  /* A literal character: one code unit plus its extra code units in
  UTF mode. */
  *end = cc + 1;
#ifdef SUPPORT_UNICODE
  if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
#endif
  return cc;
  }

/* Character type repeat: the type opcode follows the repeat opcode. */
*type = *cc;
*end = next_opcode(common, cc);
return cc + 1;
}
11433
11434
static PCRE2_SPTR compile_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11435
{
11436
DEFINE_COMPILER;
11437
backtrack_common *backtrack;
11438
PCRE2_UCHAR opcode;
11439
PCRE2_UCHAR type;
11440
sljit_u32 max = 0, exact;
11441
sljit_s32 early_fail_ptr = PRIVATE_DATA(cc + 1);
11442
sljit_s32 early_fail_type;
11443
BOOL charpos_enabled;
11444
PCRE2_UCHAR charpos_char;
11445
unsigned int charpos_othercasebit;
11446
PCRE2_SPTR end;
11447
jump_list *no_match = NULL;
11448
jump_list *no_char1_match = NULL;
11449
struct sljit_jump *jump = NULL;
11450
struct sljit_label *label;
11451
int private_data_ptr = PRIVATE_DATA(cc);
11452
int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
11453
int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
11454
int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + SSIZE_OF(sw);
11455
int tmp_base, tmp_offset;
11456
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11457
BOOL use_tmp;
11458
#endif
11459
11460
PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL);
11461
11462
early_fail_type = (early_fail_ptr & 0x7);
11463
early_fail_ptr >>= 3;
11464
11465
/* During recursion, these optimizations are disabled. */
11466
if (common->early_fail_start_ptr == 0 && common->fast_forward_bc_ptr == NULL)
11467
  {
11468
  early_fail_ptr = 0;
11469
  early_fail_type = type_skip;
11470
  }
11471
11472
SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || early_fail_ptr == 0
11473
  || (early_fail_ptr >= common->early_fail_start_ptr && early_fail_ptr <= common->early_fail_end_ptr));
11474
11475
if (early_fail_type == type_fail)
11476
  add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr));
11477
11478
cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
11479
11480
if (type != OP_EXTUNI)
11481
  {
11482
  tmp_base = TMP3;
11483
  tmp_offset = 0;
11484
  }
11485
else
11486
  {
11487
  tmp_base = SLJIT_MEM1(SLJIT_SP);
11488
  tmp_offset = POSSESSIVE0;
11489
  }
11490
11491
/* Handle fixed part first. */
11492
if (exact > 1)
11493
  {
11494
  SLJIT_ASSERT(early_fail_ptr == 0);
11495
11496
  if (common->mode == PCRE2_JIT_COMPLETE
11497
#ifdef SUPPORT_UNICODE
11498
      && !common->utf
11499
#endif
11500
      && type != OP_ANYNL && type != OP_EXTUNI)
11501
    {
11502
    OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
11503
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0));
11504
    OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
11505
    label = LABEL();
11506
    compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
11507
    OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11508
    JUMPTO(SLJIT_NOT_ZERO, label);
11509
    }
11510
  else
11511
    {
11512
    OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
11513
    label = LABEL();
11514
    compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
11515
    OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11516
    JUMPTO(SLJIT_NOT_ZERO, label);
11517
    }
11518
  }
11519
else if (exact == 1)
11520
  compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
11521
11522
if (early_fail_type == type_fail_range)
11523
  {
11524
  /* Range end first, followed by range start. */
11525
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr);
11526
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + SSIZE_OF(sw));
11527
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, TMP2, 0);
11528
  OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, TMP2, 0);
11529
  add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, TMP2, 0, TMP1, 0));
11530
11531
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11532
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + SSIZE_OF(sw), STR_PTR, 0);
11533
  }
11534
11535
switch(opcode)
11536
  {
11537
  case OP_STAR:
11538
  case OP_UPTO:
11539
  SLJIT_ASSERT(early_fail_ptr == 0 || opcode == OP_STAR);
11540
11541
  if (type == OP_ANYNL || type == OP_EXTUNI)
11542
    {
11543
    SLJIT_ASSERT(private_data_ptr == 0);
11544
    SLJIT_ASSERT(early_fail_ptr == 0);
11545
11546
    allocate_stack(common, 2);
11547
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11548
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
11549
11550
    if (opcode == OP_UPTO)
11551
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, max);
11552
11553
    label = LABEL();
11554
    compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
11555
    if (opcode == OP_UPTO)
11556
      {
11557
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
11558
      OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
11559
      jump = JUMP(SLJIT_ZERO);
11560
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
11561
      }
11562
11563
    /* We cannot use TMP3 because of allocate_stack. */
11564
    allocate_stack(common, 1);
11565
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11566
    JUMPTO(SLJIT_JUMP, label);
11567
    if (jump != NULL)
11568
      JUMPHERE(jump);
11569
    BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11570
    break;
11571
    }
11572
#ifdef SUPPORT_UNICODE
11573
  else if (type == OP_ALLANY && !common->invalid_utf)
11574
#else
11575
  else if (type == OP_ALLANY)
11576
#endif
11577
    {
11578
    if (opcode == OP_STAR)
11579
      {
11580
      if (private_data_ptr == 0)
11581
        allocate_stack(common, 2);
11582
11583
      OP1(SLJIT_MOV, base, offset0, STR_END, 0);
11584
      OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11585
11586
      OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
11587
      process_partial_match(common);
11588
11589
      if (early_fail_ptr != 0)
11590
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
11591
      BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11592
      break;
11593
      }
11594
#ifdef SUPPORT_UNICODE
11595
    else if (!common->utf)
11596
#else
11597
    else
11598
#endif
11599
      {
11600
      if (private_data_ptr == 0)
11601
        allocate_stack(common, 2);
11602
11603
      OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11604
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));
11605
11606
      if (common->mode == PCRE2_JIT_COMPLETE)
11607
        {
11608
        OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
11609
        CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
11610
        }
11611
      else
11612
        {
11613
        jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
11614
        process_partial_match(common);
11615
        JUMPHERE(jump);
11616
        }
11617
11618
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11619
11620
      if (early_fail_ptr != 0)
11621
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11622
      BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11623
      break;
11624
      }
11625
    }
11626
11627
  charpos_enabled = FALSE;
11628
  charpos_char = 0;
11629
  charpos_othercasebit = 0;
11630
11631
  if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI))
11632
    {
11633
#ifdef SUPPORT_UNICODE
11634
    charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]);
11635
#else
11636
    charpos_enabled = TRUE;
11637
#endif
11638
    if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1))
11639
      {
11640
      charpos_othercasebit = char_get_othercase_bit(common, end + 1);
11641
      if (charpos_othercasebit == 0)
11642
        charpos_enabled = FALSE;
11643
      }
11644
11645
    if (charpos_enabled)
11646
      {
11647
      charpos_char = end[1];
11648
      /* Consume the OP_CHAR opcode. */
11649
      end += 2;
11650
#if PCRE2_CODE_UNIT_WIDTH == 8
11651
      SLJIT_ASSERT((charpos_othercasebit >> 8) == 0);
11652
#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
11653
      SLJIT_ASSERT((charpos_othercasebit >> 9) == 0);
11654
      if ((charpos_othercasebit & 0x100) != 0)
11655
        charpos_othercasebit = (charpos_othercasebit & 0xff) << 8;
11656
#endif
11657
      if (charpos_othercasebit != 0)
11658
        charpos_char |= charpos_othercasebit;
11659
11660
      BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE;
11661
      BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = charpos_char;
11662
      BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = charpos_othercasebit;
11663
      }
11664
    }
11665
11666
  if (charpos_enabled)
11667
    {
11668
    if (opcode == OP_UPTO)
11669
      OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max + 1);
11670
11671
    /* Search the first instance of charpos_char. */
11672
    jump = JUMP(SLJIT_JUMP);
11673
    label = LABEL();
11674
    if (opcode == OP_UPTO)
11675
      {
11676
      OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11677
      add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_ZERO));
11678
      }
11679
    compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
11680
    if (early_fail_ptr != 0)
11681
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11682
    JUMPHERE(jump);
11683
11684
    detect_partial_match(common, &backtrack->topbacktracks);
11685
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
11686
    if (charpos_othercasebit != 0)
11687
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
11688
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
11689
11690
    if (private_data_ptr == 0)
11691
      allocate_stack(common, 2);
11692
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11693
    OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11694
11695
    if (opcode == OP_UPTO)
11696
      {
11697
      OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11698
      add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11699
      }
11700
11701
    /* Search the last instance of charpos_char. */
11702
    label = LABEL();
11703
    compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11704
    if (early_fail_ptr != 0)
11705
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11706
    detect_partial_match(common, &no_match);
11707
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
11708
    if (charpos_othercasebit != 0)
11709
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
11710
11711
    if (opcode == OP_STAR)
11712
      {
11713
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
11714
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11715
      JUMPTO(SLJIT_JUMP, label);
11716
      }
11717
    else
11718
      {
11719
      jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char);
11720
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11721
      JUMPHERE(jump);
11722
      OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11723
      JUMPTO(SLJIT_NOT_ZERO, label);
11724
      }
11725
11726
    set_jumps(no_match, LABEL());
11727
    OP2(SLJIT_ADD, STR_PTR, 0, base, offset0, SLJIT_IMM, IN_UCHARS(1));
11728
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11729
    }
11730
  else
11731
    {
11732
    if (private_data_ptr == 0)
11733
      allocate_stack(common, 2);
11734
11735
    OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11736
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11737
    use_tmp = (!HAS_VIRTUAL_REGISTERS && opcode == OP_STAR);
11738
    SLJIT_ASSERT(!use_tmp || tmp_base == TMP3);
11739
11740
    if (common->utf)
11741
      OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0);
11742
#endif
11743
    if (opcode == OP_UPTO)
11744
      OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11745
11746
    detect_partial_match(common, &no_match);
11747
    label = LABEL();
11748
    compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11749
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11750
    if (common->utf)
11751
      OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0);
11752
#endif
11753
11754
    if (opcode == OP_UPTO)
11755
      {
11756
      OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11757
      add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11758
      }
11759
11760
    detect_partial_match_to(common, label);
11761
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11762
11763
    set_jumps(no_char1_match, LABEL());
11764
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11765
    if (common->utf)
11766
      {
11767
      set_jumps(no_match, LABEL());
11768
      if (use_tmp)
11769
        {
11770
        OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
11771
        OP1(SLJIT_MOV, base, offset0, TMP3, 0);
11772
        }
11773
      else
11774
        OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
11775
      }
11776
    else
11777
#endif
11778
      {
11779
      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11780
      set_jumps(no_match, LABEL());
11781
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11782
      }
11783
11784
    if (early_fail_ptr != 0)
11785
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11786
    }
11787
11788
  BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11789
  break;
11790
11791
  case OP_MINSTAR:
11792
  if (private_data_ptr == 0)
11793
    allocate_stack(common, 1);
11794
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11795
  BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11796
  if (early_fail_ptr != 0)
11797
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11798
  break;
11799
11800
  case OP_MINUPTO:
11801
  SLJIT_ASSERT(early_fail_ptr == 0);
11802
  if (private_data_ptr == 0)
11803
    allocate_stack(common, 2);
11804
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11805
  OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1);
11806
  BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11807
  break;
11808
11809
  case OP_QUERY:
11810
  case OP_MINQUERY:
11811
  SLJIT_ASSERT(early_fail_ptr == 0);
11812
  if (private_data_ptr == 0)
11813
    allocate_stack(common, 1);
11814
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11815
  if (opcode == OP_QUERY)
11816
    compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
11817
  BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11818
  break;
11819
11820
  case OP_EXACT:
11821
  break;
11822
11823
  case OP_POSSTAR:
11824
#if defined SUPPORT_UNICODE
11825
  if (type == OP_ALLANY && !common->invalid_utf)
11826
#else
11827
  if (type == OP_ALLANY)
11828
#endif
11829
    {
11830
    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
11831
    process_partial_match(common);
11832
    if (early_fail_ptr != 0)
11833
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
11834
    break;
11835
    }
11836
11837
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11838
  if (common->utf)
11839
    {
11840
    OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11841
    detect_partial_match(common, &no_match);
11842
    label = LABEL();
11843
    compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11844
    OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11845
    detect_partial_match_to(common, label);
11846
11847
    set_jumps(no_match, LABEL());
11848
    OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
11849
    if (early_fail_ptr != 0)
11850
      {
11851
      if (!HAS_VIRTUAL_REGISTERS && tmp_base == TMP3)
11852
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, TMP3, 0);
11853
      else
11854
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11855
      }
11856
    break;
11857
    }
11858
#endif
11859
11860
  detect_partial_match(common, &no_match);
11861
  label = LABEL();
11862
  compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11863
  detect_partial_match_to(common, label);
11864
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11865
11866
  set_jumps(no_char1_match, LABEL());
11867
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11868
  set_jumps(no_match, LABEL());
11869
  if (early_fail_ptr != 0)
11870
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11871
  break;
11872
11873
  case OP_POSUPTO:
11874
  SLJIT_ASSERT(early_fail_ptr == 0);
11875
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11876
  if (common->utf)
11877
    {
11878
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
11879
    OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11880
11881
    detect_partial_match(common, &no_match);
11882
    label = LABEL();
11883
    compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11884
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
11885
    OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11886
    add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11887
    detect_partial_match_to(common, label);
11888
11889
    set_jumps(no_match, LABEL());
11890
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
11891
    break;
11892
    }
11893
#endif
11894
11895
  if (type == OP_ALLANY)
11896
    {
11897
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));
11898
11899
    if (common->mode == PCRE2_JIT_COMPLETE)
11900
      {
11901
      OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
11902
      CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
11903
      }
11904
    else
11905
      {
11906
      jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
11907
      process_partial_match(common);
11908
      JUMPHERE(jump);
11909
      }
11910
    break;
11911
    }
11912
11913
  OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11914
11915
  detect_partial_match(common, &no_match);
11916
  label = LABEL();
11917
  compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11918
  OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11919
  add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11920
  detect_partial_match_to(common, label);
11921
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11922
11923
  set_jumps(no_char1_match, LABEL());
11924
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11925
  set_jumps(no_match, LABEL());
11926
  break;
11927
11928
  case OP_POSQUERY:
11929
  SLJIT_ASSERT(early_fail_ptr == 0);
11930
  OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11931
  compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
11932
  OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11933
  set_jumps(no_match, LABEL());
11934
  OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
11935
  break;
11936
11937
  default:
11938
  SLJIT_UNREACHABLE();
11939
  break;
11940
  }
11941
11942
count_match(common);
11943
return end;
11944
}
11945
11946
/* Emits the matching path for OP_FAIL, OP_ACCEPT and OP_ASSERT_ACCEPT.

   common  compiler state (accept list / accept label, reset_match list, ...)
   cc      points to the opcode being compiled
   parent  passed through to PUSH_BACKTRACK via the enclosing scope

Returns cc + 1 in every case.

OP_FAIL only queues an unconditional jump on the backtrack list. For the
accept opcodes the code either jumps straight to the accept target, or, when
the pattern might match the empty string, first emits run-time checks of the
PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART match options. Wherever an accept
jump is emitted, it targets common->accept_label when that label already
exists, otherwise it is queued on the common->accept jump list. */
static SLJIT_INLINE PCRE2_SPTR compile_fail_accept_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11947
{
11948
DEFINE_COMPILER;
11949
backtrack_common *backtrack;
11950
11951
PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
11952
11953
/* OP_FAIL: always take the backtracking path. */
if (*cc == OP_FAIL)
11954
  {
11955
  add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
11956
  return cc + 1;
11957
  }
11958
11959
/* OP_ACCEPT outside of a separately compiled entry, with PCRE2_ENDANCHORED
set: if STR_PTR is not at STR_END, jump to the reset_match list. */
if (*cc == OP_ACCEPT && common->currententry == NULL && (common->re->overall_options & PCRE2_ENDANCHORED) != 0)
11960
  add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
11961
11962
if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
11963
  {
11964
  /* No need to check notempty conditions. */
11965
  if (common->accept_label == NULL)
11966
    add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
11967
  else
11968
    JUMPTO(SLJIT_JUMP, common->accept_label);
11969
  return cc + 1;
11970
  }
11971
11972
/* Accept immediately when STR_PTR differs from the value stored in
OVECTOR(0) (presumably the match start, i.e. the match is not empty -
confirm against the OVECTOR layout). */
if (common->accept_label == NULL)
11973
  add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));
11974
else
11975
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
11976
11977
/* Load jit_arguments.options into TMP2. With virtual registers ARGUMENTS is
copied into TMP1 first and TMP1 is used as the base register; TMP1 is reused
below for the jit_arguments.str load. */
if (HAS_VIRTUAL_REGISTERS)
11978
  {
11979
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
11980
  OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
11981
  }
11982
else
11983
  OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options));
11984
11985
/* PCRE2_NOTEMPTY set: backtrack. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
11986
add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_NOT_ZERO));
11987
/* PCRE2_NOTEMPTY_ATSTART not set: accept. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
11988
if (common->accept_label == NULL)
11989
  add_jump(compiler, &common->accept, JUMP(SLJIT_ZERO));
11990
else
11991
  JUMPTO(SLJIT_ZERO, common->accept_label);
11992
11993
/* PCRE2_NOTEMPTY_ATSTART is set: accept only if STR_PTR differs from
jit_arguments.str, otherwise fall through to the backtrack jump. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
11994
if (common->accept_label == NULL)
11995
  add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
11996
else
11997
  CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
11998
add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
11999
return cc + 1;
12000
}
12001
12002
/* Emits the matching path for OP_CLOSE.

   common  compiler state
   cc      points to the OP_CLOSE opcode; the 2-byte value read with
           GET2(cc, 1) is used as the bracket index

Stores STR_PTR into OVECTOR(2 * index + 1) and, unless the bracket is flagged
in common->optimized_cbracket, copies the value saved in OVECTOR_PRIV(index)
into OVECTOR(2 * index). Nothing is emitted while compiling a separate entry
(common->currententry != NULL).

Returns cc + 1 + IMM2_SIZE. */
static SLJIT_INLINE PCRE2_SPTR compile_close_matchingpath(compiler_common *common, PCRE2_SPTR cc)
12003
{
12004
DEFINE_COMPILER;
12005
int offset = GET2(cc, 1);
12006
BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
12007
12008
/* Data will be discarded anyway... */
12009
if (common->currententry != NULL)
12010
  return cc + 1 + IMM2_SIZE;
12011
12012
/* The private slot is indexed by the bracket number, so it must be read
before offset is doubled below. */
if (!optimized_cbracket)
12013
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset));
12014
/* From here on offset indexes the ovector pair of this bracket. */
offset <<= 1;
12015
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
12016
if (!optimized_cbracket)
12017
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
12018
return cc + 1 + IMM2_SIZE;
12019
}
12020
12021
/* Emits the matching path for the backtracking control verbs dispatched here
by compile_matchingpath (OP_PRUNE, OP_SKIP, OP_THEN, OP_COMMIT and their
_ARG variants).

   common  compiler state
   cc      points to the verb opcode
   parent  passed through to PUSH_BACKTRACK via the enclosing scope

A backtrack record is pushed for every verb. On the matching path OP_SKIP
saves STR_PTR on the stack, and the COMMIT/PRUNE/THEN _ARG forms store the
address cc + 2 as the current mark; the remaining verbs (including
OP_SKIP_ARG) emit no matching-path code in this function.

Returns the address of the opcode following the verb: cc + 1, or
cc + 1 + 2 + cc[1] for the _ARG forms. */
static SLJIT_INLINE PCRE2_SPTR compile_control_verb_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
12022
{
12023
DEFINE_COMPILER;
12024
backtrack_common *backtrack;
12025
PCRE2_UCHAR opcode = *cc;
12026
PCRE2_SPTR ccend = cc + 1;
12027
12028
/* The _ARG forms are longer: skip 2 + cc[1] additional code units
(presumably a length unit, the name and its terminator - confirm against
the opcode layout). */
if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG ||
12029
    opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
12030
  ccend += 2 + cc[1];
12031
12032
PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
12033
12034
/* OP_SKIP: remember the current subject position in one stack slot. */
if (opcode == OP_SKIP)
12035
  {
12036
  allocate_stack(common, 1);
12037
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
12038
  return ccend;
12039
  }
12040
12041
/* Store cc + 2 both in the local mark slot (common->mark_ptr) and in
jit_arguments.mark_ptr. OP_SKIP_ARG is deliberately absent from this list. */
if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
12042
  {
12043
  if (HAS_VIRTUAL_REGISTERS)
12044
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
12045
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
12046
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
12047
  OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
12048
  }
12049
12050
return ccend;
12051
}
12052
12053
/* One-element opcode buffer holding OP_THEN_TRAP. Then-trap backtrack records
set their common.cc to this buffer. */
static PCRE2_UCHAR then_trap_opcode[1] = { OP_THEN_TRAP };
12054
12055
/* Sets up a "then trap" for the opcode range starting at cc.

   common  compiler state; common->then_trap is replaced by the new record
   cc      first opcode of the range
   ccend   end of the range (passed to get_framesize / init_frame)
   parent  passed through to PUSH_BACKTRACK_NOVALUE via the enclosing scope

A then_trap_backtrack record is pushed and made the current trap. The emitted
code allocates 3 + max(framesize, 0) stack slots, points the control head at
the new entry and fills the three entry slots with: the start offset of the
range (cc - common->start), the type_then_trap tag and the previous control
head. When get_framesize reports a non-negative size, a frame is initialised
as well. */
static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
12056
{
12057
DEFINE_COMPILER;
12058
backtrack_common *backtrack;
12059
BOOL needs_control_head;
12060
int size;
12061
12062
PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
12063
common->then_trap = BACKTRACK_AS(then_trap_backtrack);
12064
BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
12065
BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
12066
BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);
12067
12068
/* Three slots for the trap entry itself plus the frame, if there is one. */
size = BACKTRACK_AS(then_trap_backtrack)->framesize;
12069
size = 3 + (size < 0 ? 0 : size);
12070
12071
/* Keep the previous control head in TMP2 across allocate_stack. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
12072
allocate_stack(common, size);
12073
/* The new control head is the address of the three-slot trap entry, which
sits (size - 3) words above STACK_TOP when frame slots were allocated. */
if (size > 3)
12074
  OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
12075
else
12076
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
12077
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
12078
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
12079
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
12080
12081
/* size is reused: from here it is the raw framesize again. */
size = BACKTRACK_AS(then_trap_backtrack)->framesize;
12082
if (size >= 0)
12083
  init_frame(common, cc, ccend, size - 1, 0);
12084
}
12085
12086
/* Emits the matching path for the opcode sequence [cc, ccend).

   common  compiler state
   cc      first opcode to compile
   ccend   end of the sequence; asserted to be OP_END or an opcode in the
           range OP_ALT..OP_KETRPOS
   parent  backtrack record of the enclosing construct; items that only need
           a failure jump add it to parent->top->nextbacktracks when
           parent->top exists, otherwise to parent->topbacktracks

Every opcode is dispatched to its compile_*_matchingpath helper, which returns
the address of the next opcode to process. A NULL return from a helper makes
this function return immediately. When common->then_offsets marks the start
position, the whole sequence is wrapped in a then trap: the tail record is
created before the loop and the head record after it. */
static void compile_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
12087
{
12088
DEFINE_COMPILER;
12089
backtrack_common *backtrack;
12090
BOOL has_then_trap = FALSE;
12091
then_trap_backtrack *save_then_trap = NULL;
12092
12093
SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));
12094
12095
if (common->has_then && common->then_offsets[cc - common->start] != 0)
12096
  {
12097
  SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
12098
  has_then_trap = TRUE;
12099
  /* compile_then_trap_matchingpath overwrites common->then_trap; the old
  value is restored after the loop. */
  save_then_trap = common->then_trap;
12100
  /* Tail item on backtrack. */
12101
  compile_then_trap_matchingpath(common, cc, ccend, parent);
12102
  }
12103
12104
while (cc < ccend)
12105
  {
12106
  switch(*cc)
12107
    {
12108
    /* Assertions handled by compile_simple_assertion_matchingpath. */
    case OP_SOD:
12109
    case OP_SOM:
12110
    case OP_NOT_WORD_BOUNDARY:
12111
    case OP_WORD_BOUNDARY:
12112
    case OP_EODN:
12113
    case OP_EOD:
12114
    case OP_DOLL:
12115
    case OP_DOLLM:
12116
    case OP_CIRC:
12117
    case OP_CIRCM:
12118
    case OP_REVERSE:
12119
    cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
12120
    break;
12121
12122
    /* Unrepeated character types handled by compile_char1_matchingpath. */
    case OP_NOT_DIGIT:
12123
    case OP_DIGIT:
12124
    case OP_NOT_WHITESPACE:
12125
    case OP_WHITESPACE:
12126
    case OP_NOT_WORDCHAR:
12127
    case OP_WORDCHAR:
12128
    case OP_ANY:
12129
    case OP_ALLANY:
12130
    case OP_ANYBYTE:
12131
    case OP_NOTPROP:
12132
    case OP_PROP:
12133
    case OP_ANYNL:
12134
    case OP_NOT_HSPACE:
12135
    case OP_HSPACE:
12136
    case OP_NOT_VSPACE:
12137
    case OP_VSPACE:
12138
    case OP_EXTUNI:
12139
    case OP_NOT:
12140
    case OP_NOTI:
12141
    cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
12142
    break;
12143
12144
    /* Save the old OVECTOR(0) on the stack and replace it with STR_PTR. */
    case OP_SET_SOM:
12145
    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
12146
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
12147
    allocate_stack(common, 1);
12148
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
12149
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
12150
    cc++;
12151
    break;
12152
12153
    /* compile_charn_matchingpath is used only in PCRE2_JIT_COMPLETE mode;
    in the other modes the character is compiled like a single char type. */
    case OP_CHAR:
12154
    case OP_CHARI:
12155
    if (common->mode == PCRE2_JIT_COMPLETE)
12156
      cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
12157
    else
12158
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
12159
    break;
12160
12161
    /* Repeated single items handled by compile_iterator_matchingpath. */
    case OP_STAR:
12162
    case OP_MINSTAR:
12163
    case OP_PLUS:
12164
    case OP_MINPLUS:
12165
    case OP_QUERY:
12166
    case OP_MINQUERY:
12167
    case OP_UPTO:
12168
    case OP_MINUPTO:
12169
    case OP_EXACT:
12170
    case OP_POSSTAR:
12171
    case OP_POSPLUS:
12172
    case OP_POSQUERY:
12173
    case OP_POSUPTO:
12174
    case OP_STARI:
12175
    case OP_MINSTARI:
12176
    case OP_PLUSI:
12177
    case OP_MINPLUSI:
12178
    case OP_QUERYI:
12179
    case OP_MINQUERYI:
12180
    case OP_UPTOI:
12181
    case OP_MINUPTOI:
12182
    case OP_EXACTI:
12183
    case OP_POSSTARI:
12184
    case OP_POSPLUSI:
12185
    case OP_POSQUERYI:
12186
    case OP_POSUPTOI:
12187
    case OP_NOTSTAR:
12188
    case OP_NOTMINSTAR:
12189
    case OP_NOTPLUS:
12190
    case OP_NOTMINPLUS:
12191
    case OP_NOTQUERY:
12192
    case OP_NOTMINQUERY:
12193
    case OP_NOTUPTO:
12194
    case OP_NOTMINUPTO:
12195
    case OP_NOTEXACT:
12196
    case OP_NOTPOSSTAR:
12197
    case OP_NOTPOSPLUS:
12198
    case OP_NOTPOSQUERY:
12199
    case OP_NOTPOSUPTO:
12200
    case OP_NOTSTARI:
12201
    case OP_NOTMINSTARI:
12202
    case OP_NOTPLUSI:
12203
    case OP_NOTMINPLUSI:
12204
    case OP_NOTQUERYI:
12205
    case OP_NOTMINQUERYI:
12206
    case OP_NOTUPTOI:
12207
    case OP_NOTMINUPTOI:
12208
    case OP_NOTEXACTI:
12209
    case OP_NOTPOSSTARI:
12210
    case OP_NOTPOSPLUSI:
12211
    case OP_NOTPOSQUERYI:
12212
    case OP_NOTPOSUPTOI:
12213
    case OP_TYPESTAR:
12214
    case OP_TYPEMINSTAR:
12215
    case OP_TYPEPLUS:
12216
    case OP_TYPEMINPLUS:
12217
    case OP_TYPEQUERY:
12218
    case OP_TYPEMINQUERY:
12219
    case OP_TYPEUPTO:
12220
    case OP_TYPEMINUPTO:
12221
    case OP_TYPEEXACT:
12222
    case OP_TYPEPOSSTAR:
12223
    case OP_TYPEPOSPLUS:
12224
    case OP_TYPEPOSQUERY:
12225
    case OP_TYPEPOSUPTO:
12226
    cc = compile_iterator_matchingpath(common, cc, parent);
12227
    break;
12228
12229
    /* The code unit 32 bytes after the opcode (presumably just past the
    class bitmap - confirm) tells whether the class is followed by a
    repeat opcode in the range OP_CRSTAR..OP_CRPOSRANGE. */
    case OP_CLASS:
12230
    case OP_NCLASS:
12231
    if (cc[1 + (32 / sizeof(PCRE2_UCHAR))] >= OP_CRSTAR && cc[1 + (32 / sizeof(PCRE2_UCHAR))] <= OP_CRPOSRANGE)
12232
      cc = compile_iterator_matchingpath(common, cc, parent);
12233
    else
12234
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
12235
    break;
12236
12237
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
12238
    /* Same test as above; the opcode to inspect is found GET(cc, 1) code
    units after the OP_XCLASS opcode. */
    case OP_XCLASS:
12239
    if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE)
12240
      cc = compile_iterator_matchingpath(common, cc, parent);
12241
    else
12242
      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
12243
    break;
12244
#endif
12245
12246
    /* Back references: repeated ones go to the ref iterator, single ones
    are compiled in place and the opcode is skipped here. */
    case OP_REF:
12247
    case OP_REFI:
12248
    if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE)
12249
      cc = compile_ref_iterator_matchingpath(common, cc, parent);
12250
    else
12251
      {
12252
      compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
12253
      cc += 1 + IMM2_SIZE;
12254
      }
12255
    break;
12256
12257
    /* As OP_REF, but with two IMM2 operands and a preceding
    compile_dnref_search call in the unrepeated case. */
    case OP_DNREF:
12258
    case OP_DNREFI:
12259
    if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE)
12260
      cc = compile_ref_iterator_matchingpath(common, cc, parent);
12261
    else
12262
      {
12263
      compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
12264
      compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
12265
      cc += 1 + 2 * IMM2_SIZE;
12266
      }
12267
    break;
12268
12269
    case OP_RECURSE:
12270
    cc = compile_recurse_matchingpath(common, cc, parent);
12271
    break;
12272
12273
    case OP_CALLOUT:
12274
    case OP_CALLOUT_STR:
12275
    cc = compile_callout_matchingpath(common, cc, parent);
12276
    break;
12277
12278
    case OP_ASSERT:
12279
    case OP_ASSERT_NOT:
12280
    case OP_ASSERTBACK:
12281
    case OP_ASSERTBACK_NOT:
12282
    PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
12283
    cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
12284
    break;
12285
12286
    /* The bracket after OP_BRAMINZERO is skipped on the matching path
    (cc is moved to bracketend). One slot holding STR_PTR is pushed, or
    two slots (0 and STR_PTR) when the bracket is closed by OP_KETRMIN. */
    case OP_BRAMINZERO:
12287
    PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
12288
    cc = bracketend(cc + 1);
12289
    if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
12290
      {
12291
      allocate_stack(common, 1);
12292
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
12293
      }
12294
    else
12295
      {
12296
      allocate_stack(common, 2);
12297
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
12298
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
12299
      }
12300
    BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
12301
    count_match(common);
12302
    break;
12303
12304
    case OP_ASSERT_NA:
12305
    case OP_ASSERTBACK_NA:
12306
    case OP_ONCE:
12307
    case OP_SCRIPT_RUN:
12308
    case OP_BRA:
12309
    case OP_CBRA:
12310
    case OP_COND:
12311
    case OP_SBRA:
12312
    case OP_SCBRA:
12313
    case OP_SCOND:
12314
    cc = compile_bracket_matchingpath(common, cc, parent);
12315
    break;
12316
12317
    /* OP_BRAZERO followed by an opcode numbered above OP_ASSERTBACK_NOT is
    compiled as a bracket, otherwise as an assertion. */
    case OP_BRAZERO:
12318
    if (cc[1] > OP_ASSERTBACK_NOT)
12319
      cc = compile_bracket_matchingpath(common, cc, parent);
12320
    else
12321
      {
12322
      PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
12323
      cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
12324
      }
12325
    break;
12326
12327
    case OP_BRAPOS:
12328
    case OP_CBRAPOS:
12329
    case OP_SBRAPOS:
12330
    case OP_SCBRAPOS:
12331
    case OP_BRAPOSZERO:
12332
    cc = compile_bracketpos_matchingpath(common, cc, parent);
12333
    break;
12334
12335
    /* Push the previous mark value, then store cc + 2 as the new mark in
    both the local slot and jit_arguments.mark_ptr. When the pattern has a
    SKIP with argument, four more slots are used to link a type_mark entry
    (previous control head, tag, cc + 2, STR_PTR) into the control head. */
    case OP_MARK:
12336
    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
12337
    SLJIT_ASSERT(common->mark_ptr != 0);
12338
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
12339
    allocate_stack(common, common->has_skip_arg ? 5 : 1);
12340
    if (HAS_VIRTUAL_REGISTERS)
12341
      OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
12342
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
12343
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
12344
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
12345
    OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
12346
    if (common->has_skip_arg)
12347
      {
12348
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
12349
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
12350
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
12351
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
12352
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
12353
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
12354
      }
12355
    cc += 1 + 2 + cc[1];
12356
    break;
12357
12358
    case OP_PRUNE:
12359
    case OP_PRUNE_ARG:
12360
    case OP_SKIP:
12361
    case OP_SKIP_ARG:
12362
    case OP_THEN:
12363
    case OP_THEN_ARG:
12364
    case OP_COMMIT:
12365
    case OP_COMMIT_ARG:
12366
    cc = compile_control_verb_matchingpath(common, cc, parent);
12367
    break;
12368
12369
    case OP_FAIL:
12370
    case OP_ACCEPT:
12371
    case OP_ASSERT_ACCEPT:
12372
    cc = compile_fail_accept_matchingpath(common, cc, parent);
12373
    break;
12374
12375
    case OP_CLOSE:
12376
    cc = compile_close_matchingpath(common, cc);
12377
    break;
12378
12379
    /* No code is emitted: the bracket following OP_SKIPZERO is skipped. */
    case OP_SKIPZERO:
12380
    cc = bracketend(cc + 1);
12381
    break;
12382
12383
    default:
12384
    SLJIT_UNREACHABLE();
12385
    return;
12386
    }
12387
  /* A helper signalled failure by returning NULL. */
  if (cc == NULL)
12388
    return;
12389
  }
12390
12391
if (has_then_trap)
12392
  {
12393
  /* Head item on backtrack. */
12394
  PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
12395
  BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
12396
  BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
12397
  common->then_trap = save_then_trap;
12398
  }
12399
SLJIT_ASSERT(cc == ccend);
12400
}
12401
12402
/* The matching-path helper macros are not used past this point. */
#undef PUSH_BACKTRACK
12403
#undef PUSH_BACKTRACK_NOVALUE
12404
#undef BACKTRACK_AS
12405
12406
/* Calls compile_backtrackingpath() for the given backtrack record and returns
from the enclosing function when the sljit compiler reports an error. It
expands to a plain "return;" and relies on the names common and compiler being
in scope at the point of use. */
#define COMPILE_BACKTRACKINGPATH(current) \
12407
  do \
12408
    { \
12409
    compile_backtrackingpath(common, (current)); \
12410
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
12411
      return; \
12412
    } \
12413
  while (0)
12414
12415
/* Casts the variable named current (which must be in scope) to a pointer to
the given backtrack structure type. */
#define CURRENT_AS(type) ((type *)current)
12416
12417
/* Emits the backtracking path of a character iterator. The opcode, character
type and repeat limits are decoded by get_iterator_parameters() from
current->cc. The values saved by the matching path are read either from the
top of the backtrack stack (when PRIVATE_DATA(cc) is zero) or from the
private data slot addressed through SLJIT_SP. */
static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12418
{
12419
DEFINE_COMPILER;
12420
PCRE2_SPTR cc = current->cc;
12421
PCRE2_UCHAR opcode;
12422
PCRE2_UCHAR type;
12423
sljit_u32 max = 0, exact;
12424
struct sljit_label *label = NULL;
12425
struct sljit_jump *jump = NULL;
12426
jump_list *jumplist = NULL;
12427
PCRE2_SPTR end;
12428
int private_data_ptr = PRIVATE_DATA(cc);
12429
/* base/offset0/offset1 select where the two saved words live: STACK(0) and
STACK(1) relative to STACK_TOP, or two consecutive words at private_data_ptr
relative to SLJIT_SP. */
int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
12430
int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
12431
int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + SSIZE_OF(sw);
12432
12433
cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
12434
12435
switch(opcode)
12436
  {
12437
  case OP_STAR:
12438
  case OP_UPTO:
12439
  if (type == OP_ANYNL || type == OP_EXTUNI)
12440
    {
12441
    SLJIT_ASSERT(private_data_ptr == 0);
12442
    /* Pop the saved STR_PTR; a non-zero value resumes the matching path,
    zero falls through to the end of this backtracking path. */
    set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
12443
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12444
    free_stack(common, 1);
12445
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12446
    }
12447
  else
12448
    {
12449
    if (CURRENT_AS(char_iterator_backtrack)->u.charpos.enabled)
12450
      {
12451
      /* Load the saved position (offset0) and the limit (offset1, kept in
      TMP2), then move back by one code unit. */
      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12452
      OP1(SLJIT_MOV, TMP2, 0, base, offset1);
12453
      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
12454
12455
      /* Give up once STR_PTR is not above the limit. */
      jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
12456
      /* Loop: read the code unit before STR_PTR, save the position, and
      resume the matching path when it equals charpos.chr (after OR-ing in
      othercasebit if one is set). Otherwise call move_back() and repeat
      while STR_PTR is still above the limit. */
      label = LABEL();
12457
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
12458
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12459
      if (CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit != 0)
12460
        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit);
12461
      CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.chr, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12462
      move_back(common, NULL, TRUE);
12463
      CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP2, 0, label);
12464
      }
12465
    else
12466
      {
12467
      /* Load the saved position; unless it has reached the limit stored at
      offset1, call move_back(), save the new position and resume the
      matching path. */
      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12468
      jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
12469
      move_back(common, NULL, TRUE);
12470
      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12471
      JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12472
      }
12473
    JUMPHERE(jump);
12474
    /* Both saved words were on the backtrack stack: release them. */
    if (private_data_ptr == 0)
12475
      free_stack(common, 2);
12476
    }
12477
  break;
12478
12479
  case OP_MINSTAR:
12480
  /* Reload the saved position and try to match one more character. On
  success store the new position and resume the matching path; failures
  collected in jumplist land after the jump. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12481
  compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
12482
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12483
  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12484
  set_jumps(jumplist, LABEL());
12485
  if (private_data_ptr == 0)
12486
    free_stack(common, 1);
12487
  break;
12488
12489
  case OP_MINUPTO:
12490
  /* offset1 holds a counter: decrement it and fail (via jumplist) when the
  result is zero. Otherwise store it back and try to match one more
  character, as for OP_MINSTAR. */
  OP1(SLJIT_MOV, TMP1, 0, base, offset1);
12491
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12492
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
12493
  add_jump(compiler, &jumplist, JUMP(SLJIT_ZERO));
12494
12495
  OP1(SLJIT_MOV, base, offset1, TMP1, 0);
12496
  compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
12497
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12498
  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12499
12500
  set_jumps(jumplist, LABEL());
12501
  if (private_data_ptr == 0)
12502
    free_stack(common, 2);
12503
  break;
12504
12505
  case OP_QUERY:
12506
  /* Normal entry: load the saved position and clear the slot. A non-zero
  position resumes the matching path; zero skips to the end. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12507
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
12508
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12509
  jump = JUMP(SLJIT_JUMP);
12510
  /* Entry for the jumps in u.backtracks: load the saved position, clear the
  slot and resume the matching path unconditionally. */
  set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
12511
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12512
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
12513
  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12514
  JUMPHERE(jump);
12515
  if (private_data_ptr == 0)
12516
    free_stack(common, 1);
12517
  break;
12518
12519
  case OP_MINQUERY:
12520
  /* Load the saved position and clear the slot. If the position was zero
  there is nothing left to try; otherwise attempt to match one character and
  resume the matching path on success. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12521
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
12522
  jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
12523
  compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
12524
  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12525
  set_jumps(jumplist, LABEL());
12526
  JUMPHERE(jump);
12527
  if (private_data_ptr == 0)
12528
    free_stack(common, 1);
12529
  break;
12530
12531
  /* No code is emitted here for these opcodes. */
  case OP_EXACT:
12532
  case OP_POSSTAR:
12533
  case OP_POSQUERY:
12534
  case OP_POSUPTO:
12535
  break;
12536
12537
  default:
12538
  SLJIT_UNREACHABLE();
12539
  break;
12540
  }
12541
12542
/* Jumps recorded in topbacktracks continue after the code emitted above. */
set_jumps(current->topbacktracks, LABEL());
12543
}
12544
12545
/* Emits the backtracking path of a repeated back reference. The repeat
opcode is read from the position that follows the reference operand(s): one
IMM2 value for OP_REF / OP_REFI, two IMM2 values for any other opcode. */
static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12546
{
12547
DEFINE_COMPILER;
12548
PCRE2_SPTR cc = current->cc;
12549
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
12550
PCRE2_UCHAR type;
12551
12552
type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
12553
12554
if ((type & 0x1) == 0)
12555
  {
12556
  /* Maximize case. */
12557
  /* Pop the saved STR_PTR and resume the matching path when it is
  non-zero. */
  set_jumps(current->topbacktracks, LABEL());
12558
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12559
  free_stack(common, 1);
12560
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
12561
  return;
12562
  }
12563
12564
/* Odd repeat opcode (presumably the minimizing variants - confirm against
the opcode table): the saved STR_PTR stays on the stack while the matching
path is retried; the 2 or 3 stack words are only released once the saved
value is zero or a topbacktracks jump is taken. */
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12565
CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
12566
set_jumps(current->topbacktracks, LABEL());
12567
free_stack(common, ref ? 2 : 3);
12568
}
12569
12570
/* Emits the backtracking path of a recursion. For a non-inlined recursion a
fast call is made to the entry's backtrack routine; for an inlined pattern
the backtracking path of the inlined items (current->top) is compiled in
place. */
static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12571
{
12572
DEFINE_COMPILER;
12573
recurse_entry *entry;
12574
12575
if (!CURRENT_AS(recurse_backtrack)->inlined_pattern)
12576
  {
12577
  entry = CURRENT_AS(recurse_backtrack)->entry;
12578
  /* If the backtrack label of the entry is not known yet, queue the call in
  backtrack_calls; otherwise call the label directly. */
  if (entry->backtrack_label == NULL)
12579
    add_jump(compiler, &entry->backtrack_calls, JUMP(SLJIT_FAST_CALL));
12580
  else
12581
    JUMPTO(SLJIT_FAST_CALL, entry->backtrack_label);
12582
  /* A non-zero TMP1 after the call resumes the matching path. */
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(recurse_backtrack)->matchingpath);
12583
  }
12584
else
12585
  compile_backtrackingpath(common, current->top);
12586
12587
set_jumps(current->topbacktracks, LABEL());
12588
}
12589
12590
/* Emits the backtracking path of an assertion, which may be prefixed by
OP_BRAZERO (OP_BRAMINZERO is not expected here). When the assertion saved a
frame (framesize >= 0) and is a positive one, the frame is reverted and the
previous private data value is restored. */
static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12591
{
12592
DEFINE_COMPILER;
12593
PCRE2_SPTR cc = current->cc;
12594
PCRE2_UCHAR bra = OP_BRA;
12595
struct sljit_jump *brajump = NULL;
12596
12597
SLJIT_ASSERT(*cc != OP_BRAMINZERO);
12598
/* Remember and skip an OP_BRAZERO prefix so that *cc is the assertion
opcode. */
if (*cc == OP_BRAZERO)
12599
  {
12600
  bra = *cc;
12601
  cc++;
12602
  }
12603
12604
if (bra == OP_BRAZERO)
12605
  {
12606
  SLJIT_ASSERT(current->topbacktracks == NULL);
12607
  /* Load the value saved on top of the stack into STR_PTR; it is compared
  with zero below. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12608
  }
12609
12610
/* Negative framesize: no frame needs reverting here. */
if (CURRENT_AS(assert_backtrack)->framesize < 0)
12611
  {
12612
  set_jumps(current->topbacktracks, LABEL());
12613
12614
  if (bra == OP_BRAZERO)
12615
    {
12616
    /* Clear the stack slot and re-enter the matching path if the loaded
    value was non-zero; otherwise release the slot. */
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
12617
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
12618
    free_stack(common, 1);
12619
    }
12620
  return;
12621
  }
12622
12623
if (bra == OP_BRAZERO)
12624
  {
12625
  /* Negative assertions are handled like the frameless case above. */
  if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
12626
    {
12627
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
12628
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
12629
    free_stack(common, 1);
12630
    return;
12631
    }
12632
  /* Release the slot; a zero value skips everything up to JUMPHERE(brajump)
  at the end of the function. */
  free_stack(common, 1);
12633
  brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
12634
  }
12635
12636
if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
12637
  {
12638
  /* Reload STACK_TOP from the private data slot, call the revertframes
  routine, then restore the private data slot from STACK(-2) and move
  STACK_TOP by (framesize - 1) words. */
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr);
12639
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12640
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
12641
  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(assert_backtrack)->framesize - 1) * sizeof(sljit_sw));
12642
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr, TMP1, 0);
12643
12644
  set_jumps(current->topbacktracks, LABEL());
12645
  }
12646
else
12647
  set_jumps(current->topbacktracks, LABEL());
12648
12649
if (bra == OP_BRAZERO)
12650
  {
12651
  /* We know there is enough place on the stack. */
12652
  /* Re-allocate one word by adjusting STACK_TOP directly, store zero in it
  and re-enter the matching path. */
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
12653
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
12654
  JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_backtrack)->matchingpath);
12655
  JUMPHERE(brajump);
12656
  }
12657
}
12658
12659
/* Emits the backtracking path of a bracket (group). The function:
  - decodes the optional OP_BRAZERO / OP_BRAMINZERO prefix, the closing ket
    and any counted repeat stored in the ket's private data;
  - restores the values the matching path pushed on the backtrack stack
    (repeat counter, capture offsets, alternative selector);
  - compiles the backtracking path of the alternative that matched, followed
    by the matching and backtracking paths of every remaining alternative;
  - finally emits the code that either retries the group (repeats) or falls
    through to the backtracking path of the preceding item.
The exact order of the emitted stack operations mirrors the layout created
by the matching path, so statements here must not be reordered. */
static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12660
{
12661
DEFINE_COMPILER;
12662
int opcode, stacksize, alt_count, alt_max;
12663
int offset = 0;
12664
int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
12665
int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
12666
PCRE2_SPTR cc = current->cc;
12667
PCRE2_SPTR ccbegin;
12668
PCRE2_SPTR ccprev;
12669
PCRE2_UCHAR bra = OP_BRA;
12670
PCRE2_UCHAR ket;
12671
assert_backtrack *assert;
12672
BOOL has_alternatives;
12673
BOOL needs_control_head = FALSE;
12674
struct sljit_jump *brazero = NULL;
12675
struct sljit_jump *next_alt = NULL;
12676
struct sljit_jump *once = NULL;
12677
struct sljit_jump *cond = NULL;
12678
struct sljit_label *rmin_label = NULL;
12679
struct sljit_label *exact_label = NULL;
12680
struct sljit_put_label *put_label = NULL;
12681
12682
/* Remember and skip a BRAZERO / BRAMINZERO prefix. */
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
12683
  {
12684
  bra = *cc;
12685
  cc++;
12686
  }
12687
12688
opcode = *cc;
12689
/* Temporarily point ccbegin at the closing ket of the bracket. */
ccbegin = bracketend(cc) - 1 - LINK_SIZE;
12690
ket = *ccbegin;
12691
/* A plain OP_KET with private data describes a counted repeat: slot, type
and count are read from the private data of the ket. UPTO / MINUPTO repeats
are then handled as KETRMAX / KETRMIN. */
if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
12692
  {
12693
  repeat_ptr = PRIVATE_DATA(ccbegin);
12694
  repeat_type = PRIVATE_DATA(ccbegin + 2);
12695
  repeat_count = PRIVATE_DATA(ccbegin + 3);
12696
  SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
12697
  if (repeat_type == OP_UPTO)
12698
    ket = OP_KETRMAX;
12699
  if (repeat_type == OP_MINUPTO)
12700
    ket = OP_KETRMIN;
12701
  }
12702
/* From here ccbegin is the bracket opcode and cc is advanced by the link
value to the end of the first alternative. */
ccbegin = cc;
12703
cc += GET(cc, 1);
12704
has_alternatives = *cc == OP_ALT;
12705
if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
12706
  has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
12707
/* For capturing brackets, offset is twice the number stored after the
link. */
if (opcode == OP_CBRA || opcode == OP_SCBRA)
12708
  offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
12709
if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
12710
  opcode = OP_SCOND;
12711
12712
alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;
12713
12714
/* Decoding the needs_control_head in framesize. */
12715
/* Note: this modifies u.framesize in the backtrack structure (the low bit is
extracted and the value shifted right by one). */
if (opcode == OP_ONCE)
12716
  {
12717
  needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
12718
  CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
12719
  }
12720
12721
/* Counted KETRMAX / KETRMIN repeat: pop the saved counter into TMP1 and
write it back to the repeat slot (plus one for OP_UPTO). */
if (ket != OP_KET && repeat_type != 0)
12722
  {
12723
  /* TMP1 is used in OP_KETRMIN below. */
12724
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12725
  free_stack(common, 1);
12726
  if (repeat_type == OP_UPTO)
12727
    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
12728
  else
12729
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
12730
  }
12731
12732
if (ket == OP_KETRMAX)
12733
  {
12734
  if (bra == OP_BRAZERO)
12735
    {
12736
    /* Pop one word; a zero value jumps to JUMPHERE(brazero) in the KETRMAX
    tail at the end of the function. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12737
    free_stack(common, 1);
12738
    brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12739
    }
12740
  }
12741
else if (ket == OP_KETRMIN)
12742
  {
12743
  if (bra != OP_BRAMINZERO)
12744
    {
12745
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12746
    if (repeat_type != 0)
12747
      {
12748
      /* TMP1 was set a few lines above. */
12749
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12750
      /* Drop STR_PTR for non-greedy plus quantifier. */
12751
      if (opcode != OP_ONCE)
12752
        free_stack(common, 1);
12753
      }
12754
    else if (opcode >= OP_SBRA || opcode == OP_ONCE)
12755
      {
12756
      /* Checking zero-length iteration. */
12757
      /* The start position to compare against is read from the private data
      slot directly, or (OP_ONCE with a frame) from the stack location that
      the private data slot points at. */
      if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
12758
        CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12759
      else
12760
        {
12761
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12762
        CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 2), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12763
        }
12764
      /* Drop STR_PTR for non-greedy plus quantifier. */
12765
      if (opcode != OP_ONCE)
12766
        free_stack(common, 1);
12767
      }
12768
    else
12769
      JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12770
    }
12771
  /* The KETRMIN tail at the end of the function jumps back here. */
  rmin_label = LABEL();
12772
  if (repeat_type != 0)
12773
    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
12774
  }
12775
else if (bra == OP_BRAZERO)
12776
  {
12777
  /* Non-repeated BRAZERO: pop one word; a non-zero value jumps to
  JUMPHERE(brazero) at the very end of the function. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12778
  free_stack(common, 1);
12779
  brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12780
  }
12781
else if (repeat_type == OP_EXACT)
12782
  {
12783
  /* Reset the repeat counter to 1; the OP_EXACT tail loops back to
  exact_label until the counter exceeds repeat_count. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
12784
  exact_label = LABEL();
12785
  }
12786
12787
/* Capturing bracket: restore the values saved on the stack. With
capture_last_ptr three words are popped (capture_last and both ovector
entries); otherwise two words are popped unless the bracket is marked in
optimized_cbracket. */
if (offset != 0)
12788
  {
12789
  if (common->capture_last_ptr != 0)
12790
    {
12791
    SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
12792
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12793
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12794
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
12795
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
12796
    free_stack(common, 3);
12797
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
12798
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
12799
    }
12800
  else if (common->optimized_cbracket[offset >> 1] == 0)
12801
    {
12802
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12803
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12804
    free_stack(common, 2);
12805
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
12806
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
12807
    }
12808
  }
12809
12810
if (SLJIT_UNLIKELY(opcode == OP_ONCE))
12811
  {
12812
  /* With a saved frame: reload STACK_TOP from the private data slot, call
  revertframes and move STACK_TOP by (framesize - 1) words. The emitted jump
  `once` is bound by JUMPHERE(once) in the OP_ONCE cleanup further below. */
  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
12813
    {
12814
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12815
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12816
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
12817
    }
12818
  once = JUMP(SLJIT_JUMP);
12819
  }
12820
else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
12821
  {
12822
  if (has_alternatives)
12823
    {
12824
    /* Always exactly one alternative. */
12825
    /* Pop the alternative selector into TMP1; a non-zero value skips the
    backtracking path of the first alternative. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12826
    free_stack(common, 1);
12827
12828
    alt_max = 2;
12829
    next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12830
    }
12831
  }
12832
else if (has_alternatives)
12833
  {
12834
  /* Pop the alternative selector into TMP1. With more than three
  alternatives it is an address that is jumped to indirectly; otherwise it
  is compared with small integers. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12835
  free_stack(common, 1);
12836
12837
  if (alt_max > 3)
12838
    {
12839
    sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
12840
12841
    SLJIT_ASSERT(CURRENT_AS(bracket_backtrack)->u.matching_put_label);
12842
    sljit_set_put_label(CURRENT_AS(bracket_backtrack)->u.matching_put_label, LABEL());
12843
    sljit_emit_op0(compiler, SLJIT_ENDBR);
12844
    }
12845
  else
12846
    next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12847
  }
12848
12849
/* Backtracking path of the items of the first alternative. */
COMPILE_BACKTRACKINGPATH(current->top);
12850
if (current->topbacktracks)
12851
  set_jumps(current->topbacktracks, LABEL());
12852
12853
if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
12854
  {
12855
  /* Conditional block always has at most one alternative. */
12856
  if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
12857
    {
12858
    SLJIT_ASSERT(has_alternatives);
12859
    assert = CURRENT_AS(bracket_backtrack)->u.assert;
12860
    /* Positive assertion condition with a frame: revert the frame and
    restore the assertion's private data slot. */
    if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
12861
      {
12862
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
12863
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12864
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
12865
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
12866
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
12867
      }
12868
    /* Skip the code of the other alternative; jumps recorded in condfailed
    land right after this jump. */
    cond = JUMP(SLJIT_JUMP);
12869
    set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
12870
    }
12871
  else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
12872
    {
12873
    SLJIT_ASSERT(has_alternatives);
12874
    cond = JUMP(SLJIT_JUMP);
12875
    set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
12876
    }
12877
  else
12878
    SLJIT_ASSERT(!has_alternatives);
12879
  }
12880
12881
if (has_alternatives)
12882
  {
12883
  alt_count = 1;
12884
  /* One iteration per remaining alternative: compile its matching path,
  push the same stack layout as compile_bracket_matchingpath does, jump to
  the shared alternative_matchingpath, then compile its backtracking
  path. */
  do
12885
    {
12886
    /* Reuse the backtrack node for the next alternative. */
    current->top = NULL;
12887
    current->topbacktracks = NULL;
12888
    current->nextbacktracks = NULL;
12889
    /* Conditional blocks always have an additional alternative, even if it is empty. */
12890
    if (*cc == OP_ALT)
12891
      {
12892
      ccprev = cc + 1 + LINK_SIZE;
12893
      cc += GET(cc, 1);
12894
      /* Restore the subject position at which the bracket started. */
      if (opcode != OP_COND && opcode != OP_SCOND)
12895
        {
12896
        if (opcode != OP_ONCE)
12897
          {
12898
          if (private_data_ptr != 0)
12899
            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12900
          else
12901
            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12902
          }
12903
        else
12904
          OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
12905
        }
12906
      compile_matchingpath(common, ccprev, cc, current);
12907
      if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
12908
        return;
12909
12910
      if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA)
12911
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12912
12913
      if (opcode == OP_SCRIPT_RUN)
12914
        match_script_run_common(common, private_data_ptr, current);
12915
      }
12916
12917
    /* Instructions after the current alternative is successfully matched. */
12918
    /* There is a similar code in compile_bracket_matchingpath. */
12919
    if (opcode == OP_ONCE)
12920
      match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
12921
12922
    /* First pass: count the stack words needed so that they can be
    allocated in one step. */
    stacksize = 0;
12923
    if (repeat_type == OP_MINUPTO)
12924
      {
12925
      /* We need to preserve the counter. TMP2 will be used below. */
12926
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
12927
      stacksize++;
12928
      }
12929
    if (ket != OP_KET || bra != OP_BRA)
12930
      stacksize++;
12931
    if (offset != 0)
12932
      {
12933
      if (common->capture_last_ptr != 0)
12934
        stacksize++;
12935
      if (common->optimized_cbracket[offset >> 1] == 0)
12936
        stacksize += 2;
12937
      }
12938
    if (opcode != OP_ONCE)
12939
      stacksize++;
12940
12941
    if (stacksize > 0)
12942
      allocate_stack(common, stacksize);
12943
12944
    /* Second pass: fill the allocated words; stacksize is now the index of
    the next word to write. */
    stacksize = 0;
12945
    if (repeat_type == OP_MINUPTO)
12946
      {
12947
      /* TMP2 was set above. */
12948
      OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
12949
      stacksize++;
12950
      }
12951
12952
    if (ket != OP_KET || bra != OP_BRA)
12953
      {
12954
      /* Repeated brackets save STR_PTR; a non-repeated BRAZERO /
      BRAMINZERO bracket saves zero. */
      if (ket != OP_KET)
12955
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
12956
      else
12957
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
12958
      stacksize++;
12959
      }
12960
12961
    if (offset != 0)
12962
      stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
12963
12964
    /* Store the alternative selector: the alternative index when there are
    at most three alternatives, otherwise the address of a label that is set
    after the jump below. */
    if (opcode != OP_ONCE)
12965
      {
12966
      if (alt_max <= 3)
12967
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);
12968
      else
12969
        put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
12970
      }
12971
12972
    if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
12973
      {
12974
      /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
12975
      SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
12976
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
12977
      }
12978
12979
    JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);
12980
12981
    /* Backtracking entry of this alternative: bind the pending selector
    test (or the put label), and for a three-alternative bracket emit the
    test that tells the second alternative from the third. */
    if (opcode != OP_ONCE)
12982
      {
12983
      if (alt_max <= 3)
12984
        {
12985
        JUMPHERE(next_alt);
12986
        alt_count++;
12987
        if (alt_count < alt_max)
12988
          {
12989
          SLJIT_ASSERT(alt_count == 2 && alt_max == 3);
12990
          next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
12991
          }
12992
        }
12993
      else
12994
        {
12995
        sljit_set_put_label(put_label, LABEL());
12996
        sljit_emit_op0(compiler, SLJIT_ENDBR);
12997
        }
12998
      }
12999
13000
    COMPILE_BACKTRACKINGPATH(current->top);
13001
    if (current->topbacktracks)
13002
      set_jumps(current->topbacktracks, LABEL());
13003
    SLJIT_ASSERT(!current->nextbacktracks);
13004
    }
13005
  while (*cc == OP_ALT);
13006
13007
  if (cond != NULL)
13008
    {
13009
    SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
13010
    assert = CURRENT_AS(bracket_backtrack)->u.assert;
13011
    /* Negative assertion condition with a frame: revert the frame and
    restore the assertion's private data slot before binding `cond`. */
    if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
13012
      {
13013
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
13014
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13015
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
13016
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
13017
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
13018
      }
13019
    JUMPHERE(cond);
13020
    }
13021
13022
  /* Free the STR_PTR. */
13023
  if (private_data_ptr == 0)
13024
    free_stack(common, 1);
13025
  }
13026
13027
/* All alternatives failed: restore the state saved when the bracket was
entered. */
if (offset != 0)
13028
  {
13029
  /* Using both tmp register is better for instruction scheduling. */
13030
  if (common->optimized_cbracket[offset >> 1] != 0)
13031
    {
13032
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13033
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13034
    free_stack(common, 2);
13035
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
13036
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
13037
    }
13038
  else
13039
    {
13040
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13041
    free_stack(common, 1);
13042
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
13043
    }
13044
  }
13045
else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
13046
  {
13047
  /* These opcodes keep one saved word on the stack: pop it back into the
  private data slot. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
13048
  free_stack(common, 1);
13049
  }
13050
else if (opcode == OP_ONCE)
13051
  {
13052
  cc = ccbegin + GET(ccbegin, 1);
13053
  /* Compute how many stack words must be released: the control head (if
  any) plus either the saved frame with its header words or the saved
  STR_PTR. */
  stacksize = needs_control_head ? 1 : 0;
13054
13055
  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
13056
    {
13057
    /* Reset head and drop saved frame. */
13058
    stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
13059
    }
13060
  else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
13061
    {
13062
    /* The STR_PTR must be released. */
13063
    stacksize++;
13064
    }
13065
13066
  if (stacksize > 0)
13067
    free_stack(common, stacksize);
13068
13069
  JUMPHERE(once);
13070
  /* Restore previous private_data_ptr */
13071
  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
13072
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 1));
13073
  else if (ket == OP_KETRMIN)
13074
    {
13075
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13076
    /* See the comment below. */
13077
    free_stack(common, 2);
13078
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
13079
    }
13080
  }
13081
13082
/* Tail: decide whether the bracket is retried or the backtrack continues
with the preceding item. */
if (repeat_type == OP_EXACT)
13083
  {
13084
  /* Increment the repeat counter and loop back to exact_label while it is
  not greater than repeat_count. */
  OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
13085
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
13086
  CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
13087
  }
13088
else if (ket == OP_KETRMAX)
13089
  {
13090
  /* A non-zero saved STR_PTR re-enters the loop through
  recursive_matchingpath. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13091
  if (bra != OP_BRAZERO)
13092
    free_stack(common, 1);
13093
13094
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
13095
  if (bra == OP_BRAZERO)
13096
    {
13097
    /* Otherwise load STR_PTR from the next stack word and continue at
    zero_matchingpath. */
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13098
    JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
13099
    JUMPHERE(brazero);
13100
    free_stack(common, 1);
13101
    }
13102
  }
13103
else if (ket == OP_KETRMIN)
13104
  {
13105
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13106
13107
  /* OP_ONCE removes everything in case of a backtrack, so we don't
13108
  need to explicitly release the STR_PTR. The extra release would
13109
  affect badly the free_stack(2) above. */
13110
  if (opcode != OP_ONCE)
13111
    free_stack(common, 1);
13112
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
13113
  if (opcode == OP_ONCE)
13114
    free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
13115
  else if (bra == OP_BRAMINZERO)
13116
    free_stack(common, 1);
13117
  }
13118
else if (bra == OP_BRAZERO)
13119
  {
13120
  /* Non-repeated BRAZERO: reload STR_PTR from the stack and continue at
  zero_matchingpath; the `brazero` jump emitted near the top lands after
  this. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13121
  JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
13122
  JUMPHERE(brazero);
13123
  }
13124
}
13125
13126
/* Emits the backtracking path of a possessive bracket (the only opcodes
tested here are OP_CBRAPOS and OP_SCBRAPOS, which additionally restore
capture data). Without a saved frame (framesize < 0) the stack words are
simply released; otherwise the frame is reverted first and the previous
private data value is restored. */
static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13127
{
13128
DEFINE_COMPILER;
13129
int offset;
13130
struct sljit_jump *jump;
13131
13132
if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
13133
  {
13134
  if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
13135
    {
13136
    /* Restore both ovector entries of the capture from STACK(0) and
    STACK(1), and capture_last from STACK(2) when capture_last_ptr is in
    use. The loads and stores are interleaved. */
    offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
13137
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13138
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13139
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
13140
    if (common->capture_last_ptr != 0)
13141
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
13142
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
13143
    if (common->capture_last_ptr != 0)
13144
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
13145
    }
13146
  set_jumps(current->topbacktracks, LABEL());
13147
  free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
13148
  return;
13149
  }
13150
13151
/* Reload STACK_TOP from the private data slot, call the revertframes
routine and move STACK_TOP by (framesize - 1) words. */
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
13152
add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13153
OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracketpos_backtrack)->framesize - 1) * sizeof(sljit_sw));
13154
13155
if (current->topbacktracks)
13156
  {
13157
  /* The fall-through path skips the free_stack() that only the
  topbacktracks jumps need. */
  jump = JUMP(SLJIT_JUMP);
13158
  set_jumps(current->topbacktracks, LABEL());
13159
  /* Drop the stack frame. */
13160
  free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
13161
  JUMPHERE(jump);
13162
  }
13163
/* Restore the previous value of the private data slot from the stack. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracketpos_backtrack)->framesize - 1));
13164
}
13165
13166
/* Emits the backtracking path of an OP_BRAMINZERO item. The backtrack node
is reset and the matching path of the item at current->cc is compiled here:
as a bracket (followed by its own backtracking path) when current->cc[1] is
above OP_ASSERTBACK_NOT, otherwise as an assertion using a temporary
assert_backtrack that shares this node's matchingpath label. */
static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13167
{
13168
assert_backtrack backtrack;
13169
13170
/* Clear the lists so that the node can be reused by the calls below. */
current->top = NULL;
13171
current->topbacktracks = NULL;
13172
current->nextbacktracks = NULL;
13173
if (current->cc[1] > OP_ASSERTBACK_NOT)
13174
  {
13175
  /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
13176
  compile_bracket_matchingpath(common, current->cc, current);
13177
  compile_bracket_backtrackingpath(common, current->top);
13178
  }
13179
else
13180
  {
13181
  /* Zero-initialised stack-local node; only cc and matchingpath are set
  before it is handed to compile_assert_matchingpath. */
  memset(&backtrack, 0, sizeof(backtrack));
13182
  backtrack.common.cc = current->cc;
13183
  backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
13184
  /* Manual call of compile_assert_matchingpath. */
13185
  compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
13186
  }
13187
SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
13188
}
13189
13190
/* Backtracking path of the control verbs that compile_backtrackingpath sends
here: OP_THEN, OP_THEN_ARG, OP_PRUNE, OP_PRUNE_ARG, OP_SKIP and OP_SKIP_ARG.

The cases are tried in this order and the first one that applies ends the
function:
  1. THEN with an active then trap: jump to the quit list of that trap.
  2. THEN without a local quit inside a positive assertion: jump to the
     positive_assertion_quit list.
  3. A local quit is available: jump to the quit label / quit list.
  4. SKIP_ARG: call do_search_mark and jump to reset_match when it returns
     a non-zero value.
  5. Everything else: load STR_PTR and jump to reset_match.

Arguments:
  common    the shared JIT compiler state
  current   the backtrack record; current->cc points to the verb opcode
*/
static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
PCRE2_UCHAR verb = *current->cc;
int is_then = (verb == OP_THEN || verb == OP_THEN_ARG);
struct sljit_label *next_entry;
struct sljit_jump *enter_scan;

if (is_then && common->then_trap != NULL)
  {
  SLJIT_ASSERT(common->control_head_ptr != 0);

  /* Start from the value stored at control_head_ptr and follow the links
  kept in slot 0 until an entry is found whose slot 1 holds type_then_trap
  and whose slot 2 holds the start of the current then trap. */
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
  /* The first entry is checked before any link is followed. */
  enter_scan = JUMP(SLJIT_JUMP);

  next_entry = LABEL();
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  JUMPHERE(enter_scan);
  CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0, next_entry);
  CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0, next_entry);
  add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
  return;
  }

if (is_then && !common->local_quit_available && common->in_positive_assertion)
  {
  add_jump(compiler, &common->positive_assertion_quit, JUMP(SLJIT_JUMP));
  return;
  }

if (common->local_quit_available)
  {
  /* Abort match with a fail. */
  if (common->quit_label != NULL)
    JUMPTO(SLJIT_JUMP, common->quit_label);
  else
    add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
  return;
  }

if (verb == OP_SKIP_ARG)
  {
  /* The call below passes its arguments in R0 / R1 and returns in R0, so
  TMP1 and STR_PTR must be mapped to exactly those registers. */
  SLJIT_ASSERT(common->control_head_ptr != 0 && TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_search_mark));

  /* The result becomes the new STR_PTR; a zero result falls through. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0);
  add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0));
  return;
  }

/* SKIP reloads STR_PTR from the top stack slot, the remaining verbs pass 0. */
if (verb != OP_SKIP)
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
else
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
}
13251
13252
/* Backtracking path of the virtual OP_THEN_TRAP opcode.

When the record refers to another then trap, that trap is simply made the
active one in common->then_trap and no code is generated. Otherwise two entry
sequences are generated which both leave a value in TMP1 and release the
stack area of the trap; the value in TMP1 is finally stored at
control_head_ptr.

Arguments:
  common    the shared JIT compiler state
  current   the backtrack record (a then_trap_backtrack) being processed
*/
static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
then_trap_backtrack *trap = CURRENT_AS(then_trap_backtrack);
struct sljit_jump *skip_quit_entry;
int frame_words;

if (trap->then_trap)
  {
  common->then_trap = trap->then_trap;
  return;
  }

/* A negative framesize contributes no extra stack words. */
frame_words = trap->framesize;
if (frame_words < 0)
  frame_words = 0;

/* Entry reached by falling into this backtracking path: the three trap words
are on the stack together with the optional frame words. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(frame_words));
free_stack(common, 3 + frame_words);
skip_quit_entry = JUMP(SLJIT_JUMP);

/* Entry reached through the quit list of the trap. */
set_jumps(trap->quit, LABEL());
/* STACK_TOP is set by THEN. */
if (trap->framesize >= 0)
  {
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (trap->framesize - 1) * sizeof(sljit_sw));
  }
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
free_stack(common, 3);

/* Both entries continue here with the value to store in TMP1. */
JUMPHERE(skip_quit_entry);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
}
13284
13285
/* Generates the backtracking paths of a chain of backtrack records.

The chain is followed through the 'prev' links, starting at 'current'. For
each record the pending nextbacktracks jumps are bound to the current code
position, then the generator that belongs to the opcode at record->cc is
invoked. The then trap that was active on entry is restored before returning.

Arguments:
  common    the shared JIT compiler state
  current   the first backtrack record to process (may be NULL)
*/
static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
then_trap_backtrack *outer_then_trap = common->then_trap;

for (; current != NULL; current = current->prev)
  {
  if (current->nextbacktracks != NULL)
    set_jumps(current->nextbacktracks, LABEL());

  switch(*current->cc)
    {
    case OP_SET_SOM:
    /* Pop one word and store it into the first ovector slot. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
    break;

    /* Single character iterators. */
    case OP_STAR: case OP_MINSTAR: case OP_PLUS: case OP_MINPLUS:
    case OP_QUERY: case OP_MINQUERY: case OP_UPTO: case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR: case OP_POSPLUS: case OP_POSQUERY: case OP_POSUPTO:
    /* Caseless variants. */
    case OP_STARI: case OP_MINSTARI: case OP_PLUSI: case OP_MINPLUSI:
    case OP_QUERYI: case OP_MINQUERYI: case OP_UPTOI: case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI: case OP_POSPLUSI: case OP_POSQUERYI: case OP_POSUPTOI:
    /* Negated character iterators. */
    case OP_NOTSTAR: case OP_NOTMINSTAR: case OP_NOTPLUS: case OP_NOTMINPLUS:
    case OP_NOTQUERY: case OP_NOTMINQUERY: case OP_NOTUPTO: case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR: case OP_NOTPOSPLUS: case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:
    /* Negated, caseless variants. */
    case OP_NOTSTARI: case OP_NOTMINSTARI: case OP_NOTPLUSI: case OP_NOTMINPLUSI:
    case OP_NOTQUERYI: case OP_NOTMINQUERYI: case OP_NOTUPTOI: case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI: case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:
    /* Character type iterators. */
    case OP_TYPESTAR: case OP_TYPEMINSTAR: case OP_TYPEPLUS: case OP_TYPEMINPLUS:
    case OP_TYPEQUERY: case OP_TYPEMINQUERY: case OP_TYPEUPTO: case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS: case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:
    /* Character classes. */
    case OP_CLASS: case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
#endif
    compile_iterator_backtrackingpath(common, current);
    break;

    case OP_REF: case OP_REFI: case OP_DNREF: case OP_DNREFI:
    compile_ref_iterator_backtrackingpath(common, current);
    break;

    case OP_RECURSE:
    compile_recurse_backtrackingpath(common, current);
    break;

    case OP_ASSERT: case OP_ASSERT_NOT:
    case OP_ASSERTBACK: case OP_ASSERTBACK_NOT:
    compile_assert_backtrackingpath(common, current);
    break;

    case OP_ASSERT_NA: case OP_ASSERTBACK_NA:
    case OP_ONCE: case OP_SCRIPT_RUN:
    case OP_BRA: case OP_CBRA: case OP_COND:
    case OP_SBRA: case OP_SCBRA: case OP_SCOND:
    compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAZERO:
    /* The opcode after BRAZERO selects the assertion or the bracket generator. */
    if (current->cc[1] <= OP_ASSERTBACK_NOT)
      compile_assert_backtrackingpath(common, current);
    else
      compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAPOS: case OP_CBRAPOS:
    case OP_SBRAPOS: case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    compile_bracketpos_backtrackingpath(common, current);
    break;

    case OP_BRAMINZERO:
    compile_braminzero_backtrackingpath(common, current);
    break;

    case OP_MARK:
    if (common->has_skip_arg)
      {
      /* Five words are released: slot 4 goes to mark_ptr and slot 0 goes to
      control_head_ptr. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(4));
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 5);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
      }
    else
      {
      /* A single word is released and it goes to mark_ptr. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      }
    break;

    case OP_THEN: case OP_THEN_ARG:
    case OP_PRUNE: case OP_PRUNE_ARG:
    case OP_SKIP: case OP_SKIP_ARG:
    compile_control_verb_backtrackingpath(common, current);
    break;

    case OP_COMMIT: case OP_COMMIT_ARG:
    /* Without a local quit the return register is set to "no match" first. */
    if (!common->local_quit_available)
      OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
    if (common->quit_label != NULL)
      JUMPTO(SLJIT_JUMP, common->quit_label);
    else
      add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
    break;

    case OP_CALLOUT: case OP_CALLOUT_STR:
    case OP_FAIL:
    case OP_ACCEPT: case OP_ASSERT_ACCEPT:
    /* Nothing to undo: only the pending jumps are bound here. */
    set_jumps(current->topbacktracks, LABEL());
    break;

    case OP_THEN_TRAP:
    /* A virtual opcode for then traps. */
    compile_then_trap_backtrackingpath(common, current);
    break;

    default:
    SLJIT_UNREACHABLE();
    break;
    }
  }

common->then_trap = outer_then_trap;
}
13475
13476
/* Generates the shared code of one recursion entry (common->currententry).

Two fast-call entry points are produced: entry_label for the matching path
and backtrack_label for the backtracking path. Each alternative of the
recursed group gets its matching path followed by its backtracking path.
Every exit of the generated code is a fast return with TMP1 loaded with
either 1 (the paths collected in the 'matched' list) or 0 (all other exits).

Argument:
  common    the shared JIT compiler state; currententry selects the group
*/
static SLJIT_INLINE void compile_recurse(compiler_common *common)
{
DEFINE_COMPILER;
PCRE2_SPTR cc = common->start + common->currententry->start;
PCRE2_SPTR ccbegin = cc + 1 + LINK_SIZE + (*cc != OP_BRA ? IMM2_SIZE : 0);
PCRE2_SPTR ccend = bracketend(cc) - (1 + LINK_SIZE);
uint32_t recurse_flags = 0;
int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &recurse_flags);
int alt_max, local_size, frame_size, tail_size;
int alt_index = 0;
backtrack_common alt;
jump_list *matched = NULL;
struct sljit_jump *to_next_alt = NULL;
struct sljit_jump *accept_return = NULL;
struct sljit_label *no_match;
struct sljit_put_label *alt_target = NULL;

/* Recurse captures then. */
common->then_trap = NULL;

SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);

alt_max = no_alternatives(cc);

/* Matching path. */
SLJIT_ASSERT(common->currententry->entry_label == NULL && common->recursive_head_ptr != 0);
common->currententry->entry_label = LABEL();
set_jumps(common->currententry->entry_calls, common->currententry->entry_label);

sljit_emit_fast_enter(compiler, TMP2, 0);
count_match(common);

/* One local word holds the return address; a second one holds the saved
string pointer when there is more than one alternative. */
local_size = (alt_max > 1) ? 2 : 1;
frame_size = private_data_size + local_size;
/* Words pushed after each alternative: the recursive head, plus a second
word when there are several alternatives or an accept was found. */
tail_size = (alt_max > 1 || (recurse_flags & recurse_flag_accept_found)) ? 2 : 1;

/* (Reversed) stack layout:
   [private data][return address][optional: str ptr] ... [optional: alternative index][recursive_head_ptr] */

allocate_stack(common, frame_size);
/* Save return address. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP2, 0);

copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, local_size, frame_size, recurse_flags);

/* This variable is saved and restored all time when we enter or exit from a recursive context. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);

if (recurse_flags & recurse_flag_control_head_found)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);

/* Later alternatives restart from this saved string pointer. */
if (alt_max > 1)
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

memset(&alt, 0, sizeof(backtrack_common));
common->quit_label = NULL;
common->accept_label = NULL;
common->quit = NULL;
common->accept = NULL;
alt.cc = ccbegin;
cc += GET(cc, 1);

for (;;)
  {
  alt.top = NULL;
  alt.topbacktracks = NULL;

  /* Every alternative except the first reloads the saved string pointer. */
  if (alt.cc != ccbegin)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  compile_matchingpath(common, alt.cc, cc, &alt);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return;

  allocate_stack(common, tail_size);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);

  if (tail_size == 2)
    {
    /* With more than three alternatives a code address is recorded, otherwise
    the index of the alternative. */
    if (alt_max > 3)
      alt_target = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(1));
    else
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_index);
    }

  add_jump(compiler, &matched, JUMP(SLJIT_JUMP));

  if (alt_index == 0)
    {
    /* Backtracking path entry. */
    SLJIT_ASSERT(common->currententry->backtrack_label == NULL);
    common->currententry->backtrack_label = LABEL();
    set_jumps(common->currententry->backtrack_calls, common->currententry->backtrack_label);

    sljit_emit_fast_enter(compiler, TMP1, 0);

    /* A -1 in the second word is the marker written by the accept exit. */
    if (recurse_flags & recurse_flag_accept_found)
      accept_return = CMP(SLJIT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);

    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    /* Save return address. */
    OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(local_size - 1), TMP1, 0);

    copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, frame_size, recurse_flags);

    if (alt_max <= 1)
      free_stack(common, tail_size);
    else
      {
      /* Dispatch to the backtracking path of the alternative that matched. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
      free_stack(common, 2);

      if (alt_max > 3)
        {
        sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
        sljit_set_put_label(alt_target, LABEL());
        sljit_emit_op0(compiler, SLJIT_ENDBR);
        }
      else
        to_next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
      }
    }
  else if (alt_max > 3)
    {
    sljit_set_put_label(alt_target, LABEL());
    sljit_emit_op0(compiler, SLJIT_ENDBR);
    }
  else
    {
    JUMPHERE(to_next_alt);
    if (alt_index + 1 < alt_max)
      {
      SLJIT_ASSERT(alt_index == 1 && alt_max == 3);
      to_next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
      }
    }

  alt_index++;

  compile_backtrackingpath(common, alt.top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return;
  set_jumps(alt.topbacktracks, LABEL());

  if (*cc != OP_ALT)
    break;

  alt.cc = cc + 1 + LINK_SIZE;
  cc += GET(cc, 1);
  }

/* No alternative is matched. */

no_match = LABEL();

copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, local_size, frame_size, recurse_flags);

OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
free_stack(common, frame_size);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);

if (common->quit != NULL)
  {
  SLJIT_ASSERT(recurse_flags & recurse_flag_quit_found);

  /* Quit jumps reload STACK_TOP from the recursive head and then continue
  with the "no match" exit above. */
  set_jumps(common->quit, LABEL());
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
  copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, local_size, frame_size, recurse_flags);
  JUMPTO(SLJIT_JUMP, no_match);
  }

if (recurse_flags & recurse_flag_accept_found)
  {
  /* Backtracking entry taken when the -1 accept marker was found. */
  JUMPHERE(accept_return);
  free_stack(common, 2);

  /* Save return address. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP1, 0);

  copy_recurse_data(common, ccbegin, ccend, recurse_copy_kept_shared_to_global, local_size, frame_size, recurse_flags);

  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
  free_stack(common, frame_size);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
  OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
  }

if (common->accept != NULL)
  {
  SLJIT_ASSERT(recurse_flags & recurse_flag_accept_found);

  set_jumps(common->accept, LABEL());

  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
  OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);

  /* Push the two trailing words with the -1 accept marker, then fall
  through into the common "matched" exit below. */
  allocate_stack(common, 2);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);
  }

set_jumps(matched, LABEL());

/* TMP2 holds the recursive head on every path that arrives here. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);

copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, frame_size, recurse_flags);

OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), STACK(local_size - 1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
}
13684
13685
/* These helper macros are not available to the code below this point. */
#undef COMPILE_BACKTRACKINGPATH
#undef CURRENT_AS

/* Configuration bits that jit_compile() removes from its mode argument. */
#define PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS (PCRE2_JIT_INVALID_UTF)
13690
13691
static int jit_compile(pcre2_code *code, sljit_u32 mode)
13692
{
13693
pcre2_real_code *re = (pcre2_real_code *)code;
13694
struct sljit_compiler *compiler;
13695
backtrack_common rootbacktrack;
13696
compiler_common common_data;
13697
compiler_common *common = &common_data;
13698
const sljit_u8 *tables = re->tables;
13699
void *allocator_data = &re->memctl;
13700
int private_data_size;
13701
PCRE2_SPTR ccend;
13702
executable_functions *functions;
13703
void *executable_func;
13704
sljit_uw executable_size;
13705
sljit_uw total_length;
13706
struct sljit_label *mainloop_label = NULL;
13707
struct sljit_label *continue_match_label;
13708
struct sljit_label *empty_match_found_label = NULL;
13709
struct sljit_label *empty_match_backtrack_label = NULL;
13710
struct sljit_label *reset_match_label;
13711
struct sljit_label *quit_label;
13712
struct sljit_jump *jump;
13713
struct sljit_jump *minlength_check_failed = NULL;
13714
struct sljit_jump *empty_match = NULL;
13715
struct sljit_jump *end_anchor_failed = NULL;
13716
jump_list *reqcu_not_found = NULL;
13717
13718
SLJIT_ASSERT(tables);
13719
13720
#if HAS_VIRTUAL_REGISTERS == 1
13721
SLJIT_ASSERT(sljit_get_register_index(TMP3) < 0 && sljit_get_register_index(ARGUMENTS) < 0 && sljit_get_register_index(RETURN_ADDR) < 0);
13722
#elif HAS_VIRTUAL_REGISTERS == 0
13723
SLJIT_ASSERT(sljit_get_register_index(TMP3) >= 0 && sljit_get_register_index(ARGUMENTS) >= 0 && sljit_get_register_index(RETURN_ADDR) >= 0);
13724
#else
13725
#error "Invalid value for HAS_VIRTUAL_REGISTERS"
13726
#endif
13727
13728
memset(&rootbacktrack, 0, sizeof(backtrack_common));
13729
memset(common, 0, sizeof(compiler_common));
13730
common->re = re;
13731
common->name_table = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
13732
rootbacktrack.cc = common->name_table + re->name_count * re->name_entry_size;
13733
13734
#ifdef SUPPORT_UNICODE
13735
common->invalid_utf = (mode & PCRE2_JIT_INVALID_UTF) != 0;
13736
#endif /* SUPPORT_UNICODE */
13737
mode &= ~PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS;
13738
13739
common->start = rootbacktrack.cc;
13740
common->read_only_data_head = NULL;
13741
common->fcc = tables + fcc_offset;
13742
common->lcc = (sljit_sw)(tables + lcc_offset);
13743
common->mode = mode;
13744
common->might_be_empty = (re->minlength == 0) || (re->flags & PCRE2_MATCH_EMPTY);
13745
common->allow_empty_partial = (re->max_lookbehind > 0) || (re->flags & PCRE2_MATCH_EMPTY);
13746
common->nltype = NLTYPE_FIXED;
13747
switch(re->newline_convention)
13748
  {
13749
  case PCRE2_NEWLINE_CR: common->newline = CHAR_CR; break;
13750
  case PCRE2_NEWLINE_LF: common->newline = CHAR_NL; break;
13751
  case PCRE2_NEWLINE_CRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
13752
  case PCRE2_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
13753
  case PCRE2_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
13754
  case PCRE2_NEWLINE_NUL: common->newline = CHAR_NUL; break;
13755
  default: return PCRE2_ERROR_INTERNAL;
13756
  }
13757
common->nlmax = READ_CHAR_MAX;
13758
common->nlmin = 0;
13759
if (re->bsr_convention == PCRE2_BSR_UNICODE)
13760
  common->bsr_nltype = NLTYPE_ANY;
13761
else if (re->bsr_convention == PCRE2_BSR_ANYCRLF)
13762
  common->bsr_nltype = NLTYPE_ANYCRLF;
13763
else
13764
  {
13765
#ifdef BSR_ANYCRLF
13766
  common->bsr_nltype = NLTYPE_ANYCRLF;
13767
#else
13768
  common->bsr_nltype = NLTYPE_ANY;
13769
#endif
13770
  }
13771
common->bsr_nlmax = READ_CHAR_MAX;
13772
common->bsr_nlmin = 0;
13773
common->endonly = (re->overall_options & PCRE2_DOLLAR_ENDONLY) != 0;
13774
common->ctypes = (sljit_sw)(tables + ctypes_offset);
13775
common->name_count = re->name_count;
13776
common->name_entry_size = re->name_entry_size;
13777
common->unset_backref = (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) != 0;
13778
common->alt_circumflex = (re->overall_options & PCRE2_ALT_CIRCUMFLEX) != 0;
13779
#ifdef SUPPORT_UNICODE
13780
/* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
13781
common->utf = (re->overall_options & PCRE2_UTF) != 0;
13782
common->ucp = (re->overall_options & PCRE2_UCP) != 0;
13783
if (common->utf)
13784
  {
13785
  if (common->nltype == NLTYPE_ANY)
13786
    common->nlmax = 0x2029;
13787
  else if (common->nltype == NLTYPE_ANYCRLF)
13788
    common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
13789
  else
13790
    {
13791
    /* We only care about the first newline character. */
13792
    common->nlmax = common->newline & 0xff;
13793
    }
13794
13795
  if (common->nltype == NLTYPE_FIXED)
13796
    common->nlmin = common->newline & 0xff;
13797
  else
13798
    common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
13799
13800
  if (common->bsr_nltype == NLTYPE_ANY)
13801
    common->bsr_nlmax = 0x2029;
13802
  else
13803
    common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
13804
  common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
13805
  }
13806
else
13807
  common->invalid_utf = FALSE;
13808
#endif /* SUPPORT_UNICODE */
13809
ccend = bracketend(common->start);
13810
13811
/* Calculate the local space size on the stack. */
13812
common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
13813
common->optimized_cbracket = (sljit_u8 *)SLJIT_MALLOC(re->top_bracket + 1, allocator_data);
13814
if (!common->optimized_cbracket)
13815
  return PCRE2_ERROR_NOMEMORY;
13816
#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
13817
memset(common->optimized_cbracket, 0, re->top_bracket + 1);
13818
#else
13819
memset(common->optimized_cbracket, 1, re->top_bracket + 1);
13820
#endif
13821
13822
SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
13823
#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
13824
common->capture_last_ptr = common->ovector_start;
13825
common->ovector_start += sizeof(sljit_sw);
13826
#endif
13827
if (!check_opcode_types(common, common->start, ccend))
13828
  {
13829
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
13830
  return PCRE2_ERROR_NOMEMORY;
13831
  }
13832
13833
/* Checking flags and updating ovector_start. */
13834
if (mode == PCRE2_JIT_COMPLETE && (re->flags & PCRE2_LASTSET) != 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
13835
  {
13836
  common->req_char_ptr = common->ovector_start;
13837
  common->ovector_start += sizeof(sljit_sw);
13838
  }
13839
if (mode != PCRE2_JIT_COMPLETE)
13840
  {
13841
  common->start_used_ptr = common->ovector_start;
13842
  common->ovector_start += sizeof(sljit_sw);
13843
  if (mode == PCRE2_JIT_PARTIAL_SOFT)
13844
    {
13845
    common->hit_start = common->ovector_start;
13846
    common->ovector_start += sizeof(sljit_sw);
13847
    }
13848
  }
13849
if ((re->overall_options & (PCRE2_FIRSTLINE | PCRE2_USE_OFFSET_LIMIT)) != 0)
13850
  {
13851
  common->match_end_ptr = common->ovector_start;
13852
  common->ovector_start += sizeof(sljit_sw);
13853
  }
13854
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
13855
common->control_head_ptr = 1;
13856
#endif
13857
if (common->control_head_ptr != 0)
13858
  {
13859
  common->control_head_ptr = common->ovector_start;
13860
  common->ovector_start += sizeof(sljit_sw);
13861
  }
13862
if (common->has_set_som)
13863
  {
13864
  /* Saving the real start pointer is necessary. */
13865
  common->start_ptr = common->ovector_start;
13866
  common->ovector_start += sizeof(sljit_sw);
13867
  }
13868
13869
/* Aligning ovector to even number of sljit words. */
13870
if ((common->ovector_start & sizeof(sljit_sw)) != 0)
13871
  common->ovector_start += sizeof(sljit_sw);
13872
13873
if (common->start_ptr == 0)
13874
  common->start_ptr = OVECTOR(0);
13875
13876
/* Capturing brackets cannot be optimized if callouts are allowed. */
13877
if (common->capture_last_ptr != 0)
13878
  memset(common->optimized_cbracket, 0, re->top_bracket + 1);
13879
13880
SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
13881
common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
13882
13883
total_length = ccend - common->start;
13884
common->private_data_ptrs = (sljit_s32*)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), allocator_data);
13885
if (!common->private_data_ptrs)
13886
  {
13887
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
13888
  return PCRE2_ERROR_NOMEMORY;
13889
  }
13890
memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32));
13891
13892
private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
13893
13894
if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && !common->has_skip_in_assert_back)
13895
  detect_early_fail(common, common->start, &private_data_size, 0, 0, TRUE);
13896
13897
set_private_data_ptrs(common, &private_data_size, ccend);
13898
13899
SLJIT_ASSERT(common->early_fail_start_ptr <= common->early_fail_end_ptr);
13900
13901
if (private_data_size > SLJIT_MAX_LOCAL_SIZE)
13902
  {
13903
  SLJIT_FREE(common->private_data_ptrs, allocator_data);
13904
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
13905
  return PCRE2_ERROR_NOMEMORY;
13906
  }
13907
13908
if (common->has_then)
13909
  {
13910
  common->then_offsets = (sljit_u8 *)(common->private_data_ptrs + total_length);
13911
  memset(common->then_offsets, 0, total_length);
13912
  set_then_offsets(common, common->start, NULL);
13913
  }
13914
13915
compiler = sljit_create_compiler(allocator_data, NULL);
13916
if (!compiler)
13917
  {
13918
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
13919
  SLJIT_FREE(common->private_data_ptrs, allocator_data);
13920
  return PCRE2_ERROR_NOMEMORY;
13921
  }
13922
common->compiler = compiler;
13923
13924
/* Main pcre2_jit_exec entry. */
13925
SLJIT_ASSERT((private_data_size & (sizeof(sljit_sw) - 1)) == 0);
13926
sljit_emit_enter(compiler, 0, SLJIT_ARGS1(W, W), 5, 5, 0, 0, private_data_size);
13927
13928
/* Register init. */
13929
reset_ovector(common, (re->top_bracket + 1) * 2);
13930
if (common->req_char_ptr != 0)
13931
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);
13932
13933
OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
13934
OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
13935
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
13936
OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
13937
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
13938
OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
13939
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, end));
13940
OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, start));
13941
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
13942
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);
13943
13944
if (common->early_fail_start_ptr < common->early_fail_end_ptr)
13945
  reset_early_fail(common);
13946
13947
if (mode == PCRE2_JIT_PARTIAL_SOFT)
13948
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
13949
if (common->mark_ptr != 0)
13950
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
13951
if (common->control_head_ptr != 0)
13952
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
13953
13954
/* Main part of the matching */
13955
if ((re->overall_options & PCRE2_ANCHORED) == 0)
13956
  {
13957
  mainloop_label = mainloop_entry(common);
13958
  continue_match_label = LABEL();
13959
  /* Forward search if possible. */
13960
  if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
13961
    {
13962
    if (mode == PCRE2_JIT_COMPLETE && fast_forward_first_n_chars(common))
13963
      ;
13964
    else if ((re->flags & PCRE2_FIRSTSET) != 0)
13965
      fast_forward_first_char(common);
13966
    else if ((re->flags & PCRE2_STARTLINE) != 0)
13967
      fast_forward_newline(common);
13968
    else if ((re->flags & PCRE2_FIRSTMAPSET) != 0)
13969
      fast_forward_start_bits(common);
13970
    }
13971
  }
13972
else
13973
  continue_match_label = LABEL();
13974
13975
if (mode == PCRE2_JIT_COMPLETE && re->minlength > 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
13976
  {
13977
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
13978
  OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(re->minlength));
13979
  minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
13980
  }
13981
if (common->req_char_ptr != 0)
13982
  reqcu_not_found = search_requested_char(common, (PCRE2_UCHAR)(re->last_codeunit), (re->flags & PCRE2_LASTCASELESS) != 0, (re->flags & PCRE2_FIRSTSET) != 0);
13983
13984
/* Store the current STR_PTR in OVECTOR(0). */
13985
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
13986
/* Copy the limit of allowed recursions. */
13987
OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
13988
if (common->capture_last_ptr != 0)
13989
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, 0);
13990
if (common->fast_forward_bc_ptr != NULL)
13991
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3, STR_PTR, 0);
13992
13993
if (common->start_ptr != OVECTOR(0))
13994
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);
13995
13996
/* Copy the beginning of the string. */
13997
if (mode == PCRE2_JIT_PARTIAL_SOFT)
13998
  {
13999
  jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
14000
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
14001
  JUMPHERE(jump);
14002
  }
14003
else if (mode == PCRE2_JIT_PARTIAL_HARD)
14004
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
14005
14006
compile_matchingpath(common, common->start, ccend, &rootbacktrack);
14007
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
14008
  {
14009
  sljit_free_compiler(compiler);
14010
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
14011
  SLJIT_FREE(common->private_data_ptrs, allocator_data);
14012
  PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14013
  return PCRE2_ERROR_NOMEMORY;
14014
  }
14015
14016
if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
14017
  end_anchor_failed = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0);
14018
14019
if (common->might_be_empty)
14020
  {
14021
  empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
14022
  empty_match_found_label = LABEL();
14023
  }
14024
14025
common->accept_label = LABEL();
14026
if (common->accept != NULL)
14027
  set_jumps(common->accept, common->accept_label);
14028
14029
/* This means we have a match. Update the ovector. */
14030
copy_ovector(common, re->top_bracket + 1);
14031
common->quit_label = common->abort_label = LABEL();
14032
if (common->quit != NULL)
14033
  set_jumps(common->quit, common->quit_label);
14034
if (common->abort != NULL)
14035
  set_jumps(common->abort, common->abort_label);
14036
if (minlength_check_failed != NULL)
14037
  SET_LABEL(minlength_check_failed, common->abort_label);
14038
14039
sljit_emit_op0(compiler, SLJIT_SKIP_FRAMES_BEFORE_RETURN);
14040
sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
14041
14042
if (common->failed_match != NULL)
14043
  {
14044
  SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
14045
  set_jumps(common->failed_match, LABEL());
14046
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
14047
  JUMPTO(SLJIT_JUMP, common->abort_label);
14048
  }
14049
14050
if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
14051
  JUMPHERE(end_anchor_failed);
14052
14053
if (mode != PCRE2_JIT_COMPLETE)
14054
  {
14055
  common->partialmatchlabel = LABEL();
14056
  set_jumps(common->partialmatch, common->partialmatchlabel);
14057
  return_with_partial_match(common, common->quit_label);
14058
  }
14059
14060
if (common->might_be_empty)
14061
  empty_match_backtrack_label = LABEL();
14062
compile_backtrackingpath(common, rootbacktrack.top);
14063
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
14064
  {
14065
  sljit_free_compiler(compiler);
14066
  SLJIT_FREE(common->optimized_cbracket, allocator_data);
14067
  SLJIT_FREE(common->private_data_ptrs, allocator_data);
14068
  PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14069
  return PCRE2_ERROR_NOMEMORY;
14070
  }
14071
14072
SLJIT_ASSERT(rootbacktrack.prev == NULL);
14073
reset_match_label = LABEL();
14074
14075
if (mode == PCRE2_JIT_PARTIAL_SOFT)
14076
  {
14077
  /* Update hit_start only in the first time. */
14078
  jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
14079
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
14080
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
14081
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
14082
  JUMPHERE(jump);
14083
  }
14084
14085
/* Check we have remaining characters. */
14086
if ((re->overall_options & PCRE2_ANCHORED) == 0 && common->match_end_ptr != 0)
14087
  {
14088
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
14089
  }
14090
14091
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP),
14092
    (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3) : common->start_ptr);
14093
14094
if ((re->overall_options & PCRE2_ANCHORED) == 0)
14095
  {
14096
  if (common->ff_newline_shortcut != NULL)
14097
    {
14098
    /* There cannot be more newlines if PCRE2_FIRSTLINE is set. */
14099
    if ((re->overall_options & PCRE2_FIRSTLINE) == 0)
14100
      {
14101
      if (common->match_end_ptr != 0)
14102
        {
14103
        OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
14104
        OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
14105
        CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, common->ff_newline_shortcut);
14106
        OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
14107
        }
14108
      else
14109
        CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
14110
      }
14111
    }
14112
  else
14113
    CMPTO(SLJIT_LESS, STR_PTR, 0, (common->match_end_ptr == 0) ? STR_END : TMP1, 0, mainloop_label);
14114
  }
14115
14116
/* No more remaining characters. */
14117
if (reqcu_not_found != NULL)
14118
  set_jumps(reqcu_not_found, LABEL());
14119
14120
if (mode == PCRE2_JIT_PARTIAL_SOFT)
14121
  CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
14122
14123
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
14124
JUMPTO(SLJIT_JUMP, common->quit_label);
14125
14126
flush_stubs(common);
14127
14128
if (common->might_be_empty)
14129
  {
14130
  JUMPHERE(empty_match);
14131
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
14132
  OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
14133
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
14134
  JUMPTO(SLJIT_NOT_ZERO, empty_match_backtrack_label);
14135
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
14136
  JUMPTO(SLJIT_ZERO, empty_match_found_label);
14137
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
14138
  CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
14139
  JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
14140
  }
14141
14142
common->fast_forward_bc_ptr = NULL;
14143
common->early_fail_start_ptr = 0;
14144
common->early_fail_end_ptr = 0;
14145
common->currententry = common->entries;
14146
common->local_quit_available = TRUE;
14147
quit_label = common->quit_label;
14148
if (common->currententry != NULL)
14149
  {
14150
  /* A free bit for each private data. */
14151
  common->recurse_bitset_size = ((private_data_size / SSIZE_OF(sw)) + 7) >> 3;
14152
  SLJIT_ASSERT(common->recurse_bitset_size > 0);
14153
  common->recurse_bitset = (sljit_u8*)SLJIT_MALLOC(common->recurse_bitset_size, allocator_data);;
14154
14155
  if (common->recurse_bitset != NULL)
14156
    {
14157
    do
14158
      {
14159
      /* Might add new entries. */
14160
      compile_recurse(common);
14161
      if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
14162
        break;
14163
      flush_stubs(common);
14164
      common->currententry = common->currententry->next;
14165
      }
14166
    while (common->currententry != NULL);
14167
14168
    SLJIT_FREE(common->recurse_bitset, allocator_data);
14169
    }
14170
14171
  if (common->currententry != NULL)
14172
    {
14173
    /* The common->recurse_bitset has been freed. */
14174
    SLJIT_ASSERT(sljit_get_compiler_error(compiler) || common->recurse_bitset == NULL);
14175
14176
    sljit_free_compiler(compiler);
14177
    SLJIT_FREE(common->optimized_cbracket, allocator_data);
14178
    SLJIT_FREE(common->private_data_ptrs, allocator_data);
14179
    PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14180
    return PCRE2_ERROR_NOMEMORY;
14181
    }
14182
  }
14183
common->local_quit_available = FALSE;
14184
common->quit_label = quit_label;
14185
14186
/* Allocating stack, returns with PCRE2_ERROR_JIT_STACKLIMIT if it fails. */
14187
/* This is a (really) rare case. */
14188
set_jumps(common->stackalloc, LABEL());
14189
/* RETURN_ADDR is not a saved register. */
14190
sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
14191
14192
SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
14193
14194
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
14195
OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
14196
OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_LIMIT, 0, SLJIT_IMM, STACK_GROWTH_RATE);
14197
OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, stack));
14198
OP1(SLJIT_MOV, STACK_LIMIT, 0, TMP2, 0);
14199
14200
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(sljit_stack_resize));
14201
14202
jump = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
14203
OP1(SLJIT_MOV, TMP2, 0, STACK_LIMIT, 0);
14204
OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_RETURN_REG, 0);
14205
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
14206
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
14207
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
14208
14209
/* Allocation failed. */
14210
JUMPHERE(jump);
14211
/* We break the return address cache here, but this is a really rare case. */
14212
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_JIT_STACKLIMIT);
14213
JUMPTO(SLJIT_JUMP, common->quit_label);
14214
14215
/* Call limit reached. */
14216
set_jumps(common->calllimit, LABEL());
14217
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_MATCHLIMIT);
14218
JUMPTO(SLJIT_JUMP, common->quit_label);
14219
14220
if (common->revertframes != NULL)
14221
  {
14222
  set_jumps(common->revertframes, LABEL());
14223
  do_revertframes(common);
14224
  }
14225
if (common->wordboundary != NULL)
14226
  {
14227
  set_jumps(common->wordboundary, LABEL());
14228
  check_wordboundary(common);
14229
  }
14230
if (common->anynewline != NULL)
14231
  {
14232
  set_jumps(common->anynewline, LABEL());
14233
  check_anynewline(common);
14234
  }
14235
if (common->hspace != NULL)
14236
  {
14237
  set_jumps(common->hspace, LABEL());
14238
  check_hspace(common);
14239
  }
14240
if (common->vspace != NULL)
14241
  {
14242
  set_jumps(common->vspace, LABEL());
14243
  check_vspace(common);
14244
  }
14245
if (common->casefulcmp != NULL)
14246
  {
14247
  set_jumps(common->casefulcmp, LABEL());
14248
  do_casefulcmp(common);
14249
  }
14250
if (common->caselesscmp != NULL)
14251
  {
14252
  set_jumps(common->caselesscmp, LABEL());
14253
  do_caselesscmp(common);
14254
  }
14255
if (common->reset_match != NULL)
14256
  {
14257
  set_jumps(common->reset_match, LABEL());
14258
  do_reset_match(common, (re->top_bracket + 1) * 2);
14259
  CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
14260
  OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
14261
  JUMPTO(SLJIT_JUMP, reset_match_label);
14262
  }
14263
#ifdef SUPPORT_UNICODE
14264
#if PCRE2_CODE_UNIT_WIDTH == 8
14265
if (common->utfreadchar != NULL)
14266
  {
14267
  set_jumps(common->utfreadchar, LABEL());
14268
  do_utfreadchar(common);
14269
  }
14270
if (common->utfreadtype8 != NULL)
14271
  {
14272
  set_jumps(common->utfreadtype8, LABEL());
14273
  do_utfreadtype8(common);
14274
  }
14275
if (common->utfpeakcharback != NULL)
14276
  {
14277
  set_jumps(common->utfpeakcharback, LABEL());
14278
  do_utfpeakcharback(common);
14279
  }
14280
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
14281
#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
14282
if (common->utfreadchar_invalid != NULL)
14283
  {
14284
  set_jumps(common->utfreadchar_invalid, LABEL());
14285
  do_utfreadchar_invalid(common);
14286
  }
14287
if (common->utfreadnewline_invalid != NULL)
14288
  {
14289
  set_jumps(common->utfreadnewline_invalid, LABEL());
14290
  do_utfreadnewline_invalid(common);
14291
  }
14292
if (common->utfmoveback_invalid)
14293
  {
14294
  set_jumps(common->utfmoveback_invalid, LABEL());
14295
  do_utfmoveback_invalid(common);
14296
  }
14297
if (common->utfpeakcharback_invalid)
14298
  {
14299
  set_jumps(common->utfpeakcharback_invalid, LABEL());
14300
  do_utfpeakcharback_invalid(common);
14301
  }
14302
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16 */
14303
if (common->getucd != NULL)
14304
  {
14305
  set_jumps(common->getucd, LABEL());
14306
  do_getucd(common);
14307
  }
14308
if (common->getucdtype != NULL)
14309
  {
14310
  set_jumps(common->getucdtype, LABEL());
14311
  do_getucdtype(common);
14312
  }
14313
#endif /* SUPPORT_UNICODE */
14314
14315
SLJIT_FREE(common->optimized_cbracket, allocator_data);
14316
SLJIT_FREE(common->private_data_ptrs, allocator_data);
14317
14318
executable_func = sljit_generate_code(compiler);
14319
executable_size = sljit_get_generated_code_size(compiler);
14320
sljit_free_compiler(compiler);
14321
14322
if (executable_func == NULL)
14323
  {
14324
  PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14325
  return PCRE2_ERROR_NOMEMORY;
14326
  }
14327
14328
/* Reuse the function descriptor if possible. */
14329
if (re->executable_jit != NULL)
14330
  functions = (executable_functions *)re->executable_jit;
14331
else
14332
  {
14333
  functions = SLJIT_MALLOC(sizeof(executable_functions), allocator_data);
14334
  if (functions == NULL)
14335
    {
14336
    /* This case is highly unlikely since we just recently
14337
    freed a lot of memory. Not impossible though. */
14338
    sljit_free_code(executable_func, NULL);
14339
    PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14340
    return PCRE2_ERROR_NOMEMORY;
14341
    }
14342
  memset(functions, 0, sizeof(executable_functions));
14343
  functions->top_bracket = re->top_bracket + 1;
14344
  functions->limit_match = re->limit_match;
14345
  re->executable_jit = functions;
14346
  }
14347
14348
/* Turn mode into an index. */
14349
if (mode == PCRE2_JIT_COMPLETE)
14350
  mode = 0;
14351
else
14352
  mode = (mode == PCRE2_JIT_PARTIAL_SOFT) ? 1 : 2;
14353
14354
SLJIT_ASSERT(mode < JIT_NUMBER_OF_COMPILE_MODES);
14355
functions->executable_funcs[mode] = executable_func;
14356
functions->read_only_data_heads[mode] = common->read_only_data_head;
14357
functions->executable_sizes[mode] = executable_size;
14358
return 0;
14359
}
14360
14361
#endif
14362
14363
/*************************************************
14364
*        JIT compile a Regular Expression        *
14365
*************************************************/
14366
14367
/* This function uses JIT to convert a previously-compiled pattern into machine
14368
code.
14369
14370
Arguments:
14371
  code          a compiled pattern
14372
  options       JIT option bits
14373
14374
Returns:        0: success or (*NOJIT) was used
14375
               <0: an error code
14376
*/
14377
14378
/* Mask of every option bit that pcre2_jit_compile() accepts. If any bit
outside this mask is set in its "options" argument, pcre2_jit_compile()
returns PCRE2_ERROR_JIT_BADOPTION. */
#define PUBLIC_JIT_COMPILE_OPTIONS \
14379
0
  (PCRE2_JIT_COMPLETE|PCRE2_JIT_PARTIAL_SOFT|PCRE2_JIT_PARTIAL_HARD|PCRE2_JIT_INVALID_UTF)
14380
14381
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
14382
pcre2_jit_compile(pcre2_code *code, uint32_t options)
14383
0
{
14384
0
pcre2_real_code *re = (pcre2_real_code *)code;
14385
#ifdef SUPPORT_JIT
14386
executable_functions *functions;
14387
static int executable_allocator_is_working = -1;
14388
#endif
14389
14390
0
if (code == NULL)
14391
0
  return PCRE2_ERROR_NULL;
14392
14393
0
if ((options & ~PUBLIC_JIT_COMPILE_OPTIONS) != 0)
14394
0
  return PCRE2_ERROR_JIT_BADOPTION;
14395
14396
/* Support for invalid UTF was first introduced in JIT, with the option
14397
PCRE2_JIT_INVALID_UTF. Later, support was added to the interpreter, and the
14398
compile-time option PCRE2_MATCH_INVALID_UTF was created. This is now the
14399
preferred feature, with the earlier option deprecated. However, for backward
14400
compatibility, if the earlier option is set, it forces the new option so that
14401
if JIT matching falls back to the interpreter, there is still support for
14402
invalid UTF. However, if this function has already been successfully called
14403
without PCRE2_JIT_INVALID_UTF and without PCRE2_MATCH_INVALID_UTF (meaning that
14404
non-invalid-supporting JIT code was compiled), give an error.
14405
14406
If in the future support for PCRE2_JIT_INVALID_UTF is withdrawn, the following
14407
actions are needed:
14408
14409
  1. Remove the definition from pcre2.h.in and from the list in
14410
     PUBLIC_JIT_COMPILE_OPTIONS above.
14411
14412
  2. Replace PCRE2_JIT_INVALID_UTF with a local flag in this module.
14413
14414
  3. Replace PCRE2_JIT_INVALID_UTF in pcre2_jit_test.c.
14415
14416
  4. Delete the following short block of code. The setting of "re" and
14417
     "functions" can be moved into the JIT-only block below, but if that is
14418
     done, (void)re and (void)functions will be needed in the non-JIT case, to
14419
     avoid compiler warnings.
14420
*/
14421
14422
#ifdef SUPPORT_JIT
14423
functions = (executable_functions *)re->executable_jit;
14424
#endif
14425
14426
0
if ((options & PCRE2_JIT_INVALID_UTF) != 0)
14427
0
  {
14428
0
  if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) == 0)
14429
0
    {
14430
#ifdef SUPPORT_JIT
14431
    if (functions != NULL) return PCRE2_ERROR_JIT_BADOPTION;
14432
#endif
14433
0
    re->overall_options |= PCRE2_MATCH_INVALID_UTF;
14434
0
    }
14435
0
  }
14436
14437
/* The above tests are run with and without JIT support. This means that
14438
PCRE2_JIT_INVALID_UTF propagates back into the regex options (ensuring
14439
interpreter support) even in the absence of JIT. But now, if there is no JIT
14440
support, give an error return. */
14441
14442
0
#ifndef SUPPORT_JIT
14443
0
return PCRE2_ERROR_JIT_BADOPTION;
14444
#else  /* SUPPORT_JIT */
14445
14446
/* There is JIT support. Do the necessary. */
14447
14448
if ((re->flags & PCRE2_NOJIT) != 0) return 0;
14449
14450
if (executable_allocator_is_working == -1)
14451
  {
14452
  /* Checks whether the executable allocator is working. This check
14453
     might run multiple times in multi-threaded environments, but the
14454
     result should not be affected by it. */
14455
  void *ptr = SLJIT_MALLOC_EXEC(32, NULL);
14456
  if (ptr != NULL)
14457
    {
14458
    SLJIT_FREE_EXEC(((sljit_u8*)(ptr)) + SLJIT_EXEC_OFFSET(ptr), NULL);
14459
    executable_allocator_is_working = 1;
14460
    }
14461
  else executable_allocator_is_working = 0;
14462
  }
14463
14464
if (!executable_allocator_is_working)
14465
  return PCRE2_ERROR_NOMEMORY;
14466
14467
if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0)
14468
  options |= PCRE2_JIT_INVALID_UTF;
14469
14470
if ((options & PCRE2_JIT_COMPLETE) != 0 && (functions == NULL
14471
    || functions->executable_funcs[0] == NULL)) {
14472
  uint32_t excluded_options = (PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD);
14473
  int result = jit_compile(code, options & ~excluded_options);
14474
  if (result != 0)
14475
    return result;
14476
  }
14477
14478
if ((options & PCRE2_JIT_PARTIAL_SOFT) != 0 && (functions == NULL
14479
    || functions->executable_funcs[1] == NULL)) {
14480
  uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_HARD);
14481
  int result = jit_compile(code, options & ~excluded_options);
14482
  if (result != 0)
14483
    return result;
14484
  }
14485
14486
if ((options & PCRE2_JIT_PARTIAL_HARD) != 0 && (functions == NULL
14487
    || functions->executable_funcs[2] == NULL)) {
14488
  uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT);
14489
  int result = jit_compile(code, options & ~excluded_options);
14490
  if (result != 0)
14491
    return result;
14492
  }
14493
14494
return 0;
14495
14496
#endif  /* SUPPORT_JIT */
14497
0
}
Unexecuted instantiation: pcre2_jit_compile_8
Unexecuted instantiation: pcre2_jit_compile_16
14498
14499
/* JIT compiler uses an all-in-one approach. This improves security,
14500
   since the code generator functions are not exported. */
14501
14502
#define INCLUDED_FROM_PCRE2_JIT_COMPILE
14503
14504
#include "pcre2_jit_match.c"
14505
#include "pcre2_jit_misc.c"
14506
14507
/* End of pcre2_jit_compile.c */