Coverage Report

Created: 2025-12-18 07:52

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/serenity/Userland/Libraries/LibRegex/RegexByteCode.cpp
Line
Count
Source
1
/*
2
 * Copyright (c) 2020, Emanuel Sprung <emanuel.sprung@gmail.com>
3
 *
4
 * SPDX-License-Identifier: BSD-2-Clause
5
 */
6
7
#include "RegexByteCode.h"
8
#include "RegexDebug.h"
9
#include <AK/BinarySearch.h>
10
#include <AK/CharacterTypes.h>
11
#include <AK/StringBuilder.h>
12
#include <LibUnicode/CharacterTypes.h>
13
14
// U+2028 LINE SEPARATOR
15
constexpr static u32 const LineSeparator { 0x2028 };
16
// U+2029 PARAGRAPH SEPARATOR
17
constexpr static u32 const ParagraphSeparator { 0x2029 };
18
19
namespace regex {
20
21
StringView OpCode::name(OpCodeId opcode_id)
22
0
{
23
0
    switch (opcode_id) {
24
0
#define __ENUMERATE_OPCODE(x) \
25
0
    case OpCodeId::x:         \
26
0
        return #x##sv;
27
0
        ENUMERATE_OPCODES
28
0
#undef __ENUMERATE_OPCODE
29
0
    default:
30
0
        VERIFY_NOT_REACHED();
31
0
        return "<Unknown>"sv;
32
0
    }
33
0
}
34
35
StringView OpCode::name() const
36
0
{
37
0
    return name(opcode_id());
38
0
}
39
40
StringView execution_result_name(ExecutionResult result)
41
0
{
42
0
    switch (result) {
43
0
#define __ENUMERATE_EXECUTION_RESULT(x) \
44
0
    case ExecutionResult::x:            \
45
0
        return #x##sv;
46
0
        ENUMERATE_EXECUTION_RESULTS
47
0
#undef __ENUMERATE_EXECUTION_RESULT
48
0
    default:
49
0
        VERIFY_NOT_REACHED();
50
0
        return "<Unknown>"sv;
51
0
    }
52
0
}
53
54
StringView opcode_id_name(OpCodeId opcode)
55
0
{
56
0
    switch (opcode) {
57
0
#define __ENUMERATE_OPCODE(x) \
58
0
    case OpCodeId::x:         \
59
0
        return #x##sv;
60
61
0
        ENUMERATE_OPCODES
62
63
0
#undef __ENUMERATE_OPCODE
64
0
    default:
65
0
        VERIFY_NOT_REACHED();
66
0
        return "<Unknown>"sv;
67
0
    }
68
0
}
69
70
StringView boundary_check_type_name(BoundaryCheckType ty)
71
0
{
72
0
    switch (ty) {
73
0
#define __ENUMERATE_BOUNDARY_CHECK_TYPE(x) \
74
0
    case BoundaryCheckType::x:             \
75
0
        return #x##sv;
76
0
        ENUMERATE_BOUNDARY_CHECK_TYPES
77
0
#undef __ENUMERATE_BOUNDARY_CHECK_TYPE
78
0
    default:
79
0
        VERIFY_NOT_REACHED();
80
0
        return "<Unknown>"sv;
81
0
    }
82
0
}
83
84
StringView character_compare_type_name(CharacterCompareType ch_compare_type)
85
0
{
86
0
    switch (ch_compare_type) {
87
0
#define __ENUMERATE_CHARACTER_COMPARE_TYPE(x) \
88
0
    case CharacterCompareType::x:             \
89
0
        return #x##sv;
90
0
        ENUMERATE_CHARACTER_COMPARE_TYPES
91
0
#undef __ENUMERATE_CHARACTER_COMPARE_TYPE
92
0
    default:
93
0
        VERIFY_NOT_REACHED();
94
0
        return "<Unknown>"sv;
95
0
    }
96
0
}
97
98
StringView character_class_name(CharClass ch_class)
99
0
{
100
0
    switch (ch_class) {
101
0
#define __ENUMERATE_CHARACTER_CLASS(x) \
102
0
    case CharClass::x:                 \
103
0
        return #x##sv;
104
0
        ENUMERATE_CHARACTER_CLASSES
105
0
#undef __ENUMERATE_CHARACTER_CLASS
106
0
    default:
107
0
        VERIFY_NOT_REACHED();
108
0
        return "<Unknown>"sv;
109
0
    }
110
0
}
111
112
static void advance_string_position(MatchState& state, RegexStringView view, Optional<u32> code_point = {})
113
59.6M
{
114
59.6M
    ++state.string_position;
115
116
59.6M
    if (view.unicode()) {
117
0
        if (!code_point.has_value() && (state.string_position_in_code_units < view.length_in_code_units()))
118
0
            code_point = view[state.string_position_in_code_units];
119
0
        if (code_point.has_value())
120
0
            state.string_position_in_code_units += view.length_of_code_point(*code_point);
121
59.6M
    } else {
122
59.6M
        ++state.string_position_in_code_units;
123
59.6M
    }
124
59.6M
}
125
126
static void advance_string_position(MatchState& state, RegexStringView, RegexStringView advance_by)
127
0
{
128
0
    state.string_position += advance_by.length();
129
0
    state.string_position_in_code_units += advance_by.length_in_code_units();
130
0
}
131
132
static void reverse_string_position(MatchState& state, RegexStringView view, size_t amount)
133
0
{
134
0
    VERIFY(state.string_position >= amount);
135
0
    state.string_position -= amount;
136
137
0
    if (view.unicode())
138
0
        state.string_position_in_code_units = view.code_unit_offset_of(state.string_position);
139
0
    else
140
0
        state.string_position_in_code_units -= amount;
141
0
}
142
143
static void save_string_position(MatchInput const& input, MatchState const& state)
144
0
{
145
0
    input.saved_positions.append(state.string_position);
146
0
    input.saved_forks_since_last_save.append(state.forks_since_last_save);
147
0
    input.saved_code_unit_positions.append(state.string_position_in_code_units);
148
0
}
149
150
static bool restore_string_position(MatchInput const& input, MatchState& state)
151
0
{
152
0
    if (input.saved_positions.is_empty())
153
0
        return false;
154
155
0
    state.string_position = input.saved_positions.take_last();
156
0
    state.string_position_in_code_units = input.saved_code_unit_positions.take_last();
157
0
    state.forks_since_last_save = input.saved_forks_since_last_save.take_last();
158
0
    return true;
159
0
}
160
161
OwnPtr<OpCode> ByteCode::s_opcodes[(size_t)OpCodeId::Last + 1];
162
bool ByteCode::s_opcodes_initialized { false };
163
size_t ByteCode::s_next_checkpoint_serial_id { 0 };
164
165
void ByteCode::ensure_opcodes_initialized()
166
78.8M
{
167
78.8M
    if (s_opcodes_initialized)
168
78.8M
        return;
169
138
    for (u32 i = (u32)OpCodeId::First; i <= (u32)OpCodeId::Last; ++i) {
170
132
        switch ((OpCodeId)i) {
171
0
#define __ENUMERATE_OPCODE(OpCode)              \
172
132
    case OpCodeId::OpCode:                      \
173
132
        s_opcodes[i] = make<OpCode_##OpCode>(); \
174
132
        break;
175
176
132
            ENUMERATE_OPCODES
177
178
132
#undef __ENUMERATE_OPCODE
179
132
        }
180
132
    }
181
6
    s_opcodes_initialized = true;
182
6
}
183
184
ALWAYS_INLINE ExecutionResult OpCode_Exit::execute(MatchInput const& input, MatchState& state) const
185
1.01M
{
186
1.01M
    if (state.string_position > input.view.length() || state.instruction_position >= m_bytecode->size())
187
1.01M
        return ExecutionResult::Succeeded;
188
189
0
    return ExecutionResult::Failed;
190
1.01M
}
191
192
ALWAYS_INLINE ExecutionResult OpCode_Save::execute(MatchInput const& input, MatchState& state) const
193
0
{
194
0
    save_string_position(input, state);
195
0
    state.forks_since_last_save = 0;
196
0
    return ExecutionResult::Continue;
197
0
}
198
199
ALWAYS_INLINE ExecutionResult OpCode_Restore::execute(MatchInput const& input, MatchState& state) const
200
0
{
201
0
    if (!restore_string_position(input, state))
202
0
        return ExecutionResult::Failed;
203
0
    return ExecutionResult::Continue;
204
0
}
205
206
ALWAYS_INLINE ExecutionResult OpCode_GoBack::execute(MatchInput const& input, MatchState& state) const
207
0
{
208
0
    if (count() > state.string_position)
209
0
        return ExecutionResult::Failed_ExecuteLowPrioForks;
210
211
0
    reverse_string_position(state, input.view, count());
212
0
    return ExecutionResult::Continue;
213
0
}
214
215
ALWAYS_INLINE ExecutionResult OpCode_FailForks::execute(MatchInput const& input, MatchState& state) const
216
0
{
217
0
    input.fail_counter += state.forks_since_last_save;
218
0
    return ExecutionResult::Failed_ExecuteLowPrioForks;
219
0
}
220
221
ALWAYS_INLINE ExecutionResult OpCode_Jump::execute(MatchInput const&, MatchState& state) const
222
0
{
223
0
    state.instruction_position += offset();
224
0
    return ExecutionResult::Continue;
225
0
}
226
227
ALWAYS_INLINE ExecutionResult OpCode_ForkJump::execute(MatchInput const&, MatchState& state) const
228
0
{
229
0
    state.fork_at_position = state.instruction_position + size() + offset();
230
0
    state.forks_since_last_save++;
231
0
    return ExecutionResult::Fork_PrioHigh;
232
0
}
233
234
ALWAYS_INLINE ExecutionResult OpCode_ForkReplaceJump::execute(MatchInput const& input, MatchState& state) const
235
0
{
236
0
    state.fork_at_position = state.instruction_position + size() + offset();
237
0
    input.fork_to_replace = state.instruction_position;
238
0
    state.forks_since_last_save++;
239
0
    return ExecutionResult::Fork_PrioHigh;
240
0
}
241
242
ALWAYS_INLINE ExecutionResult OpCode_ForkStay::execute(MatchInput const&, MatchState& state) const
243
52.7M
{
244
52.7M
    state.fork_at_position = state.instruction_position + size() + offset();
245
52.7M
    state.forks_since_last_save++;
246
52.7M
    return ExecutionResult::Fork_PrioLow;
247
52.7M
}
248
249
ALWAYS_INLINE ExecutionResult OpCode_ForkReplaceStay::execute(MatchInput const& input, MatchState& state) const
250
12.5k
{
251
12.5k
    state.fork_at_position = state.instruction_position + size() + offset();
252
12.5k
    input.fork_to_replace = state.instruction_position;
253
12.5k
    return ExecutionResult::Fork_PrioLow;
254
12.5k
}
255
256
ALWAYS_INLINE ExecutionResult OpCode_CheckBegin::execute(MatchInput const& input, MatchState& state) const
257
4.82M
{
258
4.82M
    auto is_at_line_boundary = [&] {
259
4.82M
        if (state.string_position == 0)
260
4.82M
            return true;
261
262
0
        if (input.regex_options.has_flag_set(AllFlags::Multiline) && input.regex_options.has_flag_set(AllFlags::Internal_ConsiderNewline)) {
263
0
            auto input_view = input.view.substring_view(state.string_position - 1, 1)[0];
264
0
            return input_view == '\r' || input_view == '\n' || input_view == LineSeparator || input_view == ParagraphSeparator;
265
0
        }
266
267
0
        return false;
268
0
    }();
269
4.82M
    if (is_at_line_boundary && (input.regex_options & AllFlags::MatchNotBeginOfLine))
270
0
        return ExecutionResult::Failed_ExecuteLowPrioForks;
271
272
4.82M
    if ((is_at_line_boundary && !(input.regex_options & AllFlags::MatchNotBeginOfLine))
273
0
        || (!is_at_line_boundary && (input.regex_options & AllFlags::MatchNotBeginOfLine))
274
0
        || (is_at_line_boundary && (input.regex_options & AllFlags::Global)))
275
4.82M
        return ExecutionResult::Continue;
276
277
0
    return ExecutionResult::Failed_ExecuteLowPrioForks;
278
4.82M
}
279
280
ALWAYS_INLINE ExecutionResult OpCode_CheckBoundary::execute(MatchInput const& input, MatchState& state) const
281
0
{
282
0
    auto isword = [](auto ch) { return is_ascii_alphanumeric(ch) || ch == '_'; };
283
0
    auto is_word_boundary = [&] {
284
0
        if (state.string_position == input.view.length()) {
285
0
            return (state.string_position > 0 && isword(input.view[state.string_position_in_code_units - 1]));
286
0
        }
287
288
0
        if (state.string_position == 0) {
289
0
            return (isword(input.view[0]));
290
0
        }
291
292
0
        return !!(isword(input.view[state.string_position_in_code_units]) ^ isword(input.view[state.string_position_in_code_units - 1]));
293
0
    };
294
0
    switch (type()) {
295
0
    case BoundaryCheckType::Word: {
296
0
        if (is_word_boundary())
297
0
            return ExecutionResult::Continue;
298
0
        return ExecutionResult::Failed_ExecuteLowPrioForks;
299
0
    }
300
0
    case BoundaryCheckType::NonWord: {
301
0
        if (!is_word_boundary())
302
0
            return ExecutionResult::Continue;
303
0
        return ExecutionResult::Failed_ExecuteLowPrioForks;
304
0
    }
305
0
    }
306
0
    VERIFY_NOT_REACHED();
307
0
}
308
309
ALWAYS_INLINE ExecutionResult OpCode_CheckEnd::execute(MatchInput const& input, MatchState& state) const
310
4.04M
{
311
4.04M
    auto is_at_line_boundary = [&] {
312
4.04M
        if (state.string_position == input.view.length())
313
1.01M
            return true;
314
315
3.02M
        if (input.regex_options.has_flag_set(AllFlags::Multiline) && input.regex_options.has_flag_set(AllFlags::Internal_ConsiderNewline)) {
316
0
            auto input_view = input.view.substring_view(state.string_position, 1)[0];
317
0
            return input_view == '\r' || input_view == '\n' || input_view == LineSeparator || input_view == ParagraphSeparator;
318
0
        }
319
320
3.02M
        return false;
321
3.02M
    }();
322
4.04M
    if (is_at_line_boundary && (input.regex_options & AllFlags::MatchNotEndOfLine))
323
0
        return ExecutionResult::Failed_ExecuteLowPrioForks;
324
325
4.04M
    if ((is_at_line_boundary && !(input.regex_options & AllFlags::MatchNotEndOfLine))
326
3.02M
        || (!is_at_line_boundary && (input.regex_options & AllFlags::MatchNotEndOfLine || input.regex_options & AllFlags::MatchNotBeginOfLine)))
327
1.01M
        return ExecutionResult::Continue;
328
329
3.02M
    return ExecutionResult::Failed_ExecuteLowPrioForks;
330
4.04M
}
331
332
ALWAYS_INLINE ExecutionResult OpCode_ClearCaptureGroup::execute(MatchInput const& input, MatchState& state) const
333
2.63M
{
334
2.63M
    if (input.match_index < state.capture_group_matches.size()) {
335
0
        auto& group = state.capture_group_matches.mutable_at(input.match_index);
336
0
        auto group_id = id();
337
0
        if (group_id >= group.size())
338
0
            group.resize(group_id + 1);
339
340
0
        group[group_id].reset();
341
0
    }
342
2.63M
    return ExecutionResult::Continue;
343
2.63M
}
344
345
ALWAYS_INLINE ExecutionResult OpCode_SaveLeftCaptureGroup::execute(MatchInput const& input, MatchState& state) const
346
16.6M
{
347
16.6M
    if (input.match_index >= state.capture_group_matches.size()) {
348
4.92M
        state.capture_group_matches.ensure_capacity(input.match_index);
349
4.92M
        auto capacity = state.capture_group_matches.capacity();
350
9.84M
        for (size_t i = state.capture_group_matches.size(); i <= capacity; ++i)
351
4.92M
            state.capture_group_matches.empend();
352
4.92M
    }
353
354
16.6M
    if (id() >= state.capture_group_matches.at(input.match_index).size()) {
355
8.91M
        state.capture_group_matches.mutable_at(input.match_index).ensure_capacity(id());
356
8.91M
        auto capacity = state.capture_group_matches.at(input.match_index).capacity();
357
33.7M
        for (size_t i = state.capture_group_matches.at(input.match_index).size(); i <= capacity; ++i)
358
24.8M
            state.capture_group_matches.mutable_at(input.match_index).empend();
359
8.91M
    }
360
361
16.6M
    state.capture_group_matches.mutable_at(input.match_index).at(id()).left_column = state.string_position;
362
16.6M
    return ExecutionResult::Continue;
363
16.6M
}
364
365
ALWAYS_INLINE ExecutionResult OpCode_SaveRightCaptureGroup::execute(MatchInput const& input, MatchState& state) const
366
10.5M
{
367
10.5M
    auto& match = state.capture_group_matches.mutable_at(input.match_index).at(id());
368
10.5M
    auto start_position = match.left_column;
369
10.5M
    if (state.string_position < start_position) {
370
0
        dbgln("Right capture group {} is before left capture group {}!", state.string_position, start_position);
371
0
        return ExecutionResult::Failed_ExecuteLowPrioForks;
372
0
    }
373
374
10.5M
    auto length = state.string_position - start_position;
375
376
10.5M
    if (start_position < match.column)
377
0
        return ExecutionResult::Continue;
378
379
10.5M
    VERIFY(start_position + length <= input.view.length());
380
381
10.5M
    auto view = input.view.substring_view(start_position, length);
382
383
10.5M
    if (input.regex_options & AllFlags::StringCopyMatches) {
384
0
        match = { view.to_byte_string(), input.line, start_position, input.global_offset + start_position }; // create a copy of the original string
385
10.5M
    } else {
386
10.5M
        match = { view, input.line, start_position, input.global_offset + start_position }; // take view to original string
387
10.5M
    }
388
389
10.5M
    return ExecutionResult::Continue;
390
10.5M
}
391
392
ALWAYS_INLINE ExecutionResult OpCode_SaveRightNamedCaptureGroup::execute(MatchInput const& input, MatchState& state) const
393
0
{
394
0
    auto& match = state.capture_group_matches.mutable_at(input.match_index).at(id());
395
0
    auto start_position = match.left_column;
396
0
    if (state.string_position < start_position)
397
0
        return ExecutionResult::Failed_ExecuteLowPrioForks;
398
399
0
    auto length = state.string_position - start_position;
400
401
0
    if (start_position < match.column)
402
0
        return ExecutionResult::Continue;
403
404
0
    VERIFY(start_position + length <= input.view.length());
405
406
0
    auto view = input.view.substring_view(start_position, length);
407
408
0
    if (input.regex_options & AllFlags::StringCopyMatches) {
409
0
        match = { view.to_byte_string(), name(), input.line, start_position, input.global_offset + start_position }; // create a copy of the original string
410
0
    } else {
411
0
        match = { view, name(), input.line, start_position, input.global_offset + start_position }; // take view to original string
412
0
    }
413
414
0
    return ExecutionResult::Continue;
415
0
}
416
417
ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, MatchState& state) const
418
69.5M
{
419
69.5M
    auto argument_count = arguments_count();
420
69.5M
    auto has_single_argument = argument_count == 1;
421
422
69.5M
    bool inverse { false };
423
69.5M
    bool temporary_inverse { false };
424
69.5M
    bool reset_temp_inverse { false };
425
69.5M
    struct DisjunctionState {
426
69.5M
        bool active { false };
427
69.5M
        bool is_conjunction { false };
428
69.5M
        bool fail { false };
429
69.5M
        bool inverse_matched { false };
430
69.5M
        size_t initial_position;
431
69.5M
        size_t initial_code_unit_position;
432
69.5M
        Optional<size_t> last_accepted_position {};
433
69.5M
        Optional<size_t> last_accepted_code_unit_position {};
434
69.5M
    };
435
436
69.5M
    Vector<DisjunctionState, 4> disjunction_states;
437
69.5M
    disjunction_states.empend();
438
439
99.0M
    auto current_disjunction_state = [&]() -> DisjunctionState& { return disjunction_states.last(); };
440
441
249M
    auto current_inversion_state = [&]() -> bool { return temporary_inverse ^ inverse; };
442
443
69.5M
    size_t string_position = state.string_position;
444
69.5M
    bool inverse_matched { false };
445
69.5M
    bool had_zero_length_match { false };
446
447
69.5M
    state.string_position_before_match = state.string_position;
448
449
69.5M
    size_t offset { state.instruction_position + 3 };
450
182M
    for (size_t i = 0; i < argument_count; ++i) {
451
118M
        if (state.string_position > string_position)
452
0
            break;
453
454
118M
        if (reset_temp_inverse) {
455
24.3M
            reset_temp_inverse = false;
456
24.3M
            temporary_inverse = false;
457
93.9M
        } else {
458
93.9M
            reset_temp_inverse = true;
459
93.9M
        }
460
461
118M
        auto compare_type = (CharacterCompareType)m_bytecode->at(offset++);
462
463
118M
        switch (compare_type) {
464
12.6M
        case CharacterCompareType::Inverse:
465
12.6M
            inverse = !inverse;
466
12.6M
            continue;
467
0
        case CharacterCompareType::TemporaryInverse:
468
            // If "TemporaryInverse" is given, negate the current inversion state only for the next opcode.
469
            // it follows that this cannot be the last compare element.
470
0
            VERIFY(i != arguments_count() - 1);
471
472
0
            temporary_inverse = true;
473
0
            reset_temp_inverse = false;
474
0
            continue;
475
4.91M
        case CharacterCompareType::Char: {
476
4.91M
            u32 ch = m_bytecode->at(offset++);
477
478
            // We want to compare a string that is longer or equal in length to the available string
479
4.91M
            if (input.view.length() <= state.string_position)
480
0
                return ExecutionResult::Failed_ExecuteLowPrioForks;
481
482
4.91M
            compare_char(input, state, ch, current_inversion_state(), inverse_matched);
483
4.91M
            break;
484
4.91M
        }
485
23.7M
        case CharacterCompareType::AnyChar: {
486
            // We want to compare a string that is definitely longer than the available string
487
23.7M
            if (input.view.length() <= state.string_position)
488
931k
                return ExecutionResult::Failed_ExecuteLowPrioForks;
489
490
22.8M
            auto input_view = input.view.substring_view(state.string_position, 1)[0];
491
22.8M
            auto is_equivalent_to_newline = input_view == '\n'
492
22.8M
                || (input.regex_options.has_flag_set(AllFlags::Internal_ECMA262DotSemantics)
493
22.8M
                        ? (input_view == '\r' || input_view == LineSeparator || input_view == ParagraphSeparator)
494
22.8M
                        : false);
495
496
22.8M
            if (!is_equivalent_to_newline || (input.regex_options.has_flag_set(AllFlags::SingleLine) && input.regex_options.has_flag_set(AllFlags::Internal_ConsiderNewline))) {
497
22.8M
                if (current_inversion_state())
498
0
                    inverse_matched = true;
499
22.8M
                else
500
22.8M
                    advance_string_position(state, input.view, input_view);
501
22.8M
            }
502
22.8M
            break;
503
23.7M
        }
504
0
        case CharacterCompareType::String: {
505
0
            VERIFY(!current_inversion_state());
506
507
0
            auto const& length = m_bytecode->at(offset++);
508
509
            // We want to compare a string that is definitely longer than the available string
510
0
            if (input.view.length() < state.string_position + length)
511
0
                return ExecutionResult::Failed_ExecuteLowPrioForks;
512
513
0
            Optional<ByteString> str;
514
0
            Utf16Data utf16;
515
0
            Vector<u32> data;
516
0
            data.ensure_capacity(length);
517
0
            for (size_t i = offset; i < offset + length; ++i)
518
0
                data.unchecked_append(m_bytecode->at(i));
519
520
0
            auto view = input.view.construct_as_same(data, str, utf16);
521
0
            offset += length;
522
0
            if (compare_string(input, state, view, had_zero_length_match)) {
523
0
                if (current_inversion_state())
524
0
                    inverse_matched = true;
525
0
            }
526
0
            break;
527
0
        }
528
22.0M
        case CharacterCompareType::CharClass: {
529
22.0M
            if (input.view.length() <= state.string_position_in_code_units)
530
2.86M
                return ExecutionResult::Failed_ExecuteLowPrioForks;
531
532
19.2M
            auto character_class = (CharClass)m_bytecode->at(offset++);
533
19.2M
            auto ch = input.view[state.string_position_in_code_units];
534
535
19.2M
            compare_character_class(input, state, character_class, ch, current_inversion_state(), inverse_matched);
536
19.2M
            break;
537
22.0M
        }
538
18.6M
        case CharacterCompareType::LookupTable: {
539
18.6M
            if (input.view.length() <= state.string_position)
540
925k
                return ExecutionResult::Failed_ExecuteLowPrioForks;
541
542
17.7M
            auto count = m_bytecode->at(offset++);
543
17.7M
            auto range_data = m_bytecode->template spans<4>().slice(offset, count);
544
17.7M
            offset += count;
545
546
17.7M
            auto ch = input.view[state.string_position_in_code_units];
547
548
35.8M
            auto const* matching_range = binary_search(range_data, ch, nullptr, [insensitive = input.regex_options & AllFlags::Insensitive](auto needle, CharRange range) {
549
35.8M
                auto upper_case_needle = needle;
550
35.8M
                auto lower_case_needle = needle;
551
35.8M
                if (insensitive) {
552
0
                    upper_case_needle = to_ascii_uppercase(needle);
553
0
                    lower_case_needle = to_ascii_lowercase(needle);
554
0
                }
555
556
35.8M
                if (lower_case_needle >= range.from && lower_case_needle <= range.to)
557
5.42M
                    return 0;
558
30.4M
                if (upper_case_needle >= range.from && upper_case_needle <= range.to)
559
0
                    return 0;
560
30.4M
                if (lower_case_needle > range.to || upper_case_needle > range.to)
561
25.4M
                    return 1;
562
4.97M
                return -1;
563
30.4M
            });
564
565
17.7M
            if (matching_range) {
566
2.92M
                if (current_inversion_state())
567
524
                    inverse_matched = true;
568
2.92M
                else
569
2.92M
                    advance_string_position(state, input.view, ch);
570
2.92M
            }
571
17.7M
            break;
572
18.6M
        }
573
0
        case CharacterCompareType::CharRange: {
574
0
            if (input.view.length() <= state.string_position)
575
0
                return ExecutionResult::Failed_ExecuteLowPrioForks;
576
577
0
            auto value = (CharRange)m_bytecode->at(offset++);
578
579
0
            auto from = value.from;
580
0
            auto to = value.to;
581
0
            auto ch = input.view[state.string_position_in_code_units];
582
583
0
            compare_character_range(input, state, from, to, ch, current_inversion_state(), inverse_matched);
584
0
            break;
585
0
        }
586
11.8M
        case CharacterCompareType::Reference: {
587
11.8M
            auto reference_number = (size_t)m_bytecode->at(offset++);
588
11.8M
            auto& groups = state.capture_group_matches.at(input.match_index);
589
11.8M
            if (groups.size() <= reference_number)
590
0
                return ExecutionResult::Failed_ExecuteLowPrioForks;
591
592
11.8M
            auto str = groups.at(reference_number).view;
593
594
            // We want to compare a string that is definitely longer than the available string
595
11.8M
            if (input.view.length() < state.string_position + str.length())
596
1.01M
                return ExecutionResult::Failed_ExecuteLowPrioForks;
597
598
10.8M
            if (compare_string(input, state, str, had_zero_length_match)) {
599
10.8M
                if (current_inversion_state())
600
0
                    inverse_matched = true;
601
10.8M
            }
602
10.8M
            break;
603
11.8M
        }
604
0
        case CharacterCompareType::Property: {
605
0
            auto property = static_cast<Unicode::Property>(m_bytecode->at(offset++));
606
0
            compare_property(input, state, property, current_inversion_state(), inverse_matched);
607
0
            break;
608
11.8M
        }
609
0
        case CharacterCompareType::GeneralCategory: {
610
0
            auto general_category = static_cast<Unicode::GeneralCategory>(m_bytecode->at(offset++));
611
0
            compare_general_category(input, state, general_category, current_inversion_state(), inverse_matched);
612
0
            break;
613
11.8M
        }
614
0
        case CharacterCompareType::Script: {
615
0
            auto script = static_cast<Unicode::Script>(m_bytecode->at(offset++));
616
0
            compare_script(input, state, script, current_inversion_state(), inverse_matched);
617
0
            break;
618
11.8M
        }
619
0
        case CharacterCompareType::ScriptExtension: {
620
0
            auto script = static_cast<Unicode::Script>(m_bytecode->at(offset++));
621
0
            compare_script_extension(input, state, script, current_inversion_state(), inverse_matched);
622
0
            break;
623
11.8M
        }
624
0
        case CharacterCompareType::And:
625
0
            disjunction_states.append({
626
0
                .active = true,
627
0
                .is_conjunction = current_inversion_state(),
628
0
                .fail = current_inversion_state(),
629
0
                .inverse_matched = current_inversion_state(),
630
0
                .initial_position = state.string_position,
631
0
                .initial_code_unit_position = state.string_position_in_code_units,
632
0
            });
633
0
            continue;
634
12.6M
        case CharacterCompareType::Or:
635
12.6M
            disjunction_states.append({
636
12.6M
                .active = true,
637
12.6M
                .is_conjunction = !current_inversion_state(),
638
12.6M
                .fail = !current_inversion_state(),
639
12.6M
                .inverse_matched = !current_inversion_state(),
640
12.6M
                .initial_position = state.string_position,
641
12.6M
                .initial_code_unit_position = state.string_position_in_code_units,
642
12.6M
            });
643
12.6M
            continue;
644
11.7M
        case CharacterCompareType::EndAndOr: {
645
11.7M
            auto disjunction_state = disjunction_states.take_last();
646
11.7M
            if (!disjunction_state.fail) {
647
11.7M
                state.string_position = disjunction_state.last_accepted_position.value_or(disjunction_state.initial_position);
648
11.7M
                state.string_position_in_code_units = disjunction_state.last_accepted_code_unit_position.value_or(disjunction_state.initial_code_unit_position);
649
11.7M
            }
650
11.7M
            inverse_matched = disjunction_state.inverse_matched || disjunction_state.fail;
651
11.7M
            break;
652
11.8M
        }
653
0
        default:
654
0
            warnln("Undefined comparison: {}", (int)compare_type);
655
0
            VERIFY_NOT_REACHED();
656
0
            break;
657
118M
        }
658
659
87.3M
        auto& new_disjunction_state = current_disjunction_state();
660
87.3M
        if (current_inversion_state() && (!inverse || new_disjunction_state.active) && !inverse_matched) {
661
23.4M
            advance_string_position(state, input.view);
662
23.4M
            inverse_matched = true;
663
23.4M
        }
664
665
87.3M
        if (!has_single_argument && new_disjunction_state.active) {
666
23.4M
            auto failed = (!had_zero_length_match && string_position == state.string_position) || state.string_position > input.view.length();
667
668
23.4M
            if (!failed) {
669
23.4M
                new_disjunction_state.last_accepted_position = state.string_position;
670
23.4M
                new_disjunction_state.last_accepted_code_unit_position = state.string_position_in_code_units;
671
23.4M
                new_disjunction_state.inverse_matched |= inverse_matched;
672
23.4M
            }
673
674
23.4M
            if (new_disjunction_state.is_conjunction)
675
0
                new_disjunction_state.fail = failed && new_disjunction_state.fail;
676
23.4M
            else
677
23.4M
                new_disjunction_state.fail = failed || new_disjunction_state.fail;
678
679
23.4M
            state.string_position = new_disjunction_state.initial_position;
680
23.4M
            state.string_position_in_code_units = new_disjunction_state.initial_code_unit_position;
681
23.4M
            inverse_matched = false;
682
23.4M
        }
683
87.3M
    }
684
685
63.8M
    if (!has_single_argument) {
686
11.7M
        auto& new_disjunction_state = current_disjunction_state();
687
11.7M
        if (new_disjunction_state.active) {
688
0
            if (!new_disjunction_state.fail) {
689
0
                state.string_position = new_disjunction_state.last_accepted_position.value_or(new_disjunction_state.initial_position);
690
0
                state.string_position_in_code_units = new_disjunction_state.last_accepted_code_unit_position.value_or(new_disjunction_state.initial_code_unit_position);
691
0
            }
692
0
        }
693
11.7M
    }
694
695
63.8M
    if (current_inversion_state() && !inverse_matched)
696
0
        advance_string_position(state, input.view);
697
698
63.8M
    if ((!had_zero_length_match && string_position == state.string_position) || state.string_position > input.view.length())
699
15.9M
        return ExecutionResult::Failed_ExecuteLowPrioForks;
700
701
47.9M
    return ExecutionResult::Continue;
702
63.8M
}
703
704
ALWAYS_INLINE void OpCode_Compare::compare_char(MatchInput const& input, MatchState& state, u32 ch1, bool inverse, bool& inverse_matched)
705
15.7M
{
706
15.7M
    if (state.string_position == input.view.length())
707
0
        return;
708
709
    // FIXME: Figure out how to do this if unicode() without performing a substring split first.
710
15.7M
    auto input_view = input.view.unicode()
711
15.7M
        ? input.view.substring_view(state.string_position, 1)[0]
712
15.7M
        : input.view.code_unit_at(state.string_position_in_code_units);
713
714
15.7M
    bool equal;
715
15.7M
    if (input.regex_options & AllFlags::Insensitive) {
716
0
        if (input.view.unicode())
717
0
            equal = Unicode::equals_ignoring_case(Utf32View { &input_view, 1 }, Utf32View { &ch1, 1 });
718
0
        else
719
0
            equal = to_ascii_lowercase(input_view) == to_ascii_lowercase(ch1);
720
15.7M
    } else {
721
15.7M
        equal = input_view == ch1;
722
15.7M
    }
723
724
15.7M
    if (equal) {
725
6.47M
        if (inverse)
726
0
            inverse_matched = true;
727
6.47M
        else
728
6.47M
            advance_string_position(state, input.view, ch1);
729
6.47M
    }
730
15.7M
}
731
732
ALWAYS_INLINE bool OpCode_Compare::compare_string(MatchInput const& input, MatchState& state, RegexStringView str, bool& had_zero_length_match)
733
10.8M
{
734
10.8M
    if (state.string_position + str.length() > input.view.length()) {
735
0
        if (str.is_empty()) {
736
0
            had_zero_length_match = true;
737
0
            return true;
738
0
        }
739
0
        return false;
740
0
    }
741
742
10.8M
    if (str.length() == 0) {
743
0
        had_zero_length_match = true;
744
0
        return true;
745
0
    }
746
747
10.8M
    if (str.length() == 1) {
748
10.8M
        auto inverse_matched = false;
749
10.8M
        compare_char(input, state, str[0], false, inverse_matched);
750
10.8M
        return !inverse_matched;
751
10.8M
    }
752
753
0
    auto subject = input.view.substring_view(state.string_position, str.length());
754
0
    bool equals;
755
0
    if (input.regex_options & AllFlags::Insensitive)
756
0
        equals = subject.equals_ignoring_case(str);
757
0
    else
758
0
        equals = subject.equals(str);
759
760
0
    if (equals)
761
0
        advance_string_position(state, input.view, str);
762
763
0
    return equals;
764
10.8M
}
765
766
ALWAYS_INLINE void OpCode_Compare::compare_character_class(MatchInput const& input, MatchState& state, CharClass character_class, u32 ch, bool inverse, bool& inverse_matched)
767
19.2M
{
768
19.2M
    if (matches_character_class(character_class, ch, input.regex_options & AllFlags::Insensitive)) {
769
3.93M
        if (inverse)
770
842
            inverse_matched = true;
771
3.93M
        else
772
3.93M
            advance_string_position(state, input.view, ch);
773
3.93M
    }
774
19.2M
}
775
776
bool OpCode_Compare::matches_character_class(CharClass character_class, u32 ch, bool insensitive)
777
19.3M
{
778
19.3M
    constexpr auto is_space_or_line_terminator = [](u32 code_point) {
779
19.2M
        if ((code_point == 0x0a) || (code_point == 0x0d) || (code_point == 0x2028) || (code_point == 0x2029))
780
539
            return true;
781
19.2M
        if ((code_point == 0x09) || (code_point == 0x0b) || (code_point == 0x0c) || (code_point == 0xfeff))
782
627k
            return true;
783
18.6M
        return Unicode::code_point_has_space_separator_general_category(code_point);
784
19.2M
    };
785
786
19.3M
    switch (character_class) {
787
21.4k
    case CharClass::Alnum:
788
21.4k
        return is_ascii_alphanumeric(ch);
789
6.79k
    case CharClass::Alpha:
790
6.79k
        return is_ascii_alpha(ch);
791
0
    case CharClass::Blank:
792
0
        return is_ascii_blank(ch);
793
113
    case CharClass::Cntrl:
794
113
        return is_ascii_control(ch);
795
910
    case CharClass::Digit:
796
910
        return is_ascii_digit(ch);
797
0
    case CharClass::Graph:
798
0
        return is_ascii_graphical(ch);
799
52.3k
    case CharClass::Lower:
800
52.3k
        return is_ascii_lower_alpha(ch) || (insensitive && is_ascii_upper_alpha(ch));
801
4.67k
    case CharClass::Print:
802
4.67k
        return is_ascii_printable(ch);
803
21.4k
    case CharClass::Punct:
804
21.4k
        return is_ascii_punctuation(ch);
805
19.2M
    case CharClass::Space:
806
19.2M
        return is_space_or_line_terminator(ch);
807
17.2k
    case CharClass::Upper:
808
17.2k
        return is_ascii_upper_alpha(ch) || (insensitive && is_ascii_lower_alpha(ch));
809
99
    case CharClass::Word:
810
99
        return is_ascii_alphanumeric(ch) || ch == '_';
811
6.11k
    case CharClass::Xdigit:
812
6.11k
        return is_ascii_hex_digit(ch);
813
19.3M
    }
814
815
0
    VERIFY_NOT_REACHED();
816
0
}
817
818
ALWAYS_INLINE void OpCode_Compare::compare_character_range(MatchInput const& input, MatchState& state, u32 from, u32 to, u32 ch, bool inverse, bool& inverse_matched)
819
0
{
820
0
    if (input.regex_options & AllFlags::Insensitive) {
821
0
        from = to_ascii_lowercase(from);
822
0
        to = to_ascii_lowercase(to);
823
0
        ch = to_ascii_lowercase(ch);
824
0
    }
825
826
0
    if (ch >= from && ch <= to) {
827
0
        if (inverse)
828
0
            inverse_matched = true;
829
0
        else
830
0
            advance_string_position(state, input.view, ch);
831
0
    }
832
0
}
833
834
ALWAYS_INLINE void OpCode_Compare::compare_property(MatchInput const& input, MatchState& state, Unicode::Property property, bool inverse, bool& inverse_matched)
835
0
{
836
0
    if (state.string_position == input.view.length())
837
0
        return;
838
839
0
    u32 code_point = input.view[state.string_position_in_code_units];
840
0
    bool equal = Unicode::code_point_has_property(code_point, property);
841
842
0
    if (equal) {
843
0
        if (inverse)
844
0
            inverse_matched = true;
845
0
        else
846
0
            advance_string_position(state, input.view, code_point);
847
0
    }
848
0
}
849
850
ALWAYS_INLINE void OpCode_Compare::compare_general_category(MatchInput const& input, MatchState& state, Unicode::GeneralCategory general_category, bool inverse, bool& inverse_matched)
851
0
{
852
0
    if (state.string_position == input.view.length())
853
0
        return;
854
855
0
    u32 code_point = input.view[state.string_position_in_code_units];
856
0
    bool equal = Unicode::code_point_has_general_category(code_point, general_category);
857
858
0
    if (equal) {
859
0
        if (inverse)
860
0
            inverse_matched = true;
861
0
        else
862
0
            advance_string_position(state, input.view, code_point);
863
0
    }
864
0
}
865
866
ALWAYS_INLINE void OpCode_Compare::compare_script(MatchInput const& input, MatchState& state, Unicode::Script script, bool inverse, bool& inverse_matched)
867
0
{
868
0
    if (state.string_position == input.view.length())
869
0
        return;
870
871
0
    u32 code_point = input.view[state.string_position_in_code_units];
872
0
    bool equal = Unicode::code_point_has_script(code_point, script);
873
874
0
    if (equal) {
875
0
        if (inverse)
876
0
            inverse_matched = true;
877
0
        else
878
0
            advance_string_position(state, input.view, code_point);
879
0
    }
880
0
}
881
882
ALWAYS_INLINE void OpCode_Compare::compare_script_extension(MatchInput const& input, MatchState& state, Unicode::Script script, bool inverse, bool& inverse_matched)
883
0
{
884
0
    if (state.string_position == input.view.length())
885
0
        return;
886
887
0
    u32 code_point = input.view[state.string_position_in_code_units];
888
0
    bool equal = Unicode::code_point_has_script_extension(code_point, script);
889
890
0
    if (equal) {
891
0
        if (inverse)
892
0
            inverse_matched = true;
893
0
        else
894
0
            advance_string_position(state, input.view, code_point);
895
0
    }
896
0
}
897
898
ByteString OpCode_Compare::arguments_string() const
899
0
{
900
0
    return ByteString::formatted("argc={}, args={} ", arguments_count(), arguments_size());
901
0
}
902
903
Vector<CompareTypeAndValuePair> OpCode_Compare::flat_compares() const
904
23.7M
{
905
23.7M
    Vector<CompareTypeAndValuePair> result;
906
907
23.7M
    size_t offset { state().instruction_position + 3 };
908
909
47.9M
    for (size_t i = 0; i < arguments_count(); ++i) {
910
24.1M
        auto compare_type = (CharacterCompareType)m_bytecode->at(offset++);
911
912
24.1M
        if (compare_type == CharacterCompareType::Char) {
913
20.8M
            auto ch = m_bytecode->at(offset++);
914
20.8M
            result.append({ compare_type, ch });
915
20.8M
        } else if (compare_type == CharacterCompareType::Reference) {
916
769
            auto ref = m_bytecode->at(offset++);
917
769
            result.append({ compare_type, ref });
918
3.35M
        } else if (compare_type == CharacterCompareType::String) {
919
750k
            auto& length = m_bytecode->at(offset++);
920
60.5M
            for (size_t k = 0; k < length; ++k)
921
59.7M
                result.append({ CharacterCompareType::Char, m_bytecode->at(offset + k) });
922
750k
            offset += length;
923
2.60M
        } else if (compare_type == CharacterCompareType::CharClass) {
924
322k
            auto character_class = m_bytecode->at(offset++);
925
322k
            result.append({ compare_type, character_class });
926
2.28M
        } else if (compare_type == CharacterCompareType::CharRange) {
927
508
            auto value = m_bytecode->at(offset++);
928
508
            result.append({ compare_type, value });
929
2.28M
        } else if (compare_type == CharacterCompareType::LookupTable) {
930
86.8k
            auto count = m_bytecode->at(offset++);
931
9.04M
            for (size_t i = 0; i < count; ++i)
932
8.95M
                result.append({ CharacterCompareType::CharRange, m_bytecode->at(offset++) });
933
2.19M
        } else if (compare_type == CharacterCompareType::GeneralCategory
934
2.19M
            || compare_type == CharacterCompareType::Property
935
2.19M
            || compare_type == CharacterCompareType::Script
936
2.19M
            || compare_type == CharacterCompareType::ScriptExtension) {
937
0
            auto value = m_bytecode->at(offset++);
938
0
            result.append({ compare_type, value });
939
2.19M
        } else {
940
2.19M
            result.append({ compare_type, 0 });
941
2.19M
        }
942
24.1M
    }
943
23.7M
    return result;
944
23.7M
}
945
946
Vector<ByteString> OpCode_Compare::variable_arguments_to_byte_string(Optional<MatchInput const&> input) const
947
0
{
948
0
    Vector<ByteString> result;
949
950
0
    size_t offset { state().instruction_position + 3 };
951
0
    RegexStringView const& view = ((input.has_value()) ? input.value().view : StringView {});
952
953
0
    for (size_t i = 0; i < arguments_count(); ++i) {
954
0
        auto compare_type = (CharacterCompareType)m_bytecode->at(offset++);
955
0
        result.empend(ByteString::formatted("type={} [{}]", (size_t)compare_type, character_compare_type_name(compare_type)));
956
957
0
        auto string_start_offset = state().string_position_before_match;
958
959
0
        if (compare_type == CharacterCompareType::Char) {
960
0
            auto ch = m_bytecode->at(offset++);
961
0
            auto is_ascii = is_ascii_printable(ch);
962
0
            if (is_ascii)
963
0
                result.empend(ByteString::formatted(" value='{:c}'", static_cast<char>(ch)));
964
0
            else
965
0
                result.empend(ByteString::formatted(" value={:x}", ch));
966
967
0
            if (!view.is_null() && view.length() > string_start_offset) {
968
0
                if (is_ascii) {
969
0
                    result.empend(ByteString::formatted(
970
0
                        " compare against: '{}'",
971
0
                        view.substring_view(string_start_offset, string_start_offset > view.length() ? 0 : 1).to_byte_string()));
972
0
                } else {
973
0
                    auto str = view.substring_view(string_start_offset, string_start_offset > view.length() ? 0 : 1).to_byte_string();
974
0
                    u8 buf[8] { 0 };
975
0
                    __builtin_memcpy(buf, str.characters(), min(str.length(), sizeof(buf)));
976
0
                    result.empend(ByteString::formatted(" compare against: {:x},{:x},{:x},{:x},{:x},{:x},{:x},{:x}",
977
0
                        buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7]));
978
0
                }
979
0
            }
980
0
        } else if (compare_type == CharacterCompareType::Reference) {
981
0
            auto ref = m_bytecode->at(offset++);
982
0
            result.empend(ByteString::formatted(" number={}", ref));
983
0
            if (input.has_value()) {
984
0
                if (state().capture_group_matches.size() > input->match_index) {
985
0
                    auto& match = state().capture_group_matches[input->match_index];
986
0
                    if (match.size() > ref) {
987
0
                        auto& group = match[ref];
988
0
                        result.empend(ByteString::formatted(" left={}", group.left_column));
989
0
                        result.empend(ByteString::formatted(" right={}", group.left_column + group.view.length_in_code_units()));
990
0
                        result.empend(ByteString::formatted(" contents='{}'", group.view));
991
0
                    } else {
992
0
                        result.empend(ByteString::formatted(" (invalid ref, max={})", match.size() - 1));
993
0
                    }
994
0
                } else {
995
0
                    result.empend(ByteString::formatted(" (invalid index {}, max={})", input->match_index, state().capture_group_matches.size() - 1));
996
0
                }
997
0
            }
998
0
        } else if (compare_type == CharacterCompareType::String) {
999
0
            auto& length = m_bytecode->at(offset++);
1000
0
            StringBuilder str_builder;
1001
0
            for (size_t i = 0; i < length; ++i)
1002
0
                str_builder.append(m_bytecode->at(offset++));
1003
0
            result.empend(ByteString::formatted(" value=\"{}\"", str_builder.string_view().substring_view(0, length)));
1004
0
            if (!view.is_null() && view.length() > state().string_position)
1005
0
                result.empend(ByteString::formatted(
1006
0
                    " compare against: \"{}\"",
1007
0
                    input.value().view.substring_view(string_start_offset, string_start_offset + length > view.length() ? 0 : length).to_byte_string()));
1008
0
        } else if (compare_type == CharacterCompareType::CharClass) {
1009
0
            auto character_class = (CharClass)m_bytecode->at(offset++);
1010
0
            result.empend(ByteString::formatted(" ch_class={} [{}]", (size_t)character_class, character_class_name(character_class)));
1011
0
            if (!view.is_null() && view.length() > state().string_position)
1012
0
                result.empend(ByteString::formatted(
1013
0
                    " compare against: '{}'",
1014
0
                    input.value().view.substring_view(string_start_offset, state().string_position > view.length() ? 0 : 1).to_byte_string()));
1015
0
        } else if (compare_type == CharacterCompareType::CharRange) {
1016
0
            auto value = (CharRange)m_bytecode->at(offset++);
1017
0
            result.empend(ByteString::formatted(" ch_range={:x}-{:x}", value.from, value.to));
1018
0
            if (!view.is_null() && view.length() > state().string_position)
1019
0
                result.empend(ByteString::formatted(
1020
0
                    " compare against: '{}'",
1021
0
                    input.value().view.substring_view(string_start_offset, state().string_position > view.length() ? 0 : 1).to_byte_string()));
1022
0
        } else if (compare_type == CharacterCompareType::LookupTable) {
1023
0
            auto count = m_bytecode->at(offset++);
1024
0
            for (size_t j = 0; j < count; ++j) {
1025
0
                auto range = (CharRange)m_bytecode->at(offset++);
1026
0
                result.append(ByteString::formatted(" {:x}-{:x}", range.from, range.to));
1027
0
            }
1028
0
            if (!view.is_null() && view.length() > state().string_position)
1029
0
                result.empend(ByteString::formatted(
1030
0
                    " compare against: '{}'",
1031
0
                    input.value().view.substring_view(string_start_offset, state().string_position > view.length() ? 0 : 1).to_byte_string()));
1032
0
        } else if (compare_type == CharacterCompareType::GeneralCategory
1033
0
            || compare_type == CharacterCompareType::Property
1034
0
            || compare_type == CharacterCompareType::Script
1035
0
            || compare_type == CharacterCompareType::ScriptExtension) {
1036
1037
0
            auto value = m_bytecode->at(offset++);
1038
0
            result.empend(ByteString::formatted(" value={}", value));
1039
0
        }
1040
0
    }
1041
0
    return result;
1042
0
}
1043
1044
ALWAYS_INLINE ExecutionResult OpCode_Repeat::execute(MatchInput const&, MatchState& state) const
1045
2.58M
{
1046
2.58M
    VERIFY(count() > 0);
1047
1048
2.58M
    if (id() >= state.repetition_marks.size())
1049
2.54M
        state.repetition_marks.resize(id() + 1);
1050
2.58M
    auto& repetition_mark = state.repetition_marks.mutable_at(id());
1051
1052
2.58M
    if (repetition_mark == count() - 1) {
1053
2.53M
        repetition_mark = 0;
1054
2.53M
    } else {
1055
48.6k
        state.instruction_position -= offset() + size();
1056
48.6k
        ++repetition_mark;
1057
48.6k
    }
1058
1059
2.58M
    return ExecutionResult::Continue;
1060
2.58M
}
1061
1062
ALWAYS_INLINE ExecutionResult OpCode_ResetRepeat::execute(MatchInput const&, MatchState& state) const
1063
4.92M
{
1064
4.92M
    if (id() >= state.repetition_marks.size())
1065
4.82M
        state.repetition_marks.resize(id() + 1);
1066
1067
4.92M
    state.repetition_marks.mutable_at(id()) = 0;
1068
4.92M
    return ExecutionResult::Continue;
1069
4.92M
}
1070
1071
ALWAYS_INLINE ExecutionResult OpCode_Checkpoint::execute(MatchInput const&, MatchState& state) const
1072
56.6M
{
1073
56.6M
    auto id = this->id();
1074
56.6M
    if (id >= state.checkpoints.size())
1075
7.36M
        state.checkpoints.resize(id + 1);
1076
1077
56.6M
    state.checkpoints[id] = state.string_position + 1;
1078
56.6M
    return ExecutionResult::Continue;
1079
56.6M
}
1080
1081
ALWAYS_INLINE ExecutionResult OpCode_JumpNonEmpty::execute(MatchInput const& input, MatchState& state) const
1082
45.8M
{
1083
45.8M
    u64 current_position = state.string_position;
1084
45.8M
    auto checkpoint_position = state.checkpoints.get(checkpoint()).value_or(0);
1085
1086
45.8M
    if (checkpoint_position != 0 && checkpoint_position != current_position + 1) {
1087
45.8M
        auto form = this->form();
1088
1089
45.8M
        if (form == OpCodeId::Jump) {
1090
39.5M
            state.instruction_position += offset();
1091
39.5M
            return ExecutionResult::Continue;
1092
39.5M
        }
1093
1094
6.29M
        state.fork_at_position = state.instruction_position + size() + offset();
1095
1096
6.29M
        if (form == OpCodeId::ForkJump) {
1097
6.29M
            state.forks_since_last_save++;
1098
6.29M
            return ExecutionResult::Fork_PrioHigh;
1099
6.29M
        }
1100
1101
0
        if (form == OpCodeId::ForkStay) {
1102
0
            state.forks_since_last_save++;
1103
0
            return ExecutionResult::Fork_PrioLow;
1104
0
        }
1105
1106
0
        if (form == OpCodeId::ForkReplaceStay) {
1107
0
            input.fork_to_replace = state.instruction_position;
1108
0
            return ExecutionResult::Fork_PrioLow;
1109
0
        }
1110
1111
0
        if (form == OpCodeId::ForkReplaceJump) {
1112
0
            input.fork_to_replace = state.instruction_position;
1113
0
            return ExecutionResult::Fork_PrioHigh;
1114
0
        }
1115
0
    }
1116
1117
0
    return ExecutionResult::Continue;
1118
45.8M
}
1119
1120
}