Coverage Report

Created: 2025-08-28 06:26

/src/serenity/Userland/Libraries/LibRegex/RegexByteCode.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2020, Emanuel Sprung <emanuel.sprung@gmail.com>
3
 *
4
 * SPDX-License-Identifier: BSD-2-Clause
5
 */
6
7
#pragma once
8
9
#include "RegexBytecodeStreamOptimizer.h"
10
#include "RegexMatch.h"
11
#include "RegexOptions.h"
12
13
#include <AK/Concepts.h>
14
#include <AK/DisjointChunks.h>
15
#include <AK/Forward.h>
16
#include <AK/HashMap.h>
17
#include <AK/NonnullOwnPtr.h>
18
#include <AK/OwnPtr.h>
19
#include <AK/Traits.h>
20
#include <AK/TypeCasts.h>
21
#include <AK/Types.h>
22
#include <AK/Vector.h>
23
#include <LibUnicode/Forward.h>
24
25
namespace regex {
26
27
using ByteCodeValueType = u64;
28
29
#define ENUMERATE_OPCODES                          \
30
6
    __ENUMERATE_OPCODE(Compare)                    \
31
6
    __ENUMERATE_OPCODE(Jump)                       \
32
6
    __ENUMERATE_OPCODE(JumpNonEmpty)               \
33
6
    __ENUMERATE_OPCODE(ForkJump)                   \
34
6
    __ENUMERATE_OPCODE(ForkStay)                   \
35
6
    __ENUMERATE_OPCODE(ForkReplaceJump)            \
36
6
    __ENUMERATE_OPCODE(ForkReplaceStay)            \
37
6
    __ENUMERATE_OPCODE(FailForks)                  \
38
6
    __ENUMERATE_OPCODE(SaveLeftCaptureGroup)       \
39
6
    __ENUMERATE_OPCODE(SaveRightCaptureGroup)      \
40
6
    __ENUMERATE_OPCODE(SaveRightNamedCaptureGroup) \
41
6
    __ENUMERATE_OPCODE(CheckBegin)                 \
42
6
    __ENUMERATE_OPCODE(CheckEnd)                   \
43
6
    __ENUMERATE_OPCODE(CheckBoundary)              \
44
6
    __ENUMERATE_OPCODE(Save)                       \
45
6
    __ENUMERATE_OPCODE(Restore)                    \
46
6
    __ENUMERATE_OPCODE(GoBack)                     \
47
6
    __ENUMERATE_OPCODE(ClearCaptureGroup)          \
48
6
    __ENUMERATE_OPCODE(Repeat)                     \
49
6
    __ENUMERATE_OPCODE(ResetRepeat)                \
50
6
    __ENUMERATE_OPCODE(Checkpoint)                 \
51
6
    __ENUMERATE_OPCODE(Exit)
52
53
// clang-format off
54
enum class OpCodeId : ByteCodeValueType {
55
#define __ENUMERATE_OPCODE(x) x,
56
    ENUMERATE_OPCODES
57
#undef __ENUMERATE_OPCODE
58
59
    First = Compare,
60
    Last = Exit,
61
};
62
// clang-format on
63
64
#define ENUMERATE_CHARACTER_COMPARE_TYPES                    \
65
0
    __ENUMERATE_CHARACTER_COMPARE_TYPE(Undefined)            \
66
0
    __ENUMERATE_CHARACTER_COMPARE_TYPE(Inverse)              \
67
0
    __ENUMERATE_CHARACTER_COMPARE_TYPE(TemporaryInverse)     \
68
0
    __ENUMERATE_CHARACTER_COMPARE_TYPE(AnyChar)              \
69
0
    __ENUMERATE_CHARACTER_COMPARE_TYPE(Char)                 \
70
0
    __ENUMERATE_CHARACTER_COMPARE_TYPE(String)               \
71
0
    __ENUMERATE_CHARACTER_COMPARE_TYPE(CharClass)            \
72
0
    __ENUMERATE_CHARACTER_COMPARE_TYPE(CharRange)            \
73
0
    __ENUMERATE_CHARACTER_COMPARE_TYPE(Reference)            \
74
0
    __ENUMERATE_CHARACTER_COMPARE_TYPE(Property)             \
75
0
    __ENUMERATE_CHARACTER_COMPARE_TYPE(GeneralCategory)      \
76
0
    __ENUMERATE_CHARACTER_COMPARE_TYPE(Script)               \
77
0
    __ENUMERATE_CHARACTER_COMPARE_TYPE(ScriptExtension)      \
78
0
    __ENUMERATE_CHARACTER_COMPARE_TYPE(RangeExpressionDummy) \
79
0
    __ENUMERATE_CHARACTER_COMPARE_TYPE(LookupTable)          \
80
0
    __ENUMERATE_CHARACTER_COMPARE_TYPE(And)                  \
81
0
    __ENUMERATE_CHARACTER_COMPARE_TYPE(Or)                   \
82
0
    __ENUMERATE_CHARACTER_COMPARE_TYPE(EndAndOr)
83
84
enum class CharacterCompareType : ByteCodeValueType {
85
#define __ENUMERATE_CHARACTER_COMPARE_TYPE(x) x,
86
    ENUMERATE_CHARACTER_COMPARE_TYPES
87
#undef __ENUMERATE_CHARACTER_COMPARE_TYPE
88
};
89
90
#define ENUMERATE_CHARACTER_CLASSES    \
91
0
    __ENUMERATE_CHARACTER_CLASS(Alnum) \
92
0
    __ENUMERATE_CHARACTER_CLASS(Cntrl) \
93
0
    __ENUMERATE_CHARACTER_CLASS(Lower) \
94
0
    __ENUMERATE_CHARACTER_CLASS(Space) \
95
0
    __ENUMERATE_CHARACTER_CLASS(Alpha) \
96
0
    __ENUMERATE_CHARACTER_CLASS(Digit) \
97
0
    __ENUMERATE_CHARACTER_CLASS(Print) \
98
0
    __ENUMERATE_CHARACTER_CLASS(Upper) \
99
0
    __ENUMERATE_CHARACTER_CLASS(Blank) \
100
0
    __ENUMERATE_CHARACTER_CLASS(Graph) \
101
0
    __ENUMERATE_CHARACTER_CLASS(Punct) \
102
0
    __ENUMERATE_CHARACTER_CLASS(Word)  \
103
0
    __ENUMERATE_CHARACTER_CLASS(Xdigit)
104
105
enum class CharClass : ByteCodeValueType {
106
#define __ENUMERATE_CHARACTER_CLASS(x) x,
107
    ENUMERATE_CHARACTER_CLASSES
108
#undef __ENUMERATE_CHARACTER_CLASS
109
};
110
111
#define ENUMERATE_BOUNDARY_CHECK_TYPES    \
112
0
    __ENUMERATE_BOUNDARY_CHECK_TYPE(Word) \
113
0
    __ENUMERATE_BOUNDARY_CHECK_TYPE(NonWord)
114
115
enum class BoundaryCheckType : ByteCodeValueType {
116
#define __ENUMERATE_BOUNDARY_CHECK_TYPE(x) x,
117
    ENUMERATE_BOUNDARY_CHECK_TYPES
118
#undef __ENUMERATE_BOUNDARY_CHECK_TYPE
119
};
120
121
struct CharRange {
122
    u32 const from;
123
    u32 const to;
124
125
    CharRange(u64 value)
126
48.7M
        : from(value >> 32)
127
48.7M
        , to(value & 0xffffffff)
128
48.7M
    {
129
48.7M
    }
130
131
    CharRange(u32 from, u32 to)
132
85.8M
        : from(from)
133
85.8M
        , to(to)
134
85.8M
    {
135
85.8M
    }
136
137
15.5M
    operator ByteCodeValueType() const { return ((u64)from << 32) | to; }
138
};
139
140
struct CompareTypeAndValuePair {
141
    CharacterCompareType type;
142
    ByteCodeValueType value;
143
};
144
145
class OpCode;
146
147
class ByteCode : public DisjointChunks<ByteCodeValueType> {
148
    using Base = DisjointChunks<ByteCodeValueType>;
149
150
public:
151
    ByteCode()
152
96.1M
    {
153
96.1M
        ensure_opcodes_initialized();
154
96.1M
    }
155
156
3.63M
    ByteCode(ByteCode const&) = default;
157
99.7M
    virtual ~ByteCode() = default;
158
159
1.54M
    ByteCode& operator=(ByteCode&&) = default;
160
    ByteCode& operator=(Base&& value)
161
0
    {
162
0
        static_cast<Base&>(*this) = move(value);
163
0
        return *this;
164
0
    }
165
166
    template<typename... Args>
167
    void empend(Args&&... args)
168
257M
    {
169
257M
        if (is_empty())
170
45.5M
            Base::append({});
171
257M
        Base::last_chunk().empend(forward<Args>(args)...);
172
257M
    }
void regex::ByteCode::empend<unsigned long>(unsigned long&&)
Line
Count
Source
168
201M
    {
169
201M
        if (is_empty())
170
45.5M
            Base::append({});
171
201M
        Base::last_chunk().empend(forward<Args>(args)...);
172
201M
    }
void regex::ByteCode::empend<unsigned long&>(unsigned long&)
Line
Count
Source
168
56.0M
    {
169
56.0M
        if (is_empty())
170
25
            Base::append({});
171
56.0M
        Base::last_chunk().empend(forward<Args>(args)...);
172
56.0M
    }
void regex::ByteCode::empend<unsigned int>(unsigned int&&)
Line
Count
Source
168
96.1k
    {
169
96.1k
        if (is_empty())
170
0
            Base::append({});
171
96.1k
        Base::last_chunk().empend(forward<Args>(args)...);
172
96.1k
    }
173
    template<typename T>
174
    void append(T&& value)
175
92.8M
    {
176
92.8M
        if (is_empty())
177
44.0M
            Base::append({});
178
92.8M
        Base::last_chunk().append(forward<T>(value));
179
92.8M
    }
void regex::ByteCode::append<unsigned long>(unsigned long&&)
Line
Count
Source
175
45.1M
    {
176
45.1M
        if (is_empty())
177
44.0M
            Base::append({});
178
45.1M
        Base::last_chunk().append(forward<T>(value));
179
45.1M
    }
void regex::ByteCode::append<regex::CharRange>(regex::CharRange&&)
Line
Count
Source
175
9.33M
    {
176
9.33M
        if (is_empty())
177
0
            Base::append({});
178
9.33M
        Base::last_chunk().append(forward<T>(value));
179
9.33M
    }
void regex::ByteCode::append<int>(int&&)
Line
Count
Source
175
601k
    {
176
601k
        if (is_empty())
177
0
            Base::append({});
178
601k
        Base::last_chunk().append(forward<T>(value));
179
601k
    }
void regex::ByteCode::append<unsigned long&>(unsigned long&)
Line
Count
Source
175
37.7M
    {
176
37.7M
        if (is_empty())
177
0
            Base::append({});
178
37.7M
        Base::last_chunk().append(forward<T>(value));
179
37.7M
    }
180
    template<typename T>
181
    void prepend(T&& value)
182
925k
    {
183
925k
        if (is_empty())
184
0
            return append(forward<T>(value));
185
925k
        Base::first_chunk().prepend(forward<T>(value));
186
925k
    }
187
188
    void append(Span<ByteCodeValueType const> value)
189
346k
    {
190
346k
        if (is_empty())
191
0
            Base::append({});
192
346k
        auto& last = Base::last_chunk();
193
346k
        last.ensure_capacity(value.size());
194
346k
        for (auto v : value)
195
22.0M
            last.unchecked_append(v);
196
346k
    }
197
198
    void ensure_capacity(size_t capacity)
199
347k
    {
200
347k
        if (is_empty())
201
399
            Base::append({});
202
347k
        Base::last_chunk().ensure_capacity(capacity);
203
347k
    }
204
205
    void last_chunk() const = delete;
206
    void first_chunk() const = delete;
207
208
    void insert_bytecode_compare_values(Vector<CompareTypeAndValuePair>&& pairs)
209
43.6M
    {
210
43.6M
        Optimizer::append_character_class(*this, move(pairs));
211
43.6M
    }
212
213
    void insert_bytecode_check_boundary(BoundaryCheckType type)
214
20.3k
    {
215
20.3k
        ByteCode bytecode;
216
20.3k
        bytecode.empend((ByteCodeValueType)OpCodeId::CheckBoundary);
217
20.3k
        bytecode.empend((ByteCodeValueType)type);
218
219
20.3k
        extend(move(bytecode));
220
20.3k
    }
221
222
    void insert_bytecode_clear_capture_group(size_t index)
223
11.0M
    {
224
11.0M
        empend(static_cast<ByteCodeValueType>(OpCodeId::ClearCaptureGroup));
225
11.0M
        empend(index);
226
11.0M
    }
227
228
    void insert_bytecode_compare_string(StringView view)
229
1.25M
    {
230
1.25M
        empend(static_cast<ByteCodeValueType>(OpCodeId::Compare));
231
1.25M
        empend(static_cast<u64>(1)); // number of arguments
232
1.25M
        empend(2 + view.length());   // size of arguments
233
1.25M
        empend(static_cast<ByteCodeValueType>(CharacterCompareType::String));
234
1.25M
        insert_string(view);
235
1.25M
    }
236
237
    void insert_bytecode_group_capture_left(size_t capture_groups_count)
238
289k
    {
239
289k
        empend(static_cast<ByteCodeValueType>(OpCodeId::SaveLeftCaptureGroup));
240
289k
        empend(capture_groups_count);
241
289k
    }
242
243
    void insert_bytecode_group_capture_right(size_t capture_groups_count)
244
285k
    {
245
285k
        empend(static_cast<ByteCodeValueType>(OpCodeId::SaveRightCaptureGroup));
246
285k
        empend(capture_groups_count);
247
285k
    }
248
249
    void insert_bytecode_group_capture_right(size_t capture_groups_count, StringView name)
250
1.23k
    {
251
1.23k
        empend(static_cast<ByteCodeValueType>(OpCodeId::SaveRightNamedCaptureGroup));
252
1.23k
        empend(reinterpret_cast<ByteCodeValueType>(name.characters_without_null_termination()));
253
1.23k
        empend(name.length());
254
1.23k
        empend(capture_groups_count);
255
1.23k
    }
256
257
    enum class LookAroundType {
258
        LookAhead,
259
        LookBehind,
260
        NegatedLookAhead,
261
        NegatedLookBehind,
262
    };
263
    void insert_bytecode_lookaround(ByteCode&& lookaround_body, LookAroundType type, size_t match_length = 0)
264
1.37k
    {
265
        // FIXME: The save stack will grow infinitely with repeated failures
266
        //        as we do not discard that on failure (we don't necessarily know how many to pop with the current architecture).
267
1.37k
        switch (type) {
268
1
        case LookAroundType::LookAhead: {
269
            // SAVE
270
            // REGEXP BODY
271
            // RESTORE
272
1
            empend((ByteCodeValueType)OpCodeId::Save);
273
1
            extend(move(lookaround_body));
274
1
            empend((ByteCodeValueType)OpCodeId::Restore);
275
1
            return;
276
0
        }
277
7
        case LookAroundType::NegatedLookAhead: {
278
            // JUMP _A
279
            // LABEL _L
280
            // REGEXP BODY
281
            // FAIL
282
            // LABEL _A
283
            // SAVE
284
            // FORKJUMP _L
285
            // RESTORE
286
7
            auto body_length = lookaround_body.size();
287
7
            empend((ByteCodeValueType)OpCodeId::Jump);
288
7
            empend((ByteCodeValueType)body_length + 1); // JUMP to label _A
289
7
            extend(move(lookaround_body));
290
7
            empend((ByteCodeValueType)OpCodeId::FailForks);
291
7
            empend((ByteCodeValueType)OpCodeId::Save);
292
7
            empend((ByteCodeValueType)OpCodeId::ForkJump);
293
7
            empend((ByteCodeValueType) - (body_length + 4)); // JUMP to label _L
294
7
            empend((ByteCodeValueType)OpCodeId::Restore);
295
7
            return;
296
0
        }
297
8
        case LookAroundType::LookBehind:
298
            // SAVE
299
            // GOBACK match_length(BODY)
300
            // REGEXP BODY
301
            // RESTORE
302
8
            empend((ByteCodeValueType)OpCodeId::Save);
303
8
            empend((ByteCodeValueType)OpCodeId::GoBack);
304
8
            empend((ByteCodeValueType)match_length);
305
8
            extend(move(lookaround_body));
306
8
            empend((ByteCodeValueType)OpCodeId::Restore);
307
8
            return;
308
1.36k
        case LookAroundType::NegatedLookBehind: {
309
            // JUMP _A
310
            // LABEL _L
311
            // GOBACK match_length(BODY)
312
            // REGEXP BODY
313
            // FAIL
314
            // LABEL _A
315
            // SAVE
316
            // FORKJUMP _L
317
            // RESTORE
318
1.36k
            auto body_length = lookaround_body.size();
319
1.36k
            empend((ByteCodeValueType)OpCodeId::Jump);
320
1.36k
            empend((ByteCodeValueType)body_length + 3); // JUMP to label _A
321
1.36k
            empend((ByteCodeValueType)OpCodeId::GoBack);
322
1.36k
            empend((ByteCodeValueType)match_length);
323
1.36k
            extend(move(lookaround_body));
324
1.36k
            empend((ByteCodeValueType)OpCodeId::FailForks);
325
1.36k
            empend((ByteCodeValueType)OpCodeId::Save);
326
1.36k
            empend((ByteCodeValueType)OpCodeId::ForkJump);
327
1.36k
            empend((ByteCodeValueType) - (body_length + 6)); // JUMP to label _L
328
1.36k
            empend((ByteCodeValueType)OpCodeId::Restore);
329
1.36k
            return;
330
0
        }
331
1.37k
        }
332
333
0
        VERIFY_NOT_REACHED();
334
0
    }
335
336
    void insert_bytecode_alternation(ByteCode&& left, ByteCode&& right)
337
1.18k
    {
338
339
        // FORKJUMP _ALT
340
        // REGEXP ALT2
341
        // JUMP  _END
342
        // LABEL _ALT
343
        // REGEXP ALT1
344
        // LABEL _END
345
346
        // Optimisation: Eliminate extra work by unifying common pre-and-postfix exprs.
347
1.18k
        Optimizer::append_alternation(*this, move(left), move(right));
348
1.18k
    }
349
350
    template<Integral T>
351
    static void transform_bytecode_repetition_min_max(ByteCode& bytecode_to_repeat, T minimum, Optional<T> maximum, size_t min_repetition_mark_id, size_t max_repetition_mark_id, bool greedy = true)
352
3.70k
    {
353
3.70k
        if (!maximum.has_value()) {
354
2.05k
            if (minimum == 0)
355
221
                return transform_bytecode_repetition_any(bytecode_to_repeat, greedy);
356
1.83k
            if (minimum == 1)
357
65
                return transform_bytecode_repetition_min_one(bytecode_to_repeat, greedy);
358
1.83k
        }
359
360
3.42k
        ByteCode new_bytecode;
361
3.42k
        new_bytecode.insert_bytecode_repetition_n(bytecode_to_repeat, minimum, min_repetition_mark_id);
362
363
3.42k
        if (maximum.has_value()) {
364
            // (REPEAT REGEXP MIN)
365
            // LABEL _MAX_LOOP            |
366
            // FORK END                   |
367
            // REGEXP                     |
368
            // REPEAT _MAX_LOOP MAX-MIN   | if max > min
369
            // FORK END                   |
370
            // REGEXP                     |
371
            // LABEL END                  |
372
            // RESET _MAX_LOOP            |
373
1.65k
            auto jump_kind = static_cast<ByteCodeValueType>(greedy ? OpCodeId::ForkStay : OpCodeId::ForkJump);
374
1.65k
            if (maximum.value() > minimum) {
375
1.58k
                new_bytecode.empend(jump_kind);
376
1.58k
                new_bytecode.empend((ByteCodeValueType)0); // Placeholder for the jump target.
377
1.58k
                auto pre_loop_fork_jump_index = new_bytecode.size();
378
1.58k
                new_bytecode.extend(bytecode_to_repeat);
379
1.58k
                auto repetitions = maximum.value() - minimum;
380
1.58k
                auto fork_jump_address = new_bytecode.size();
381
1.58k
                if (repetitions > 1) {
382
1.52k
                    new_bytecode.empend((ByteCodeValueType)OpCodeId::Repeat);
383
1.52k
                    new_bytecode.empend(bytecode_to_repeat.size() + 2);
384
1.52k
                    new_bytecode.empend(static_cast<ByteCodeValueType>(repetitions - 1));
385
1.52k
                    new_bytecode.empend(max_repetition_mark_id);
386
1.52k
                    new_bytecode.empend(jump_kind);
387
1.52k
                    new_bytecode.empend((ByteCodeValueType)0); // Placeholder for the jump target.
388
1.52k
                    auto post_loop_fork_jump_index = new_bytecode.size();
389
1.52k
                    new_bytecode.extend(bytecode_to_repeat);
390
1.52k
                    fork_jump_address = new_bytecode.size();
391
392
1.52k
                    new_bytecode[post_loop_fork_jump_index - 1] = (ByteCodeValueType)(fork_jump_address - post_loop_fork_jump_index);
393
394
1.52k
                    new_bytecode.empend((ByteCodeValueType)OpCodeId::ResetRepeat);
395
1.52k
                    new_bytecode.empend((ByteCodeValueType)max_repetition_mark_id);
396
1.52k
                }
397
1.58k
                new_bytecode[pre_loop_fork_jump_index - 1] = (ByteCodeValueType)(fork_jump_address - pre_loop_fork_jump_index);
398
1.58k
            }
399
1.77k
        } else {
400
            // no maximum value set, repeat finding if possible:
401
            // (REPEAT REGEXP MIN)
402
            // LABEL _START
403
            // CHECKPOINT _C
404
            // REGEXP
405
            // JUMP_NONEMPTY _C _START FORK
406
407
            // Note: This is only safe because REPEAT will leave one iteration outside (see repetition_n)
408
1.77k
            auto checkpoint = s_next_checkpoint_serial_id++;
409
1.77k
            new_bytecode.insert(new_bytecode.size() - bytecode_to_repeat.size(), (ByteCodeValueType)OpCodeId::Checkpoint);
410
1.77k
            new_bytecode.insert(new_bytecode.size() - bytecode_to_repeat.size(), (ByteCodeValueType)checkpoint);
411
412
1.77k
            auto jump_kind = static_cast<ByteCodeValueType>(greedy ? OpCodeId::ForkJump : OpCodeId::ForkStay);
413
1.77k
            new_bytecode.empend((ByteCodeValueType)OpCodeId::JumpNonEmpty);
414
1.77k
            new_bytecode.empend(-bytecode_to_repeat.size() - 4 - 2); // Jump to the last iteration
415
1.77k
            new_bytecode.empend(checkpoint);                         // if _C is not empty.
416
1.77k
            new_bytecode.empend(jump_kind);
417
1.77k
        }
418
419
3.42k
        bytecode_to_repeat = move(new_bytecode);
420
3.42k
    }
_ZN5regex8ByteCode37transform_bytecode_repetition_min_maxITkN2AK8Concepts8IntegralEjEEvRS0_T_NS2_8OptionalIS5_EEmmb
Line
Count
Source
352
592
    {
353
592
        if (!maximum.has_value()) {
354
52
            if (minimum == 0)
355
12
                return transform_bytecode_repetition_any(bytecode_to_repeat, greedy);
356
40
            if (minimum == 1)
357
4
                return transform_bytecode_repetition_min_one(bytecode_to_repeat, greedy);
358
40
        }
359
360
576
        ByteCode new_bytecode;
361
576
        new_bytecode.insert_bytecode_repetition_n(bytecode_to_repeat, minimum, min_repetition_mark_id);
362
363
576
        if (maximum.has_value()) {
364
            // (REPEAT REGEXP MIN)
365
            // LABEL _MAX_LOOP            |
366
            // FORK END                   |
367
            // REGEXP                     |
368
            // REPEAT _MAX_LOOP MAX-MIN   | if max > min
369
            // FORK END                   |
370
            // REGEXP                     |
371
            // LABEL END                  |
372
            // RESET _MAX_LOOP            |
373
540
            auto jump_kind = static_cast<ByteCodeValueType>(greedy ? OpCodeId::ForkStay : OpCodeId::ForkJump);
374
540
            if (maximum.value() > minimum) {
375
539
                new_bytecode.empend(jump_kind);
376
539
                new_bytecode.empend((ByteCodeValueType)0); // Placeholder for the jump target.
377
539
                auto pre_loop_fork_jump_index = new_bytecode.size();
378
539
                new_bytecode.extend(bytecode_to_repeat);
379
539
                auto repetitions = maximum.value() - minimum;
380
539
                auto fork_jump_address = new_bytecode.size();
381
539
                if (repetitions > 1) {
382
477
                    new_bytecode.empend((ByteCodeValueType)OpCodeId::Repeat);
383
477
                    new_bytecode.empend(bytecode_to_repeat.size() + 2);
384
477
                    new_bytecode.empend(static_cast<ByteCodeValueType>(repetitions - 1));
385
477
                    new_bytecode.empend(max_repetition_mark_id);
386
477
                    new_bytecode.empend(jump_kind);
387
477
                    new_bytecode.empend((ByteCodeValueType)0); // Placeholder for the jump target.
388
477
                    auto post_loop_fork_jump_index = new_bytecode.size();
389
477
                    new_bytecode.extend(bytecode_to_repeat);
390
477
                    fork_jump_address = new_bytecode.size();
391
392
477
                    new_bytecode[post_loop_fork_jump_index - 1] = (ByteCodeValueType)(fork_jump_address - post_loop_fork_jump_index);
393
394
477
                    new_bytecode.empend((ByteCodeValueType)OpCodeId::ResetRepeat);
395
477
                    new_bytecode.empend((ByteCodeValueType)max_repetition_mark_id);
396
477
                }
397
539
                new_bytecode[pre_loop_fork_jump_index - 1] = (ByteCodeValueType)(fork_jump_address - pre_loop_fork_jump_index);
398
539
            }
399
540
        } else {
400
            // no maximum value set, repeat finding if possible:
401
            // (REPEAT REGEXP MIN)
402
            // LABEL _START
403
            // CHECKPOINT _C
404
            // REGEXP
405
            // JUMP_NONEMPTY _C _START FORK
406
407
            // Note: This is only safe because REPEAT will leave one iteration outside (see repetition_n)
408
36
            auto checkpoint = s_next_checkpoint_serial_id++;
409
36
            new_bytecode.insert(new_bytecode.size() - bytecode_to_repeat.size(), (ByteCodeValueType)OpCodeId::Checkpoint);
410
36
            new_bytecode.insert(new_bytecode.size() - bytecode_to_repeat.size(), (ByteCodeValueType)checkpoint);
411
412
36
            auto jump_kind = static_cast<ByteCodeValueType>(greedy ? OpCodeId::ForkJump : OpCodeId::ForkStay);
413
36
            new_bytecode.empend((ByteCodeValueType)OpCodeId::JumpNonEmpty);
414
36
            new_bytecode.empend(-bytecode_to_repeat.size() - 4 - 2); // Jump to the last iteration
415
36
            new_bytecode.empend(checkpoint);                         // if _C is not empty.
416
36
            new_bytecode.empend(jump_kind);
417
36
        }
418
419
576
        bytecode_to_repeat = move(new_bytecode);
420
576
    }
_ZN5regex8ByteCode37transform_bytecode_repetition_min_maxITkN2AK8Concepts8IntegralEmEEvRS0_T_NS2_8OptionalIS5_EEmmb
Line
Count
Source
352
3.11k
    {
353
3.11k
        if (!maximum.has_value()) {
354
2.00k
            if (minimum == 0)
355
209
                return transform_bytecode_repetition_any(bytecode_to_repeat, greedy);
356
1.79k
            if (minimum == 1)
357
61
                return transform_bytecode_repetition_min_one(bytecode_to_repeat, greedy);
358
1.79k
        }
359
360
2.84k
        ByteCode new_bytecode;
361
2.84k
        new_bytecode.insert_bytecode_repetition_n(bytecode_to_repeat, minimum, min_repetition_mark_id);
362
363
2.84k
        if (maximum.has_value()) {
364
            // (REPEAT REGEXP MIN)
365
            // LABEL _MAX_LOOP            |
366
            // FORK END                   |
367
            // REGEXP                     |
368
            // REPEAT _MAX_LOOP MAX-MIN   | if max > min
369
            // FORK END                   |
370
            // REGEXP                     |
371
            // LABEL END                  |
372
            // RESET _MAX_LOOP            |
373
1.11k
            auto jump_kind = static_cast<ByteCodeValueType>(greedy ? OpCodeId::ForkStay : OpCodeId::ForkJump);
374
1.11k
            if (maximum.value() > minimum) {
375
1.04k
                new_bytecode.empend(jump_kind);
376
1.04k
                new_bytecode.empend((ByteCodeValueType)0); // Placeholder for the jump target.
377
1.04k
                auto pre_loop_fork_jump_index = new_bytecode.size();
378
1.04k
                new_bytecode.extend(bytecode_to_repeat);
379
1.04k
                auto repetitions = maximum.value() - minimum;
380
1.04k
                auto fork_jump_address = new_bytecode.size();
381
1.04k
                if (repetitions > 1) {
382
1.04k
                    new_bytecode.empend((ByteCodeValueType)OpCodeId::Repeat);
383
1.04k
                    new_bytecode.empend(bytecode_to_repeat.size() + 2);
384
1.04k
                    new_bytecode.empend(static_cast<ByteCodeValueType>(repetitions - 1));
385
1.04k
                    new_bytecode.empend(max_repetition_mark_id);
386
1.04k
                    new_bytecode.empend(jump_kind);
387
1.04k
                    new_bytecode.empend((ByteCodeValueType)0); // Placeholder for the jump target.
388
1.04k
                    auto post_loop_fork_jump_index = new_bytecode.size();
389
1.04k
                    new_bytecode.extend(bytecode_to_repeat);
390
1.04k
                    fork_jump_address = new_bytecode.size();
391
392
1.04k
                    new_bytecode[post_loop_fork_jump_index - 1] = (ByteCodeValueType)(fork_jump_address - post_loop_fork_jump_index);
393
394
1.04k
                    new_bytecode.empend((ByteCodeValueType)OpCodeId::ResetRepeat);
395
1.04k
                    new_bytecode.empend((ByteCodeValueType)max_repetition_mark_id);
396
1.04k
                }
397
1.04k
                new_bytecode[pre_loop_fork_jump_index - 1] = (ByteCodeValueType)(fork_jump_address - pre_loop_fork_jump_index);
398
1.04k
            }
399
1.73k
        } else {
400
            // no maximum value set, repeat finding if possible:
401
            // (REPEAT REGEXP MIN)
402
            // LABEL _START
403
            // CHECKPOINT _C
404
            // REGEXP
405
            // JUMP_NONEMPTY _C _START FORK
406
407
            // Note: This is only safe because REPEAT will leave one iteration outside (see repetition_n)
408
1.73k
            auto checkpoint = s_next_checkpoint_serial_id++;
409
1.73k
            new_bytecode.insert(new_bytecode.size() - bytecode_to_repeat.size(), (ByteCodeValueType)OpCodeId::Checkpoint);
410
1.73k
            new_bytecode.insert(new_bytecode.size() - bytecode_to_repeat.size(), (ByteCodeValueType)checkpoint);
411
412
1.73k
            auto jump_kind = static_cast<ByteCodeValueType>(greedy ? OpCodeId::ForkJump : OpCodeId::ForkStay);
413
1.73k
            new_bytecode.empend((ByteCodeValueType)OpCodeId::JumpNonEmpty);
414
1.73k
            new_bytecode.empend(-bytecode_to_repeat.size() - 4 - 2); // Jump to the last iteration
415
1.73k
            new_bytecode.empend(checkpoint);                         // if _C is not empty.
416
1.73k
            new_bytecode.empend(jump_kind);
417
1.73k
        }
418
419
2.84k
        bytecode_to_repeat = move(new_bytecode);
420
2.84k
    }
421
422
    template<Integral T>
423
    void insert_bytecode_repetition_n(ByteCode& bytecode_to_repeat, T n, size_t repetition_mark_id)
424
6.83k
    {
425
        // LABEL _LOOP
426
        // REGEXP
427
        // REPEAT _LOOP N-1
428
        // REGEXP
429
6.83k
        if (n == 0)
430
663
            return;
431
432
        // Note: this bytecode layout allows callers to repeat the last REGEXP instruction without the
433
        // REPEAT instruction forcing another loop.
434
6.17k
        extend(bytecode_to_repeat);
435
436
6.17k
        if (n > 1) {
437
5.99k
            empend(static_cast<ByteCodeValueType>(OpCodeId::Repeat));
438
5.99k
            empend(bytecode_to_repeat.size());
439
5.99k
            empend(static_cast<ByteCodeValueType>(n - 1));
440
5.99k
            empend(repetition_mark_id);
441
442
5.99k
            extend(bytecode_to_repeat);
443
5.99k
        }
444
6.17k
    }
_ZN5regex8ByteCode28insert_bytecode_repetition_nITkN2AK8Concepts8IntegralEjEEvRS0_T_m
Line
Count
Source
424
3.98k
    {
425
        // LABEL _LOOP
426
        // REGEXP
427
        // REPEAT _LOOP N-1
428
        // REGEXP
429
3.98k
        if (n == 0)
430
657
            return;
431
432
        // Note: this bytecode layout allows callers to repeat the last REGEXP instruction without the
433
        // REPEAT instruction forcing another loop.
434
3.33k
        extend(bytecode_to_repeat);
435
436
3.33k
        if (n > 1) {
437
3.15k
            empend(static_cast<ByteCodeValueType>(OpCodeId::Repeat));
438
3.15k
            empend(bytecode_to_repeat.size());
439
3.15k
            empend(static_cast<ByteCodeValueType>(n - 1));
440
3.15k
            empend(repetition_mark_id);
441
442
3.15k
            extend(bytecode_to_repeat);
443
3.15k
        }
444
3.33k
    }
_ZN5regex8ByteCode28insert_bytecode_repetition_nITkN2AK8Concepts8IntegralEmEEvRS0_T_m
Line
Count
Source
424
2.84k
    {
425
        // LABEL _LOOP
426
        // REGEXP
427
        // REPEAT _LOOP N-1
428
        // REGEXP
429
2.84k
        if (n == 0)
430
6
            return;
431
432
        // Note: this bytecode layout allows callers to repeat the last REGEXP instruction without the
433
        // REPEAT instruction forcing another loop.
434
2.84k
        extend(bytecode_to_repeat);
435
436
2.84k
        if (n > 1) {
437
2.84k
            empend(static_cast<ByteCodeValueType>(OpCodeId::Repeat));
438
2.84k
            empend(bytecode_to_repeat.size());
439
2.84k
            empend(static_cast<ByteCodeValueType>(n - 1));
440
2.84k
            empend(repetition_mark_id);
441
442
2.84k
            extend(bytecode_to_repeat);
443
2.84k
        }
444
2.84k
    }
445
446
    static void transform_bytecode_repetition_min_one(ByteCode& bytecode_to_repeat, bool greedy)
447
462k
    {
448
        // LABEL _START = -bytecode_to_repeat.size()
449
        // CHECKPOINT _C
450
        // REGEXP
451
        // JUMP_NONEMPTY _C _START FORKSTAY (FORKJUMP -> Greedy)
452
453
462k
        auto checkpoint = s_next_checkpoint_serial_id++;
454
462k
        bytecode_to_repeat.prepend((ByteCodeValueType)checkpoint);
455
462k
        bytecode_to_repeat.prepend((ByteCodeValueType)OpCodeId::Checkpoint);
456
457
462k
        bytecode_to_repeat.empend((ByteCodeValueType)OpCodeId::JumpNonEmpty);
458
462k
        bytecode_to_repeat.empend(-bytecode_to_repeat.size() - 3); // Jump to the _START label...
459
462k
        bytecode_to_repeat.empend(checkpoint);                     // ...if _C is not empty
460
461
462k
        if (greedy)
462
462k
            bytecode_to_repeat.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkJump));
463
8
        else
464
8
            bytecode_to_repeat.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkStay));
465
462k
    }
466
467
    static void transform_bytecode_repetition_any(ByteCode& bytecode_to_repeat, bool greedy)
468
270k
    {
469
        // LABEL _START
470
        // FORKJUMP _END  (FORKSTAY -> Greedy)
471
        // CHECKPOINT _C
472
        // REGEXP
473
        // JUMP_NONEMPTY _C _START JUMP
474
        // LABEL _END
475
476
        // LABEL _START = m_bytes.size();
477
270k
        ByteCode bytecode;
478
479
270k
        if (greedy)
480
270k
            bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkStay));
481
19
        else
482
19
            bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkJump));
483
484
270k
        bytecode.empend(bytecode_to_repeat.size() + 2 + 4); // Jump to the _END label
485
486
270k
        auto checkpoint = s_next_checkpoint_serial_id++;
487
270k
        bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::Checkpoint));
488
270k
        bytecode.empend(static_cast<ByteCodeValueType>(checkpoint));
489
490
270k
        bytecode.extend(bytecode_to_repeat);
491
492
270k
        bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::JumpNonEmpty));
493
270k
        bytecode.empend(-bytecode.size() - 3); // Jump(...) to the _START label...
494
270k
        bytecode.empend(checkpoint);           // ...only if _C passes.
495
270k
        bytecode.empend((ByteCodeValueType)OpCodeId::Jump);
496
        // LABEL _END = bytecode.size()
497
498
270k
        bytecode_to_repeat = move(bytecode);
499
270k
    }
500
501
    static void transform_bytecode_repetition_zero_or_one(ByteCode& bytecode_to_repeat, bool greedy)
502
1.25M
    {
503
        // FORKJUMP _END (FORKSTAY -> Greedy)
504
        // REGEXP
505
        // LABEL _END
506
1.25M
        ByteCode bytecode;
507
508
1.25M
        if (greedy)
509
1.21M
            bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkStay));
510
32.4k
        else
511
32.4k
            bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkJump));
512
513
1.25M
        bytecode.empend(bytecode_to_repeat.size()); // Jump to the _END label
514
515
1.25M
        bytecode.extend(move(bytecode_to_repeat));
516
        // LABEL _END = bytecode.size()
517
518
1.25M
        bytecode_to_repeat = move(bytecode);
519
1.25M
    }
520
521
    OpCode& get_opcode(MatchState& state) const;
522
523
14.5k
    static void reset_checkpoint_serial_id() { s_next_checkpoint_serial_id = 0; }
524
525
private:
526
    void insert_string(StringView view)
527
1.25M
    {
528
1.25M
        empend((ByteCodeValueType)view.length());
529
91.3M
        for (size_t i = 0; i < view.length(); ++i)
530
90.0M
            empend((ByteCodeValueType)view[i]);
531
1.25M
    }
532
533
    void ensure_opcodes_initialized();
534
    ALWAYS_INLINE OpCode& get_opcode_by_id(OpCodeId id) const;
535
    static OwnPtr<OpCode> s_opcodes[(size_t)OpCodeId::Last + 1];
536
    static bool s_opcodes_initialized;
537
    static size_t s_next_checkpoint_serial_id;
538
};
539
540
#define ENUMERATE_EXECUTION_RESULTS                          \
541
0
    __ENUMERATE_EXECUTION_RESULT(Continue)                   \
542
0
    __ENUMERATE_EXECUTION_RESULT(Fork_PrioHigh)              \
543
0
    __ENUMERATE_EXECUTION_RESULT(Fork_PrioLow)               \
544
0
    __ENUMERATE_EXECUTION_RESULT(Failed)                     \
545
0
    __ENUMERATE_EXECUTION_RESULT(Failed_ExecuteLowPrioForks) \
546
0
    __ENUMERATE_EXECUTION_RESULT(Succeeded)
547
548
enum class ExecutionResult : u8 {
549
#define __ENUMERATE_EXECUTION_RESULT(x) x,
550
    ENUMERATE_EXECUTION_RESULTS
551
#undef __ENUMERATE_EXECUTION_RESULT
552
};
553
554
StringView execution_result_name(ExecutionResult result);
555
StringView opcode_id_name(OpCodeId opcode_id);
556
StringView boundary_check_type_name(BoundaryCheckType);
557
StringView character_compare_type_name(CharacterCompareType result);
558
StringView character_class_name(CharClass ch_class);
559
560
class OpCode {
561
public:
562
132
    OpCode() = default;
563
0
    virtual ~OpCode() = default;
564
565
    virtual OpCodeId opcode_id() const = 0;
566
    virtual size_t size() const = 0;
567
    virtual ExecutionResult execute(MatchInput const& input, MatchState& state) const = 0;
568
569
    ALWAYS_INLINE ByteCodeValueType argument(size_t offset) const
570
587M
    {
571
587M
        return m_bytecode->at(state().instruction_position + 1 + offset);
572
587M
    }
573
574
    ALWAYS_INLINE StringView name() const;
575
    static StringView name(OpCodeId);
576
577
363M
    ALWAYS_INLINE void set_state(MatchState& state) { m_state = &state; }
578
579
363M
    ALWAYS_INLINE void set_bytecode(ByteCode& bytecode) { m_bytecode = &bytecode; }
580
581
    ALWAYS_INLINE MatchState const& state() const
582
608M
    {
583
608M
        VERIFY(m_state);
584
608M
        return *m_state;
585
608M
    }
586
587
    ByteString to_byte_string() const
588
0
    {
589
0
        return ByteString::formatted("[{:#02X}] {}", (int)opcode_id(), name(opcode_id()));
590
0
    }
591
592
    virtual ByteString arguments_string() const = 0;
593
594
0
    ALWAYS_INLINE ByteCode const& bytecode() const { return *m_bytecode; }
595
596
protected:
597
    ByteCode* m_bytecode { nullptr };
598
    MatchState* m_state { nullptr };
599
};
600
601
class OpCode_Exit final : public OpCode {
602
public:
603
    ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
604
4.30k
    ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::Exit; }
605
770k
    ALWAYS_INLINE size_t size() const override { return 1; }
606
0
    ByteString arguments_string() const override { return ByteString::empty(); }
607
};
608
609
class OpCode_FailForks final : public OpCode {
610
public:
611
    ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
612
22.6k
    ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::FailForks; }
613
39.5k
    ALWAYS_INLINE size_t size() const override { return 1; }
614
0
    ByteString arguments_string() const override { return ByteString::empty(); }
615
};
616
617
class OpCode_Save final : public OpCode {
618
public:
619
    ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
620
19.8k
    ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::Save; }
621
19.9k
    ALWAYS_INLINE size_t size() const override { return 1; }
622
0
    ByteString arguments_string() const override { return ByteString::empty(); }
623
};
624
625
class OpCode_Restore final : public OpCode {
626
public:
627
    ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
628
19.8k
    ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::Restore; }
629
19.9k
    ALWAYS_INLINE size_t size() const override { return 1; }
630
0
    ByteString arguments_string() const override { return ByteString::empty(); }
631
};
632
633
class OpCode_GoBack final : public OpCode {
634
public:
635
    ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
636
1.34k
    ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::GoBack; }
637
1.41k
    ALWAYS_INLINE size_t size() const override { return 2; }
638
0
    ALWAYS_INLINE size_t count() const { return argument(0); }
639
0
    ByteString arguments_string() const override { return ByteString::formatted("count={}", count()); }
640
};
641
642
class OpCode_Jump final : public OpCode {
643
public:
644
    ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
645
2.47M
    ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::Jump; }
646
8.87M
    ALWAYS_INLINE size_t size() const override { return 2; }
647
3.83M
    ALWAYS_INLINE ssize_t offset() const { return argument(0); }
648
    ByteString arguments_string() const override
649
0
    {
650
0
        return ByteString::formatted("offset={} [&{}]", offset(), state().instruction_position + size() + offset());
651
0
    }
652
};
653
654
class OpCode_ForkJump : public OpCode {
655
public:
656
    ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
657
1.81M
    ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::ForkJump; }
658
3.54M
    ALWAYS_INLINE size_t size() const override { return 2; }
659
1.50M
    ALWAYS_INLINE ssize_t offset() const { return argument(0); }
660
    ByteString arguments_string() const override
661
0
    {
662
0
        return ByteString::formatted("offset={} [&{}], sp: {}", offset(), state().instruction_position + size() + offset(), state().string_position);
663
0
    }
664
};
665
666
class OpCode_ForkReplaceJump final : public OpCode_ForkJump {
667
public:
668
    ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
669
0
    ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::ForkReplaceJump; }
670
};
671
672
class OpCode_ForkStay : public OpCode {
673
public:
674
    ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
675
8.07M
    ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::ForkStay; }
676
156M
    ALWAYS_INLINE size_t size() const override { return 2; }
677
53.4M
    ALWAYS_INLINE ssize_t offset() const { return argument(0); }
678
    ByteString arguments_string() const override
679
0
    {
680
0
        return ByteString::formatted("offset={} [&{}], sp: {}", offset(), state().instruction_position + size() + offset(), state().string_position);
681
0
    }
682
};
683
684
class OpCode_ForkReplaceStay final : public OpCode_ForkStay {
685
public:
686
    ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
687
0
    ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::ForkReplaceStay; }
688
};
689
690
class OpCode_CheckBegin final : public OpCode {
691
public:
692
    ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
693
192k
    ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::CheckBegin; }
694
5.76M
    ALWAYS_INLINE size_t size() const override { return 1; }
695
0
    ByteString arguments_string() const override { return ByteString::empty(); }
696
};
697
698
class OpCode_CheckEnd final : public OpCode {
699
public:
700
    ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
701
107k
    ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::CheckEnd; }
702
3.99M
    ALWAYS_INLINE size_t size() const override { return 1; }
703
0
    ByteString arguments_string() const override { return ByteString::empty(); }
704
};
705
706
class OpCode_CheckBoundary final : public OpCode {
707
public:
708
    ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
709
12.2k
    ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::CheckBoundary; }
710
13.3k
    ALWAYS_INLINE size_t size() const override { return 2; }
711
0
    ALWAYS_INLINE size_t arguments_count() const { return 1; }
712
0
    ALWAYS_INLINE BoundaryCheckType type() const { return static_cast<BoundaryCheckType>(argument(0)); }
713
0
    ByteString arguments_string() const override { return ByteString::formatted("kind={} ({})", (long unsigned int)argument(0), boundary_check_type_name(type())); }
714
};
715
716
class OpCode_ClearCaptureGroup final : public OpCode {
717
public:
718
    ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
719
11.2M
    ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::ClearCaptureGroup; }
720
14.3M
    ALWAYS_INLINE size_t size() const override { return 2; }
721
0
    ALWAYS_INLINE size_t id() const { return argument(0); }
722
0
    ByteString arguments_string() const override { return ByteString::formatted("id={}", id()); }
723
};
724
725
class OpCode_SaveLeftCaptureGroup final : public OpCode {
726
public:
727
    ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
728
796k
    ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::SaveLeftCaptureGroup; }
729
16.4M
    ALWAYS_INLINE size_t size() const override { return 2; }
730
39.2M
    ALWAYS_INLINE size_t id() const { return argument(0); }
731
0
    ByteString arguments_string() const override { return ByteString::formatted("id={}", id()); }
732
};
733
734
class OpCode_SaveRightCaptureGroup final : public OpCode {
735
public:
736
    ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
737
739k
    ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::SaveRightCaptureGroup; }
738
10.6M
    ALWAYS_INLINE size_t size() const override { return 2; }
739
9.48M
    ALWAYS_INLINE size_t id() const { return argument(0); }
740
0
    ByteString arguments_string() const override { return ByteString::formatted("id={}", id()); }
741
};
742
743
class OpCode_SaveRightNamedCaptureGroup final : public OpCode {
744
public:
745
    ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
746
57
    ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::SaveRightNamedCaptureGroup; }
747
49
    ALWAYS_INLINE size_t size() const override { return 4; }
748
0
    ALWAYS_INLINE StringView name() const { return { reinterpret_cast<char*>(argument(0)), length() }; }
749
0
    ALWAYS_INLINE size_t length() const { return argument(1); }
750
0
    ALWAYS_INLINE size_t id() const { return argument(2); }
751
    ByteString arguments_string() const override
752
0
    {
753
0
        return ByteString::formatted("name={}, length={}", name(), length());
754
0
    }
755
};
756
757
class OpCode_Compare final : public OpCode {
758
public:
759
    ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
760
76.4M
    ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::Compare; }
761
167M
    ALWAYS_INLINE size_t size() const override { return arguments_size() + 3; }
762
107M
    ALWAYS_INLINE size_t arguments_count() const { return argument(0); }
763
167M
    ALWAYS_INLINE size_t arguments_size() const { return argument(1); }
764
    ByteString arguments_string() const override;
765
    Vector<ByteString> variable_arguments_to_byte_string(Optional<MatchInput const&> input = {}) const;
766
    Vector<CompareTypeAndValuePair> flat_compares() const;
767
    static bool matches_character_class(CharClass, u32, bool insensitive);
768
769
private:
770
    ALWAYS_INLINE static void compare_char(MatchInput const& input, MatchState& state, u32 ch1, bool inverse, bool& inverse_matched);
771
    ALWAYS_INLINE static bool compare_string(MatchInput const& input, MatchState& state, RegexStringView str, bool& had_zero_length_match);
772
    ALWAYS_INLINE static void compare_character_class(MatchInput const& input, MatchState& state, CharClass character_class, u32 ch, bool inverse, bool& inverse_matched);
773
    ALWAYS_INLINE static void compare_character_range(MatchInput const& input, MatchState& state, u32 from, u32 to, u32 ch, bool inverse, bool& inverse_matched);
774
    ALWAYS_INLINE static void compare_property(MatchInput const& input, MatchState& state, Unicode::Property property, bool inverse, bool& inverse_matched);
775
    ALWAYS_INLINE static void compare_general_category(MatchInput const& input, MatchState& state, Unicode::GeneralCategory general_category, bool inverse, bool& inverse_matched);
776
    ALWAYS_INLINE static void compare_script(MatchInput const& input, MatchState& state, Unicode::Script script, bool inverse, bool& inverse_matched);
777
    ALWAYS_INLINE static void compare_script_extension(MatchInput const& input, MatchState& state, Unicode::Script script, bool inverse, bool& inverse_matched);
778
};
779
780
class OpCode_Repeat : public OpCode {
781
public:
782
    ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
783
1.12M
    ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::Repeat; }
784
2.82M
    ALWAYS_INLINE size_t size() const override { return 4; }
785
383k
    ALWAYS_INLINE size_t offset() const { return argument(0); }
786
3.79M
    ALWAYS_INLINE u64 count() const { return argument(1); }
787
5.66M
    ALWAYS_INLINE size_t id() const { return argument(2); }
788
    ByteString arguments_string() const override
789
0
    {
790
0
        auto reps = id() < state().repetition_marks.size() ? state().repetition_marks.at(id()) : 0;
791
0
        return ByteString::formatted("offset={} [&{}] count={} id={} rep={}, sp: {}",
792
0
            static_cast<ssize_t>(offset()),
793
0
            state().instruction_position - offset(),
794
0
            count() + 1,
795
0
            id(),
796
0
            reps + 1,
797
0
            state().string_position);
798
0
    }
799
};
800
801
class OpCode_ResetRepeat : public OpCode {
802
public:
803
    ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
804
170k
    ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::ResetRepeat; }
805
5.33M
    ALWAYS_INLINE size_t size() const override { return 2; }
806
15.0M
    ALWAYS_INLINE size_t id() const { return argument(0); }
807
    ByteString arguments_string() const override
808
0
    {
809
0
        auto reps = id() < state().repetition_marks.size() ? state().repetition_marks.at(id()) : 0;
810
0
        return ByteString::formatted("id={} rep={}", id(), reps + 1);
811
0
    }
812
};
813
814
class OpCode_Checkpoint final : public OpCode {
815
public:
816
    ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
817
1.39M
    ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::Checkpoint; }
818
53.4M
    ALWAYS_INLINE size_t size() const override { return 2; }
819
51.0M
    ALWAYS_INLINE size_t id() const { return argument(0); }
820
0
    ByteString arguments_string() const override { return ByteString::formatted("id={}", id()); }
821
};
822
823
class OpCode_JumpNonEmpty final : public OpCode {
824
public:
825
    ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
826
1.95M
    ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::JumpNonEmpty; }
827
56.9M
    ALWAYS_INLINE size_t size() const override { return 4; }
828
43.8M
    ALWAYS_INLINE ssize_t offset() const { return argument(0); }
829
42.1M
    ALWAYS_INLINE ssize_t checkpoint() const { return argument(1); }
830
42.9M
    ALWAYS_INLINE OpCodeId form() const { return (OpCodeId)argument(2); }
831
    ByteString arguments_string() const override
832
0
    {
833
0
        return ByteString::formatted("{} offset={} [&{}], cp={}",
834
0
            opcode_id_name(form()),
835
0
            offset(), state().instruction_position + size() + offset(),
836
0
            checkpoint());
837
0
    }
838
};
839
840
ALWAYS_INLINE OpCode& ByteCode::get_opcode(regex::MatchState& state) const
841
363M
{
842
363M
    OpCodeId opcode_id;
843
363M
    if (auto opcode_ptr = static_cast<DisjointChunks<ByteCodeValueType> const&>(*this).find(state.instruction_position))
844
363M
        opcode_id = (OpCodeId)*opcode_ptr;
845
772k
    else
846
772k
        opcode_id = OpCodeId::Exit;
847
848
363M
    auto& opcode = get_opcode_by_id(opcode_id);
849
363M
    opcode.set_state(state);
850
363M
    return opcode;
851
363M
}
852
853
ALWAYS_INLINE OpCode& ByteCode::get_opcode_by_id(OpCodeId id) const
854
363M
{
855
363M
    VERIFY(id >= OpCodeId::First && id <= OpCodeId::Last);
856
857
363M
    auto& opcode = s_opcodes[(u32)id];
858
363M
    opcode->set_bytecode(*const_cast<ByteCode*>(this));
859
363M
    return *opcode;
860
363M
}
861
862
template<typename T>
863
bool is(OpCode const&);
864
865
template<typename T>
866
ALWAYS_INLINE bool is(OpCode const&)
867
{
868
    return false;
869
}
870
871
template<typename T>
872
ALWAYS_INLINE bool is(OpCode const* opcode)
873
{
874
    return is<T>(*opcode);
875
}
876
877
template<>
878
ALWAYS_INLINE bool is<OpCode_ForkStay>(OpCode const& opcode)
879
0
{
880
0
    return opcode.opcode_id() == OpCodeId::ForkStay;
881
0
}
882
883
template<>
884
ALWAYS_INLINE bool is<OpCode_Exit>(OpCode const& opcode)
885
0
{
886
0
    return opcode.opcode_id() == OpCodeId::Exit;
887
0
}
888
889
template<>
890
ALWAYS_INLINE bool is<OpCode_Compare>(OpCode const& opcode)
891
0
{
892
0
    return opcode.opcode_id() == OpCodeId::Compare;
893
0
}
894
895
template<typename T>
896
ALWAYS_INLINE T const& to(OpCode const& opcode)
897
0
{
898
0
    return verify_cast<T>(opcode);
899
0
}
900
901
template<typename T>
902
ALWAYS_INLINE T* to(OpCode* opcode)
903
{
904
    return verify_cast<T>(opcode);
905
}
906
907
template<typename T>
908
ALWAYS_INLINE T const* to(OpCode const* opcode)
909
{
910
    return verify_cast<T>(opcode);
911
}
912
913
template<typename T>
914
ALWAYS_INLINE T& to(OpCode& opcode)
915
0
{
916
0
    return verify_cast<T>(opcode);
917
0
}
918
919
}