Coverage Report

Created: 2026-02-16 07:47

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/serenity/Userland/Libraries/LibRegex/RegexByteCode.h
Line
Count
Source
1
/*
2
 * Copyright (c) 2020, Emanuel Sprung <emanuel.sprung@gmail.com>
3
 *
4
 * SPDX-License-Identifier: BSD-2-Clause
5
 */
6
7
#pragma once
8
9
#include "RegexBytecodeStreamOptimizer.h"
10
#include "RegexMatch.h"
11
#include "RegexOptions.h"
12
13
#include <AK/Concepts.h>
14
#include <AK/DisjointChunks.h>
15
#include <AK/Forward.h>
16
#include <AK/HashMap.h>
17
#include <AK/NonnullOwnPtr.h>
18
#include <AK/OwnPtr.h>
19
#include <AK/Traits.h>
20
#include <AK/TypeCasts.h>
21
#include <AK/Types.h>
22
#include <AK/Vector.h>
23
#include <LibUnicode/Forward.h>
24
25
namespace regex {
26
27
using ByteCodeValueType = u64;
28
29
#define ENUMERATE_OPCODES                          \
30
6
    __ENUMERATE_OPCODE(Compare)                    \
31
6
    __ENUMERATE_OPCODE(Jump)                       \
32
6
    __ENUMERATE_OPCODE(JumpNonEmpty)               \
33
6
    __ENUMERATE_OPCODE(ForkJump)                   \
34
6
    __ENUMERATE_OPCODE(ForkStay)                   \
35
6
    __ENUMERATE_OPCODE(ForkReplaceJump)            \
36
6
    __ENUMERATE_OPCODE(ForkReplaceStay)            \
37
6
    __ENUMERATE_OPCODE(FailForks)                  \
38
6
    __ENUMERATE_OPCODE(SaveLeftCaptureGroup)       \
39
6
    __ENUMERATE_OPCODE(SaveRightCaptureGroup)      \
40
6
    __ENUMERATE_OPCODE(SaveRightNamedCaptureGroup) \
41
6
    __ENUMERATE_OPCODE(CheckBegin)                 \
42
6
    __ENUMERATE_OPCODE(CheckEnd)                   \
43
6
    __ENUMERATE_OPCODE(CheckBoundary)              \
44
6
    __ENUMERATE_OPCODE(Save)                       \
45
6
    __ENUMERATE_OPCODE(Restore)                    \
46
6
    __ENUMERATE_OPCODE(GoBack)                     \
47
6
    __ENUMERATE_OPCODE(ClearCaptureGroup)          \
48
6
    __ENUMERATE_OPCODE(Repeat)                     \
49
6
    __ENUMERATE_OPCODE(ResetRepeat)                \
50
6
    __ENUMERATE_OPCODE(Checkpoint)                 \
51
6
    __ENUMERATE_OPCODE(Exit)
52
53
// clang-format off
54
enum class OpCodeId : ByteCodeValueType {
55
#define __ENUMERATE_OPCODE(x) x,
56
    ENUMERATE_OPCODES
57
#undef __ENUMERATE_OPCODE
58
59
    First = Compare,
60
    Last = Exit,
61
};
62
// clang-format on
63
64
#define ENUMERATE_CHARACTER_COMPARE_TYPES                    \
65
0
    __ENUMERATE_CHARACTER_COMPARE_TYPE(Undefined)            \
66
0
    __ENUMERATE_CHARACTER_COMPARE_TYPE(Inverse)              \
67
0
    __ENUMERATE_CHARACTER_COMPARE_TYPE(TemporaryInverse)     \
68
0
    __ENUMERATE_CHARACTER_COMPARE_TYPE(AnyChar)              \
69
0
    __ENUMERATE_CHARACTER_COMPARE_TYPE(Char)                 \
70
0
    __ENUMERATE_CHARACTER_COMPARE_TYPE(String)               \
71
0
    __ENUMERATE_CHARACTER_COMPARE_TYPE(CharClass)            \
72
0
    __ENUMERATE_CHARACTER_COMPARE_TYPE(CharRange)            \
73
0
    __ENUMERATE_CHARACTER_COMPARE_TYPE(Reference)            \
74
0
    __ENUMERATE_CHARACTER_COMPARE_TYPE(Property)             \
75
0
    __ENUMERATE_CHARACTER_COMPARE_TYPE(GeneralCategory)      \
76
0
    __ENUMERATE_CHARACTER_COMPARE_TYPE(Script)               \
77
0
    __ENUMERATE_CHARACTER_COMPARE_TYPE(ScriptExtension)      \
78
0
    __ENUMERATE_CHARACTER_COMPARE_TYPE(RangeExpressionDummy) \
79
0
    __ENUMERATE_CHARACTER_COMPARE_TYPE(LookupTable)          \
80
0
    __ENUMERATE_CHARACTER_COMPARE_TYPE(And)                  \
81
0
    __ENUMERATE_CHARACTER_COMPARE_TYPE(Or)                   \
82
0
    __ENUMERATE_CHARACTER_COMPARE_TYPE(EndAndOr)
83
84
enum class CharacterCompareType : ByteCodeValueType {
85
#define __ENUMERATE_CHARACTER_COMPARE_TYPE(x) x,
86
    ENUMERATE_CHARACTER_COMPARE_TYPES
87
#undef __ENUMERATE_CHARACTER_COMPARE_TYPE
88
};
89
90
#define ENUMERATE_CHARACTER_CLASSES    \
91
0
    __ENUMERATE_CHARACTER_CLASS(Alnum) \
92
0
    __ENUMERATE_CHARACTER_CLASS(Cntrl) \
93
0
    __ENUMERATE_CHARACTER_CLASS(Lower) \
94
0
    __ENUMERATE_CHARACTER_CLASS(Space) \
95
0
    __ENUMERATE_CHARACTER_CLASS(Alpha) \
96
0
    __ENUMERATE_CHARACTER_CLASS(Digit) \
97
0
    __ENUMERATE_CHARACTER_CLASS(Print) \
98
0
    __ENUMERATE_CHARACTER_CLASS(Upper) \
99
0
    __ENUMERATE_CHARACTER_CLASS(Blank) \
100
0
    __ENUMERATE_CHARACTER_CLASS(Graph) \
101
0
    __ENUMERATE_CHARACTER_CLASS(Punct) \
102
0
    __ENUMERATE_CHARACTER_CLASS(Word)  \
103
0
    __ENUMERATE_CHARACTER_CLASS(Xdigit)
104
105
enum class CharClass : ByteCodeValueType {
106
#define __ENUMERATE_CHARACTER_CLASS(x) x,
107
    ENUMERATE_CHARACTER_CLASSES
108
#undef __ENUMERATE_CHARACTER_CLASS
109
};
110
111
#define ENUMERATE_BOUNDARY_CHECK_TYPES    \
112
0
    __ENUMERATE_BOUNDARY_CHECK_TYPE(Word) \
113
0
    __ENUMERATE_BOUNDARY_CHECK_TYPE(NonWord)
114
115
enum class BoundaryCheckType : ByteCodeValueType {
116
#define __ENUMERATE_BOUNDARY_CHECK_TYPE(x) x,
117
    ENUMERATE_BOUNDARY_CHECK_TYPES
118
#undef __ENUMERATE_BOUNDARY_CHECK_TYPE
119
};
120
121
struct CharRange {
122
    u32 const from;
123
    u32 const to;
124
125
    CharRange(u64 value)
126
44.0M
        : from(value >> 32)
127
44.0M
        , to(value & 0xffffffff)
128
44.0M
    {
129
44.0M
    }
130
131
    CharRange(u32 from, u32 to)
132
73.3M
        : from(from)
133
73.3M
        , to(to)
134
73.3M
    {
135
73.3M
    }
136
137
13.9M
    operator ByteCodeValueType() const { return ((u64)from << 32) | to; }
138
};
139
140
struct CompareTypeAndValuePair {
141
    CharacterCompareType type;
142
    ByteCodeValueType value;
143
};
144
145
class OpCode;
146
147
class ByteCode : public DisjointChunks<ByteCodeValueType> {
148
    using Base = DisjointChunks<ByteCodeValueType>;
149
150
public:
151
    ByteCode()
152
124M
    {
153
124M
        ensure_opcodes_initialized();
154
124M
    }
155
156
51.1M
    ByteCode(ByteCode const&) = default;
157
175M
    virtual ~ByteCode() = default;
158
159
2.02M
    ByteCode& operator=(ByteCode&&) = default;
160
    ByteCode& operator=(Base&& value)
161
0
    {
162
0
        static_cast<Base&>(*this) = move(value);
163
0
        return *this;
164
0
    }
165
166
    template<typename... Args>
167
    void empend(Args&&... args)
168
267M
    {
169
267M
        if (is_empty())
170
42.6M
            Base::append({});
171
267M
        Base::last_chunk().empend(forward<Args>(args)...);
172
267M
    }
void regex::ByteCode::empend<unsigned long>(unsigned long&&)
Line
Count
Source
168
209M
    {
169
209M
        if (is_empty())
170
42.6M
            Base::append({});
171
209M
        Base::last_chunk().empend(forward<Args>(args)...);
172
209M
    }
void regex::ByteCode::empend<unsigned long&>(unsigned long&)
Line
Count
Source
168
57.7M
    {
169
57.7M
        if (is_empty())
170
1.00k
            Base::append({});
171
57.7M
        Base::last_chunk().empend(forward<Args>(args)...);
172
57.7M
    }
void regex::ByteCode::empend<unsigned int>(unsigned int&&)
Line
Count
Source
168
238k
    {
169
238k
        if (is_empty())
170
0
            Base::append({});
171
238k
        Base::last_chunk().empend(forward<Args>(args)...);
172
238k
    }
173
    template<typename T>
174
    void append(T&& value)
175
124M
    {
176
124M
        if (is_empty())
177
46.5M
            Base::append({});
178
124M
        Base::last_chunk().append(forward<T>(value));
179
124M
    }
void regex::ByteCode::append<unsigned long>(unsigned long&&)
Line
Count
Source
175
63.7M
    {
176
63.7M
        if (is_empty())
177
46.5M
            Base::append({});
178
63.7M
        Base::last_chunk().append(forward<T>(value));
179
63.7M
    }
void regex::ByteCode::append<regex::CharRange>(regex::CharRange&&)
Line
Count
Source
175
8.56M
    {
176
8.56M
        if (is_empty())
177
0
            Base::append({});
178
8.56M
        Base::last_chunk().append(forward<T>(value));
179
8.56M
    }
void regex::ByteCode::append<int>(int&&)
Line
Count
Source
175
7.47M
    {
176
7.47M
        if (is_empty())
177
0
            Base::append({});
178
7.47M
        Base::last_chunk().append(forward<T>(value));
179
7.47M
    }
void regex::ByteCode::append<unsigned long&>(unsigned long&)
Line
Count
Source
175
45.1M
    {
176
45.1M
        if (is_empty())
177
0
            Base::append({});
178
45.1M
        Base::last_chunk().append(forward<T>(value));
179
45.1M
    }
180
    template<typename T>
181
    void prepend(T&& value)
182
1.42M
    {
183
1.42M
        if (is_empty())
184
365
            return append(forward<T>(value));
185
1.42M
        Base::first_chunk().prepend(forward<T>(value));
186
1.42M
    }
187
188
    void append(Span<ByteCodeValueType const> value)
189
250k
    {
190
250k
        if (is_empty())
191
0
            Base::append({});
192
250k
        auto& last = Base::last_chunk();
193
250k
        last.ensure_capacity(value.size());
194
250k
        for (auto v : value)
195
37.6M
            last.unchecked_append(v);
196
250k
    }
197
198
    void ensure_capacity(size_t capacity)
199
251k
    {
200
251k
        if (is_empty())
201
752
            Base::append({});
202
251k
        Base::last_chunk().ensure_capacity(capacity);
203
251k
    }
204
205
    void last_chunk() const = delete;
206
    void first_chunk() const = delete;
207
208
    void insert_bytecode_compare_values(Vector<CompareTypeAndValuePair>&& pairs)
209
39.3M
    {
210
39.3M
        Optimizer::append_character_class(*this, move(pairs));
211
39.3M
    }
212
213
    void insert_bytecode_check_boundary(BoundaryCheckType type)
214
218k
    {
215
218k
        ByteCode bytecode;
216
218k
        bytecode.empend((ByteCodeValueType)OpCodeId::CheckBoundary);
217
218k
        bytecode.empend((ByteCodeValueType)type);
218
219
218k
        extend(move(bytecode));
220
218k
    }
221
222
    void insert_bytecode_clear_capture_group(size_t index)
223
14.6M
    {
224
14.6M
        empend(static_cast<ByteCodeValueType>(OpCodeId::ClearCaptureGroup));
225
14.6M
        empend(index);
226
14.6M
    }
227
228
    void insert_bytecode_compare_string(StringView view)
229
1.58M
    {
230
1.58M
        empend(static_cast<ByteCodeValueType>(OpCodeId::Compare));
231
1.58M
        empend(static_cast<u64>(1)); // number of arguments
232
1.58M
        empend(2 + view.length());   // size of arguments
233
1.58M
        empend(static_cast<ByteCodeValueType>(CharacterCompareType::String));
234
1.58M
        insert_string(view);
235
1.58M
    }
236
237
    void insert_bytecode_group_capture_left(size_t capture_groups_count)
238
1.03M
    {
239
1.03M
        empend(static_cast<ByteCodeValueType>(OpCodeId::SaveLeftCaptureGroup));
240
1.03M
        empend(capture_groups_count);
241
1.03M
    }
242
243
    void insert_bytecode_group_capture_right(size_t capture_groups_count)
244
1.03M
    {
245
1.03M
        empend(static_cast<ByteCodeValueType>(OpCodeId::SaveRightCaptureGroup));
246
1.03M
        empend(capture_groups_count);
247
1.03M
    }
248
249
    void insert_bytecode_group_capture_right(size_t capture_groups_count, StringView name)
250
311
    {
251
311
        empend(static_cast<ByteCodeValueType>(OpCodeId::SaveRightNamedCaptureGroup));
252
311
        empend(reinterpret_cast<ByteCodeValueType>(name.characters_without_null_termination()));
253
311
        empend(name.length());
254
311
        empend(capture_groups_count);
255
311
    }
256
257
    enum class LookAroundType {
258
        LookAhead,
259
        LookBehind,
260
        NegatedLookAhead,
261
        NegatedLookBehind,
262
    };
263
    void insert_bytecode_lookaround(ByteCode&& lookaround_body, LookAroundType type, size_t match_length = 0)
264
1.44k
    {
265
        // FIXME: The save stack will grow infinitely with repeated failures
266
        //        as we do not discard that on failure (we don't necessarily know how many to pop with the current architecture).
267
1.44k
        switch (type) {
268
0
        case LookAroundType::LookAhead: {
269
            // SAVE
270
            // REGEXP BODY
271
            // RESTORE
272
0
            empend((ByteCodeValueType)OpCodeId::Save);
273
0
            extend(move(lookaround_body));
274
0
            empend((ByteCodeValueType)OpCodeId::Restore);
275
0
            return;
276
0
        }
277
18
        case LookAroundType::NegatedLookAhead: {
278
            // JUMP _A
279
            // LABEL _L
280
            // REGEXP BODY
281
            // FAIL
282
            // LABEL _A
283
            // SAVE
284
            // FORKJUMP _L
285
            // RESTORE
286
18
            auto body_length = lookaround_body.size();
287
18
            empend((ByteCodeValueType)OpCodeId::Jump);
288
18
            empend((ByteCodeValueType)body_length + 1); // JUMP to label _A
289
18
            extend(move(lookaround_body));
290
18
            empend((ByteCodeValueType)OpCodeId::FailForks);
291
18
            empend((ByteCodeValueType)OpCodeId::Save);
292
18
            empend((ByteCodeValueType)OpCodeId::ForkJump);
293
18
            empend((ByteCodeValueType) - (body_length + 4)); // JUMP to label _L
294
18
            empend((ByteCodeValueType)OpCodeId::Restore);
295
18
            return;
296
0
        }
297
38
        case LookAroundType::LookBehind:
298
            // SAVE
299
            // GOBACK match_length(BODY)
300
            // REGEXP BODY
301
            // RESTORE
302
38
            empend((ByteCodeValueType)OpCodeId::Save);
303
38
            empend((ByteCodeValueType)OpCodeId::GoBack);
304
38
            empend((ByteCodeValueType)match_length);
305
38
            extend(move(lookaround_body));
306
38
            empend((ByteCodeValueType)OpCodeId::Restore);
307
38
            return;
308
1.39k
        case LookAroundType::NegatedLookBehind: {
309
            // JUMP _A
310
            // LABEL _L
311
            // GOBACK match_length(BODY)
312
            // REGEXP BODY
313
            // FAIL
314
            // LABEL _A
315
            // SAVE
316
            // FORKJUMP _L
317
            // RESTORE
318
1.39k
            auto body_length = lookaround_body.size();
319
1.39k
            empend((ByteCodeValueType)OpCodeId::Jump);
320
1.39k
            empend((ByteCodeValueType)body_length + 3); // JUMP to label _A
321
1.39k
            empend((ByteCodeValueType)OpCodeId::GoBack);
322
1.39k
            empend((ByteCodeValueType)match_length);
323
1.39k
            extend(move(lookaround_body));
324
1.39k
            empend((ByteCodeValueType)OpCodeId::FailForks);
325
1.39k
            empend((ByteCodeValueType)OpCodeId::Save);
326
1.39k
            empend((ByteCodeValueType)OpCodeId::ForkJump);
327
1.39k
            empend((ByteCodeValueType) - (body_length + 6)); // JUMP to label _L
328
1.39k
            empend((ByteCodeValueType)OpCodeId::Restore);
329
1.39k
            return;
330
0
        }
331
1.44k
        }
332
333
0
        VERIFY_NOT_REACHED();
334
0
    }
335
336
    void insert_bytecode_alternation(ByteCode&& left, ByteCode&& right)
337
910
    {
338
339
        // FORKJUMP _ALT
340
        // REGEXP ALT2
341
        // JUMP  _END
342
        // LABEL _ALT
343
        // REGEXP ALT1
344
        // LABEL _END
345
346
        // Optimisation: Eliminate extra work by unifying common pre-and-postfix exprs.
347
910
        Optimizer::append_alternation(*this, move(left), move(right));
348
910
    }
349
350
    template<Integral T>
351
    static void transform_bytecode_repetition_min_max(ByteCode& bytecode_to_repeat, T minimum, Optional<T> maximum, size_t min_repetition_mark_id, size_t max_repetition_mark_id, bool greedy = true)
352
16.8k
    {
353
16.8k
        if (!maximum.has_value()) {
354
6.93k
            if (minimum == 0)
355
417
                return transform_bytecode_repetition_any(bytecode_to_repeat, greedy);
356
6.51k
            if (minimum == 1)
357
1.63k
                return transform_bytecode_repetition_min_one(bytecode_to_repeat, greedy);
358
6.51k
        }
359
360
14.8k
        ByteCode new_bytecode;
361
14.8k
        new_bytecode.insert_bytecode_repetition_n(bytecode_to_repeat, minimum, min_repetition_mark_id);
362
363
14.8k
        if (maximum.has_value()) {
364
            // (REPEAT REGEXP MIN)
365
            // LABEL _MAX_LOOP            |
366
            // FORK END                   |
367
            // REGEXP                     |
368
            // REPEAT _MAX_LOOP MAX-MIN   | if max > min
369
            // FORK END                   |
370
            // REGEXP                     |
371
            // LABEL END                  |
372
            // RESET _MAX_LOOP            |
373
9.94k
            auto jump_kind = static_cast<ByteCodeValueType>(greedy ? OpCodeId::ForkStay : OpCodeId::ForkJump);
374
9.94k
            if (maximum.value() > minimum) {
375
9.54k
                new_bytecode.empend(jump_kind);
376
9.54k
                new_bytecode.empend((ByteCodeValueType)0); // Placeholder for the jump target.
377
9.54k
                auto pre_loop_fork_jump_index = new_bytecode.size();
378
9.54k
                new_bytecode.extend(bytecode_to_repeat);
379
9.54k
                auto repetitions = maximum.value() - minimum;
380
9.54k
                auto fork_jump_address = new_bytecode.size();
381
9.54k
                if (repetitions > 1) {
382
8.82k
                    new_bytecode.empend((ByteCodeValueType)OpCodeId::Repeat);
383
8.82k
                    new_bytecode.empend(bytecode_to_repeat.size() + 2);
384
8.82k
                    new_bytecode.empend(static_cast<ByteCodeValueType>(repetitions - 1));
385
8.82k
                    new_bytecode.empend(max_repetition_mark_id);
386
8.82k
                    new_bytecode.empend(jump_kind);
387
8.82k
                    new_bytecode.empend((ByteCodeValueType)0); // Placeholder for the jump target.
388
8.82k
                    auto post_loop_fork_jump_index = new_bytecode.size();
389
8.82k
                    new_bytecode.extend(bytecode_to_repeat);
390
8.82k
                    fork_jump_address = new_bytecode.size();
391
392
8.82k
                    new_bytecode[post_loop_fork_jump_index - 1] = (ByteCodeValueType)(fork_jump_address - post_loop_fork_jump_index);
393
394
8.82k
                    new_bytecode.empend((ByteCodeValueType)OpCodeId::ResetRepeat);
395
8.82k
                    new_bytecode.empend((ByteCodeValueType)max_repetition_mark_id);
396
8.82k
                }
397
9.54k
                new_bytecode[pre_loop_fork_jump_index - 1] = (ByteCodeValueType)(fork_jump_address - pre_loop_fork_jump_index);
398
9.54k
            }
399
9.94k
        } else {
400
            // no maximum value set, repeat finding if possible:
401
            // (REPEAT REGEXP MIN)
402
            // LABEL _START
403
            // CHECKPOINT _C
404
            // REGEXP
405
            // JUMP_NONEMPTY _C _START FORK
406
407
            // Note: This is only safe because REPEAT will leave one iteration outside (see repetition_n)
408
4.87k
            auto checkpoint = s_next_checkpoint_serial_id++;
409
4.87k
            new_bytecode.insert(new_bytecode.size() - bytecode_to_repeat.size(), (ByteCodeValueType)OpCodeId::Checkpoint);
410
4.87k
            new_bytecode.insert(new_bytecode.size() - bytecode_to_repeat.size(), (ByteCodeValueType)checkpoint);
411
412
4.87k
            auto jump_kind = static_cast<ByteCodeValueType>(greedy ? OpCodeId::ForkJump : OpCodeId::ForkStay);
413
4.87k
            new_bytecode.empend((ByteCodeValueType)OpCodeId::JumpNonEmpty);
414
4.87k
            new_bytecode.empend(-bytecode_to_repeat.size() - 4 - 2); // Jump to the last iteration
415
4.87k
            new_bytecode.empend(checkpoint);                         // if _C is not empty.
416
4.87k
            new_bytecode.empend(jump_kind);
417
4.87k
        }
418
419
14.8k
        bytecode_to_repeat = move(new_bytecode);
420
14.8k
    }
_ZN5regex8ByteCode37transform_bytecode_repetition_min_maxITkN2AK8Concepts8IntegralEjEEvRS0_T_NS2_8OptionalIS5_EEmmb
Line
Count
Source
352
377
    {
353
377
        if (!maximum.has_value()) {
354
29
            if (minimum == 0)
355
1
                return transform_bytecode_repetition_any(bytecode_to_repeat, greedy);
356
28
            if (minimum == 1)
357
2
                return transform_bytecode_repetition_min_one(bytecode_to_repeat, greedy);
358
28
        }
359
360
374
        ByteCode new_bytecode;
361
374
        new_bytecode.insert_bytecode_repetition_n(bytecode_to_repeat, minimum, min_repetition_mark_id);
362
363
374
        if (maximum.has_value()) {
364
            // (REPEAT REGEXP MIN)
365
            // LABEL _MAX_LOOP            |
366
            // FORK END                   |
367
            // REGEXP                     |
368
            // REPEAT _MAX_LOOP MAX-MIN   | if max > min
369
            // FORK END                   |
370
            // REGEXP                     |
371
            // LABEL END                  |
372
            // RESET _MAX_LOOP            |
373
348
            auto jump_kind = static_cast<ByteCodeValueType>(greedy ? OpCodeId::ForkStay : OpCodeId::ForkJump);
374
348
            if (maximum.value() > minimum) {
375
348
                new_bytecode.empend(jump_kind);
376
348
                new_bytecode.empend((ByteCodeValueType)0); // Placeholder for the jump target.
377
348
                auto pre_loop_fork_jump_index = new_bytecode.size();
378
348
                new_bytecode.extend(bytecode_to_repeat);
379
348
                auto repetitions = maximum.value() - minimum;
380
348
                auto fork_jump_address = new_bytecode.size();
381
348
                if (repetitions > 1) {
382
316
                    new_bytecode.empend((ByteCodeValueType)OpCodeId::Repeat);
383
316
                    new_bytecode.empend(bytecode_to_repeat.size() + 2);
384
316
                    new_bytecode.empend(static_cast<ByteCodeValueType>(repetitions - 1));
385
316
                    new_bytecode.empend(max_repetition_mark_id);
386
316
                    new_bytecode.empend(jump_kind);
387
316
                    new_bytecode.empend((ByteCodeValueType)0); // Placeholder for the jump target.
388
316
                    auto post_loop_fork_jump_index = new_bytecode.size();
389
316
                    new_bytecode.extend(bytecode_to_repeat);
390
316
                    fork_jump_address = new_bytecode.size();
391
392
316
                    new_bytecode[post_loop_fork_jump_index - 1] = (ByteCodeValueType)(fork_jump_address - post_loop_fork_jump_index);
393
394
316
                    new_bytecode.empend((ByteCodeValueType)OpCodeId::ResetRepeat);
395
316
                    new_bytecode.empend((ByteCodeValueType)max_repetition_mark_id);
396
316
                }
397
348
                new_bytecode[pre_loop_fork_jump_index - 1] = (ByteCodeValueType)(fork_jump_address - pre_loop_fork_jump_index);
398
348
            }
399
348
        } else {
400
            // no maximum value set, repeat finding if possible:
401
            // (REPEAT REGEXP MIN)
402
            // LABEL _START
403
            // CHECKPOINT _C
404
            // REGEXP
405
            // JUMP_NONEMPTY _C _START FORK
406
407
            // Note: This is only safe because REPEAT will leave one iteration outside (see repetition_n)
408
26
            auto checkpoint = s_next_checkpoint_serial_id++;
409
26
            new_bytecode.insert(new_bytecode.size() - bytecode_to_repeat.size(), (ByteCodeValueType)OpCodeId::Checkpoint);
410
26
            new_bytecode.insert(new_bytecode.size() - bytecode_to_repeat.size(), (ByteCodeValueType)checkpoint);
411
412
26
            auto jump_kind = static_cast<ByteCodeValueType>(greedy ? OpCodeId::ForkJump : OpCodeId::ForkStay);
413
26
            new_bytecode.empend((ByteCodeValueType)OpCodeId::JumpNonEmpty);
414
26
            new_bytecode.empend(-bytecode_to_repeat.size() - 4 - 2); // Jump to the last iteration
415
26
            new_bytecode.empend(checkpoint);                         // if _C is not empty.
416
26
            new_bytecode.empend(jump_kind);
417
26
        }
418
419
374
        bytecode_to_repeat = move(new_bytecode);
420
374
    }
_ZN5regex8ByteCode37transform_bytecode_repetition_min_maxITkN2AK8Concepts8IntegralEmEEvRS0_T_NS2_8OptionalIS5_EEmmb
Line
Count
Source
352
16.5k
    {
353
16.5k
        if (!maximum.has_value()) {
354
6.90k
            if (minimum == 0)
355
416
                return transform_bytecode_repetition_any(bytecode_to_repeat, greedy);
356
6.48k
            if (minimum == 1)
357
1.63k
                return transform_bytecode_repetition_min_one(bytecode_to_repeat, greedy);
358
6.48k
        }
359
360
14.4k
        ByteCode new_bytecode;
361
14.4k
        new_bytecode.insert_bytecode_repetition_n(bytecode_to_repeat, minimum, min_repetition_mark_id);
362
363
14.4k
        if (maximum.has_value()) {
364
            // (REPEAT REGEXP MIN)
365
            // LABEL _MAX_LOOP            |
366
            // FORK END                   |
367
            // REGEXP                     |
368
            // REPEAT _MAX_LOOP MAX-MIN   | if max > min
369
            // FORK END                   |
370
            // REGEXP                     |
371
            // LABEL END                  |
372
            // RESET _MAX_LOOP            |
373
9.60k
            auto jump_kind = static_cast<ByteCodeValueType>(greedy ? OpCodeId::ForkStay : OpCodeId::ForkJump);
374
9.60k
            if (maximum.value() > minimum) {
375
9.19k
                new_bytecode.empend(jump_kind);
376
9.19k
                new_bytecode.empend((ByteCodeValueType)0); // Placeholder for the jump target.
377
9.19k
                auto pre_loop_fork_jump_index = new_bytecode.size();
378
9.19k
                new_bytecode.extend(bytecode_to_repeat);
379
9.19k
                auto repetitions = maximum.value() - minimum;
380
9.19k
                auto fork_jump_address = new_bytecode.size();
381
9.19k
                if (repetitions > 1) {
382
8.50k
                    new_bytecode.empend((ByteCodeValueType)OpCodeId::Repeat);
383
8.50k
                    new_bytecode.empend(bytecode_to_repeat.size() + 2);
384
8.50k
                    new_bytecode.empend(static_cast<ByteCodeValueType>(repetitions - 1));
385
8.50k
                    new_bytecode.empend(max_repetition_mark_id);
386
8.50k
                    new_bytecode.empend(jump_kind);
387
8.50k
                    new_bytecode.empend((ByteCodeValueType)0); // Placeholder for the jump target.
388
8.50k
                    auto post_loop_fork_jump_index = new_bytecode.size();
389
8.50k
                    new_bytecode.extend(bytecode_to_repeat);
390
8.50k
                    fork_jump_address = new_bytecode.size();
391
392
8.50k
                    new_bytecode[post_loop_fork_jump_index - 1] = (ByteCodeValueType)(fork_jump_address - post_loop_fork_jump_index);
393
394
8.50k
                    new_bytecode.empend((ByteCodeValueType)OpCodeId::ResetRepeat);
395
8.50k
                    new_bytecode.empend((ByteCodeValueType)max_repetition_mark_id);
396
8.50k
                }
397
9.19k
                new_bytecode[pre_loop_fork_jump_index - 1] = (ByteCodeValueType)(fork_jump_address - pre_loop_fork_jump_index);
398
9.19k
            }
399
9.60k
        } else {
400
            // no maximum value set, repeat finding if possible:
401
            // (REPEAT REGEXP MIN)
402
            // LABEL _START
403
            // CHECKPOINT _C
404
            // REGEXP
405
            // JUMP_NONEMPTY _C _START FORK
406
407
            // Note: This is only safe because REPEAT will leave one iteration outside (see repetition_n)
408
4.85k
            auto checkpoint = s_next_checkpoint_serial_id++;
409
4.85k
            new_bytecode.insert(new_bytecode.size() - bytecode_to_repeat.size(), (ByteCodeValueType)OpCodeId::Checkpoint);
410
4.85k
            new_bytecode.insert(new_bytecode.size() - bytecode_to_repeat.size(), (ByteCodeValueType)checkpoint);
411
412
4.85k
            auto jump_kind = static_cast<ByteCodeValueType>(greedy ? OpCodeId::ForkJump : OpCodeId::ForkStay);
413
4.85k
            new_bytecode.empend((ByteCodeValueType)OpCodeId::JumpNonEmpty);
414
4.85k
            new_bytecode.empend(-bytecode_to_repeat.size() - 4 - 2); // Jump to the last iteration
415
4.85k
            new_bytecode.empend(checkpoint);                         // if _C is not empty.
416
4.85k
            new_bytecode.empend(jump_kind);
417
4.85k
        }
418
419
14.4k
        bytecode_to_repeat = move(new_bytecode);
420
14.4k
    }
421
422
    template<Integral T>
423
    void insert_bytecode_repetition_n(ByteCode& bytecode_to_repeat, T n, size_t repetition_mark_id)
424
18.0k
    {
425
        // LABEL _LOOP
426
        // REGEXP
427
        // REPEAT _LOOP N-1
428
        // REGEXP
429
18.0k
        if (n == 0)
430
1.70k
            return;
431
432
        // Note: this bytecode layout allows callers to repeat the last REGEXP instruction without the
433
        // REPEAT instruction forcing another loop.
434
16.3k
        extend(bytecode_to_repeat);
435
436
16.3k
        if (n > 1) {
437
15.1k
            empend(static_cast<ByteCodeValueType>(OpCodeId::Repeat));
438
15.1k
            empend(bytecode_to_repeat.size());
439
15.1k
            empend(static_cast<ByteCodeValueType>(n - 1));
440
15.1k
            empend(repetition_mark_id);
441
442
15.1k
            extend(bytecode_to_repeat);
443
15.1k
        }
444
16.3k
    }
_ZN5regex8ByteCode28insert_bytecode_repetition_nITkN2AK8Concepts8IntegralEjEEvRS0_T_m
Line
Count
Source
424
3.56k
    {
425
        // LABEL _LOOP
426
        // REGEXP
427
        // REPEAT _LOOP N-1
428
        // REGEXP
429
3.56k
        if (n == 0)
430
432
            return;
431
432
        // Note: this bytecode layout allows callers to repeat the last REGEXP instruction without the
433
        // REPEAT instruction forcing another loop.
434
3.12k
        extend(bytecode_to_repeat);
435
436
3.12k
        if (n > 1) {
437
3.01k
            empend(static_cast<ByteCodeValueType>(OpCodeId::Repeat));
438
3.01k
            empend(bytecode_to_repeat.size());
439
3.01k
            empend(static_cast<ByteCodeValueType>(n - 1));
440
3.01k
            empend(repetition_mark_id);
441
442
3.01k
            extend(bytecode_to_repeat);
443
3.01k
        }
444
3.12k
    }
_ZN5regex8ByteCode28insert_bytecode_repetition_nITkN2AK8Concepts8IntegralEmEEvRS0_T_m
Line
Count
Source
424
14.4k
    {
425
        // LABEL _LOOP
426
        // REGEXP
427
        // REPEAT _LOOP N-1
428
        // REGEXP
429
14.4k
        if (n == 0)
430
1.27k
            return;
431
432
        // Note: this bytecode layout allows callers to repeat the last REGEXP instruction without the
433
        // REPEAT instruction forcing another loop.
434
13.1k
        extend(bytecode_to_repeat);
435
436
13.1k
        if (n > 1) {
437
12.0k
            empend(static_cast<ByteCodeValueType>(OpCodeId::Repeat));
438
12.0k
            empend(bytecode_to_repeat.size());
439
12.0k
            empend(static_cast<ByteCodeValueType>(n - 1));
440
12.0k
            empend(repetition_mark_id);
441
442
12.0k
            extend(bytecode_to_repeat);
443
12.0k
        }
444
13.1k
    }
445
446
    static void transform_bytecode_repetition_min_one(ByteCode& bytecode_to_repeat, bool greedy)
447
712k
    {
448
        // LABEL _START = -bytecode_to_repeat.size()
449
        // CHECKPOINT _C
450
        // REGEXP
451
        // JUMP_NONEMPTY _C _START FORKSTAY (FORKJUMP -> Greedy)
452
453
712k
        auto checkpoint = s_next_checkpoint_serial_id++;
454
712k
        bytecode_to_repeat.prepend((ByteCodeValueType)checkpoint);
455
712k
        bytecode_to_repeat.prepend((ByteCodeValueType)OpCodeId::Checkpoint);
456
457
712k
        bytecode_to_repeat.empend((ByteCodeValueType)OpCodeId::JumpNonEmpty);
458
712k
        bytecode_to_repeat.empend(-bytecode_to_repeat.size() - 3); // Jump to the _START label...
459
712k
        bytecode_to_repeat.empend(checkpoint);                     // ...if _C is not empty
460
461
712k
        if (greedy)
462
712k
            bytecode_to_repeat.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkJump));
463
5
        else
464
5
            bytecode_to_repeat.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkStay));
465
712k
    }
466
467
    static void transform_bytecode_repetition_any(ByteCode& bytecode_to_repeat, bool greedy)
468
726k
    {
469
        // LABEL _START
470
        // FORKJUMP _END  (FORKSTAY -> Greedy)
471
        // CHECKPOINT _C
472
        // REGEXP
473
        // JUMP_NONEMPTY _C _START JUMP
474
        // LABEL _END
475
476
        // LABEL _START = m_bytes.size();
477
726k
        ByteCode bytecode;
478
479
726k
        if (greedy)
480
726k
            bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkStay));
481
31
        else
482
31
            bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkJump));
483
484
726k
        bytecode.empend(bytecode_to_repeat.size() + 2 + 4); // Jump to the _END label
485
486
726k
        auto checkpoint = s_next_checkpoint_serial_id++;
487
726k
        bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::Checkpoint));
488
726k
        bytecode.empend(static_cast<ByteCodeValueType>(checkpoint));
489
490
726k
        bytecode.extend(bytecode_to_repeat);
491
492
726k
        bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::JumpNonEmpty));
493
726k
        bytecode.empend(-bytecode.size() - 3); // Jump(...) to the _START label...
494
726k
        bytecode.empend(checkpoint);           // ...only if _C passes.
495
726k
        bytecode.empend((ByteCodeValueType)OpCodeId::Jump);
496
        // LABEL _END = bytecode.size()
497
498
726k
        bytecode_to_repeat = move(bytecode);
499
726k
    }
500
501
    static void transform_bytecode_repetition_zero_or_one(ByteCode& bytecode_to_repeat, bool greedy)
502
1.27M
    {
503
        // FORKJUMP _END (FORKSTAY -> Greedy)
504
        // REGEXP
505
        // LABEL _END
506
1.27M
        ByteCode bytecode;
507
508
1.27M
        if (greedy)
509
1.24M
            bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkStay));
510
31.6k
        else
511
31.6k
            bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkJump));
512
513
1.27M
        bytecode.empend(bytecode_to_repeat.size()); // Jump to the _END label
514
515
1.27M
        bytecode.extend(move(bytecode_to_repeat));
516
        // LABEL _END = bytecode.size()
517
518
1.27M
        bytecode_to_repeat = move(bytecode);
519
1.27M
    }
520
521
    OpCode& get_opcode(MatchState& state) const;
522
523
7.41k
    static void reset_checkpoint_serial_id() { s_next_checkpoint_serial_id = 0; }
524
525
private:
526
    void insert_string(StringView view)
527
1.58M
    {
528
1.58M
        empend((ByteCodeValueType)view.length());
529
96.7M
        for (size_t i = 0; i < view.length(); ++i)
530
95.1M
            empend((ByteCodeValueType)view[i]);
531
1.58M
    }
532
533
    void ensure_opcodes_initialized();
534
    ALWAYS_INLINE OpCode& get_opcode_by_id(OpCodeId id) const;
535
    static OwnPtr<OpCode> s_opcodes[(size_t)OpCodeId::Last + 1];
536
    static bool s_opcodes_initialized;
537
    static size_t s_next_checkpoint_serial_id;
538
};
539
540
#define ENUMERATE_EXECUTION_RESULTS                          \
541
0
    __ENUMERATE_EXECUTION_RESULT(Continue)                   \
542
0
    __ENUMERATE_EXECUTION_RESULT(Fork_PrioHigh)              \
543
0
    __ENUMERATE_EXECUTION_RESULT(Fork_PrioLow)               \
544
0
    __ENUMERATE_EXECUTION_RESULT(Failed)                     \
545
0
    __ENUMERATE_EXECUTION_RESULT(Failed_ExecuteLowPrioForks) \
546
0
    __ENUMERATE_EXECUTION_RESULT(Succeeded)
547
548
enum class ExecutionResult : u8 {
549
#define __ENUMERATE_EXECUTION_RESULT(x) x,
550
    ENUMERATE_EXECUTION_RESULTS
551
#undef __ENUMERATE_EXECUTION_RESULT
552
};
553
554
StringView execution_result_name(ExecutionResult result);
555
StringView opcode_id_name(OpCodeId opcode_id);
556
StringView boundary_check_type_name(BoundaryCheckType);
557
StringView character_compare_type_name(CharacterCompareType result);
558
StringView character_class_name(CharClass ch_class);
559
560
class OpCode {
561
public:
562
132
    OpCode() = default;
563
0
    virtual ~OpCode() = default;
564
565
    virtual OpCodeId opcode_id() const = 0;
566
    virtual size_t size() const = 0;
567
    virtual ExecutionResult execute(MatchInput const& input, MatchState& state) const = 0;
568
569
    ALWAYS_INLINE ByteCodeValueType argument(size_t offset) const
570
739M
    {
571
739M
        return m_bytecode->at(state().instruction_position + 1 + offset);
572
739M
    }
573
574
    ALWAYS_INLINE StringView name() const;
575
    static StringView name(OpCodeId);
576
577
532M
    ALWAYS_INLINE void set_state(MatchState& state) { m_state = &state; }
578
579
532M
    ALWAYS_INLINE void set_bytecode(ByteCode& bytecode) { m_bytecode = &bytecode; }
580
581
    ALWAYS_INLINE MatchState const& state() const
582
760M
    {
583
760M
        VERIFY(m_state);
584
760M
        return *m_state;
585
760M
    }
586
587
    ByteString to_byte_string() const
588
0
    {
589
0
        return ByteString::formatted("[{:#02X}] {}", (int)opcode_id(), name(opcode_id()));
590
0
    }
591
592
    virtual ByteString arguments_string() const = 0;
593
594
0
    ALWAYS_INLINE ByteCode const& bytecode() const { return *m_bytecode; }
595
596
protected:
597
    ByteCode* m_bytecode { nullptr };
598
    MatchState* m_state { nullptr };
599
};
600
601
class OpCode_Exit final : public OpCode {
602
public:
603
    ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
604
8.84k
    ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::Exit; }
605
1.00M
    ALWAYS_INLINE size_t size() const override { return 1; }
606
0
    ByteString arguments_string() const override { return ByteString::empty(); }
607
};
608
609
class OpCode_FailForks final : public OpCode {
610
public:
611
    ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
612
23.0k
    ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::FailForks; }
613
39.6k
    ALWAYS_INLINE size_t size() const override { return 1; }
614
0
    ByteString arguments_string() const override { return ByteString::empty(); }
615
};
616
617
class OpCode_Save final : public OpCode {
618
public:
619
    ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
620
19.8k
    ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::Save; }
621
19.9k
    ALWAYS_INLINE size_t size() const override { return 1; }
622
0
    ByteString arguments_string() const override { return ByteString::empty(); }
623
};
624
625
class OpCode_Restore final : public OpCode {
626
public:
627
    ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
628
19.8k
    ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::Restore; }
629
19.9k
    ALWAYS_INLINE size_t size() const override { return 1; }
630
0
    ByteString arguments_string() const override { return ByteString::empty(); }
631
};
632
633
class OpCode_GoBack final : public OpCode {
634
public:
635
    ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
636
1.38k
    ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::GoBack; }
637
1.47k
    ALWAYS_INLINE size_t size() const override { return 2; }
638
0
    ALWAYS_INLINE size_t count() const { return argument(0); }
639
0
    ByteString arguments_string() const override { return ByteString::formatted("count={}", count()); }
640
};
641
642
class OpCode_Jump final : public OpCode {
643
public:
644
    ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
645
9.46M
    ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::Jump; }
646
36.2M
    ALWAYS_INLINE size_t size() const override { return 2; }
647
17.4M
    ALWAYS_INLINE ssize_t offset() const { return argument(0); }
648
    ByteString arguments_string() const override
649
0
    {
650
0
        return ByteString::formatted("offset={} [&{}]", offset(), state().instruction_position + size() + offset());
651
0
    }
652
};
653
654
class OpCode_ForkJump : public OpCode {
655
public:
656
    ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
657
1.94M
    ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::ForkJump; }
658
3.57M
    ALWAYS_INLINE size_t size() const override { return 2; }
659
1.53M
    ALWAYS_INLINE ssize_t offset() const { return argument(0); }
660
    ByteString arguments_string() const override
661
0
    {
662
0
        return ByteString::formatted("offset={} [&{}], sp: {}", offset(), state().instruction_position + size() + offset(), state().string_position);
663
0
    }
664
};
665
666
class OpCode_ForkReplaceJump final : public OpCode_ForkJump {
667
public:
668
    ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
669
0
    ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::ForkReplaceJump; }
670
};
671
672
class OpCode_ForkStay : public OpCode {
673
public:
674
    ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
675
58.4M
    ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::ForkStay; }
676
208M
    ALWAYS_INLINE size_t size() const override { return 2; }
677
74.7M
    ALWAYS_INLINE ssize_t offset() const { return argument(0); }
678
    ByteString arguments_string() const override
679
0
    {
680
0
        return ByteString::formatted("offset={} [&{}], sp: {}", offset(), state().instruction_position + size() + offset(), state().string_position);
681
0
    }
682
};
683
684
class OpCode_ForkReplaceStay final : public OpCode_ForkStay {
685
public:
686
    ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
687
0
    ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::ForkReplaceStay; }
688
};
689
690
class OpCode_CheckBegin final : public OpCode {
691
public:
692
    ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
693
177k
    ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::CheckBegin; }
694
5.22M
    ALWAYS_INLINE size_t size() const override { return 1; }
695
0
    ByteString arguments_string() const override { return ByteString::empty(); }
696
};
697
698
class OpCode_CheckEnd final : public OpCode {
699
public:
700
    ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
701
57.8k
    ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::CheckEnd; }
702
3.44M
    ALWAYS_INLINE size_t size() const override { return 1; }
703
0
    ByteString arguments_string() const override { return ByteString::empty(); }
704
};
705
706
class OpCode_CheckBoundary final : public OpCode {
707
public:
708
    ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
709
171k
    ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::CheckBoundary; }
710
174k
    ALWAYS_INLINE size_t size() const override { return 2; }
711
0
    ALWAYS_INLINE size_t arguments_count() const { return 1; }
712
0
    ALWAYS_INLINE BoundaryCheckType type() const { return static_cast<BoundaryCheckType>(argument(0)); }
713
0
    ByteString arguments_string() const override { return ByteString::formatted("kind={} ({})", (long unsigned int)argument(0), boundary_check_type_name(type())); }
714
};
715
716
class OpCode_ClearCaptureGroup final : public OpCode {
717
public:
718
    ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
719
13.2M
    ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::ClearCaptureGroup; }
720
16.4M
    ALWAYS_INLINE size_t size() const override { return 2; }
721
0
    ALWAYS_INLINE size_t id() const { return argument(0); }
722
0
    ByteString arguments_string() const override { return ByteString::formatted("id={}", id()); }
723
};
724
725
class OpCode_SaveLeftCaptureGroup final : public OpCode {
726
public:
727
    ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
728
10.8M
    ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::SaveLeftCaptureGroup; }
729
25.1M
    ALWAYS_INLINE size_t size() const override { return 2; }
730
36.9M
    ALWAYS_INLINE size_t id() const { return argument(0); }
731
0
    ByteString arguments_string() const override { return ByteString::formatted("id={}", id()); }
732
};
733
734
class OpCode_SaveRightCaptureGroup final : public OpCode {
735
public:
736
    ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
737
10.3M
    ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::SaveRightCaptureGroup; }
738
20.3M
    ALWAYS_INLINE size_t size() const override { return 2; }
739
9.66M
    ALWAYS_INLINE size_t id() const { return argument(0); }
740
0
    ByteString arguments_string() const override { return ByteString::formatted("id={}", id()); }
741
};
742
743
class OpCode_SaveRightNamedCaptureGroup final : public OpCode {
744
public:
745
    ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
746
63
    ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::SaveRightNamedCaptureGroup; }
747
59
    ALWAYS_INLINE size_t size() const override { return 4; }
748
0
    ALWAYS_INLINE StringView name() const { return { reinterpret_cast<char*>(argument(0)), length() }; }
749
0
    ALWAYS_INLINE size_t length() const { return argument(1); }
750
0
    ALWAYS_INLINE size_t id() const { return argument(2); }
751
    ByteString arguments_string() const override
752
0
    {
753
0
        return ByteString::formatted("name={}, length={}", name(), length());
754
0
    }
755
};
756
757
class OpCode_Compare final : public OpCode {
758
public:
759
    ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
760
129M
    ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::Compare; }
761
211M
    ALWAYS_INLINE size_t size() const override { return arguments_size() + 3; }
762
158M
    ALWAYS_INLINE size_t arguments_count() const { return argument(0); }
763
211M
    ALWAYS_INLINE size_t arguments_size() const { return argument(1); }
764
    ByteString arguments_string() const override;
765
    Vector<ByteString> variable_arguments_to_byte_string(Optional<MatchInput const&> input = {}) const;
766
    Vector<CompareTypeAndValuePair> flat_compares() const;
767
    static bool matches_character_class(CharClass, u32, bool insensitive);
768
769
private:
770
    ALWAYS_INLINE static void compare_char(MatchInput const& input, MatchState& state, u32 ch1, bool inverse, bool& inverse_matched);
771
    ALWAYS_INLINE static bool compare_string(MatchInput const& input, MatchState& state, RegexStringView str, bool& had_zero_length_match);
772
    ALWAYS_INLINE static void compare_character_class(MatchInput const& input, MatchState& state, CharClass character_class, u32 ch, bool inverse, bool& inverse_matched);
773
    ALWAYS_INLINE static void compare_character_range(MatchInput const& input, MatchState& state, u32 from, u32 to, u32 ch, bool inverse, bool& inverse_matched);
774
    ALWAYS_INLINE static void compare_property(MatchInput const& input, MatchState& state, Unicode::Property property, bool inverse, bool& inverse_matched);
775
    ALWAYS_INLINE static void compare_general_category(MatchInput const& input, MatchState& state, Unicode::GeneralCategory general_category, bool inverse, bool& inverse_matched);
776
    ALWAYS_INLINE static void compare_script(MatchInput const& input, MatchState& state, Unicode::Script script, bool inverse, bool& inverse_matched);
777
    ALWAYS_INLINE static void compare_script_extension(MatchInput const& input, MatchState& state, Unicode::Script script, bool inverse, bool& inverse_matched);
778
};
779
780
class OpCode_Repeat : public OpCode {
781
public:
782
    ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
783
15.2M
    ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::Repeat; }
784
16.9M
    ALWAYS_INLINE size_t size() const override { return 4; }
785
7.30M
    ALWAYS_INLINE size_t offset() const { return argument(0); }
786
4.36M
    ALWAYS_INLINE u64 count() const { return argument(1); }
787
6.53M
    ALWAYS_INLINE size_t id() const { return argument(2); }
788
    ByteString arguments_string() const override
789
0
    {
790
0
        auto reps = id() < state().repetition_marks.size() ? state().repetition_marks.at(id()) : 0;
791
0
        return ByteString::formatted("offset={} [&{}] count={} id={} rep={}, sp: {}",
792
0
            static_cast<ssize_t>(offset()),
793
0
            state().instruction_position - offset(),
794
0
            count() + 1,
795
0
            id(),
796
0
            reps + 1,
797
0
            state().string_position);
798
0
    }
799
};
800
801
class OpCode_ResetRepeat : public OpCode {
802
public:
803
    ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
804
3.69M
    ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::ResetRepeat; }
805
8.33M
    ALWAYS_INLINE size_t size() const override { return 2; }
806
13.5M
    ALWAYS_INLINE size_t id() const { return argument(0); }
807
    ByteString arguments_string() const override
808
0
    {
809
0
        auto reps = id() < state().repetition_marks.size() ? state().repetition_marks.at(id()) : 0;
810
0
        return ByteString::formatted("id={} rep={}", id(), reps + 1);
811
0
    }
812
};
813
814
class OpCode_Checkpoint final : public OpCode {
815
public:
816
    ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
817
23.5M
    ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::Checkpoint; }
818
71.3M
    ALWAYS_INLINE size_t size() const override { return 2; }
819
46.8M
    ALWAYS_INLINE size_t id() const { return argument(0); }
820
0
    ByteString arguments_string() const override { return ByteString::formatted("id={}", id()); }
821
};
822
823
class OpCode_JumpNonEmpty final : public OpCode {
824
public:
825
    ExecutionResult execute(MatchInput const& input, MatchState& state) const override;
826
32.8M
    ALWAYS_INLINE OpCodeId opcode_id() const override { return OpCodeId::JumpNonEmpty; }
827
113M
    ALWAYS_INLINE size_t size() const override { return 4; }
828
62.4M
    ALWAYS_INLINE ssize_t offset() const { return argument(0); }
829
36.7M
    ALWAYS_INLINE ssize_t checkpoint() const { return argument(1); }
830
50.9M
    ALWAYS_INLINE OpCodeId form() const { return (OpCodeId)argument(2); }
831
    ByteString arguments_string() const override
832
0
    {
833
0
        return ByteString::formatted("{} offset={} [&{}], cp={}",
834
0
            opcode_id_name(form()),
835
0
            offset(), state().instruction_position + size() + offset(),
836
0
            checkpoint());
837
0
    }
838
};
839
840
ALWAYS_INLINE OpCode& ByteCode::get_opcode(regex::MatchState& state) const
841
532M
{
842
532M
    OpCodeId opcode_id;
843
532M
    if (auto opcode_ptr = static_cast<DisjointChunks<ByteCodeValueType> const&>(*this).find(state.instruction_position))
844
531M
        opcode_id = (OpCodeId)*opcode_ptr;
845
1.00M
    else
846
1.00M
        opcode_id = OpCodeId::Exit;
847
848
532M
    auto& opcode = get_opcode_by_id(opcode_id);
849
532M
    opcode.set_state(state);
850
532M
    return opcode;
851
532M
}
852
853
ALWAYS_INLINE OpCode& ByteCode::get_opcode_by_id(OpCodeId id) const
854
532M
{
855
532M
    VERIFY(id >= OpCodeId::First && id <= OpCodeId::Last);
856
857
532M
    auto& opcode = s_opcodes[(u32)id];
858
532M
    opcode->set_bytecode(*const_cast<ByteCode*>(this));
859
532M
    return *opcode;
860
532M
}
861
862
template<typename T>
863
bool is(OpCode const&);
864
865
template<typename T>
866
ALWAYS_INLINE bool is(OpCode const&)
867
{
868
    return false;
869
}
870
871
template<typename T>
872
ALWAYS_INLINE bool is(OpCode const* opcode)
873
{
874
    return is<T>(*opcode);
875
}
876
877
template<>
878
ALWAYS_INLINE bool is<OpCode_ForkStay>(OpCode const& opcode)
879
0
{
880
0
    return opcode.opcode_id() == OpCodeId::ForkStay;
881
0
}
882
883
template<>
884
ALWAYS_INLINE bool is<OpCode_Exit>(OpCode const& opcode)
885
0
{
886
0
    return opcode.opcode_id() == OpCodeId::Exit;
887
0
}
888
889
template<>
890
ALWAYS_INLINE bool is<OpCode_Compare>(OpCode const& opcode)
891
0
{
892
0
    return opcode.opcode_id() == OpCodeId::Compare;
893
0
}
894
895
template<typename T>
896
ALWAYS_INLINE T const& to(OpCode const& opcode)
897
0
{
898
0
    return verify_cast<T>(opcode);
899
0
}
900
901
template<typename T>
902
ALWAYS_INLINE T* to(OpCode* opcode)
903
{
904
    return verify_cast<T>(opcode);
905
}
906
907
template<typename T>
908
ALWAYS_INLINE T const* to(OpCode const* opcode)
909
{
910
    return verify_cast<T>(opcode);
911
}
912
913
template<typename T>
914
ALWAYS_INLINE T& to(OpCode& opcode)
915
0
{
916
0
    return verify_cast<T>(opcode);
917
0
}
918
919
}