/src/serenity/Userland/Libraries/LibRegex/RegexOptimizer.cpp
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2021, Ali Mohammad Pur <mpfard@serenityos.org> |
3 | | * |
4 | | * SPDX-License-Identifier: BSD-2-Clause |
5 | | */ |
6 | | |
7 | | #include <AK/QuickSort.h> |
8 | | #include <AK/RedBlackTree.h> |
9 | | #include <AK/Stack.h> |
10 | | #include <LibRegex/Regex.h> |
11 | | #include <LibRegex/RegexBytecodeStreamOptimizer.h> |
12 | | #if REGEX_DEBUG |
13 | | # include <AK/ScopeGuard.h> |
14 | | # include <AK/ScopeLogger.h> |
15 | | #endif |
16 | | |
17 | | namespace regex { |
18 | | |
19 | | using Detail::Block; |
20 | | |
21 | | template<typename Parser> |
22 | | void Regex<Parser>::run_optimization_passes() |
23 | 18 | { |
24 | 18 | parser_result.bytecode.flatten(); |
25 | | |
26 | | // Rewrite fork loops as atomic groups |
27 | | // e.g. a*b -> (ATOMIC a*)b |
28 | 18 | attempt_rewrite_loops_as_atomic_groups(split_basic_blocks(parser_result.bytecode)); |
29 | | |
30 | 18 | parser_result.bytecode.flatten(); |
31 | 18 | } |
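For context on what this pass buys, a minimal usage sketch follows; it assumes LibRegex's public Regex API (match() returning a result with a success flag), so treat the exact calls as illustrative rather than definitive:

    // After the rewrite, /a*b/ behaves like the atomic /(?>a*)b/: the a* loop keeps a
    // single saved state (ForkReplace) and the engine never backtracks into it once b
    // has been attempted. The rewrite only fires when first(RE1) cannot overlap
    // end(RE0), so match results are unchanged; only the fork pressure drops.
    Regex<ECMA262Parser> re("a*b");
    auto result = re.match("aaab"sv);
    VERIFY(result.success);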
32 | | |
33 | | template<typename Parser> |
34 | | typename Regex<Parser>::BasicBlockList Regex<Parser>::split_basic_blocks(ByteCode const& bytecode) |
35 | 18 | { |
36 | 18 | BasicBlockList block_boundaries; |
37 | 18 | size_t end_of_last_block = 0; |
38 | | |
39 | 18 | auto bytecode_size = bytecode.size(); |
40 | | |
41 | 18 | MatchState state; |
42 | 18 | state.instruction_position = 0; |
43 | 18 | auto check_jump = [&]<typename T>(OpCode const& opcode) { |
44 | 4 | auto& op = static_cast<T const&>(opcode); |
45 | 4 | ssize_t jump_offset = op.size() + op.offset(); |
46 | 4 | if (jump_offset >= 0) { |
47 | 4 | block_boundaries.append({ end_of_last_block, state.instruction_position }); |
48 | 4 | end_of_last_block = state.instruction_position + opcode.size(); |
49 | 4 | } else { |
50 | | // This op jumps back; see if that's within this "block". |
51 | 0 | if (jump_offset + state.instruction_position > end_of_last_block) { |
52 | | // Split the block! |
53 | 0 | block_boundaries.append({ end_of_last_block, jump_offset + state.instruction_position }); |
54 | 0 | block_boundaries.append({ jump_offset + state.instruction_position, state.instruction_position }); |
55 | 0 | end_of_last_block = state.instruction_position + opcode.size(); |
56 | 0 | } else { |
57 | | // Nope, it's just a jump to another block |
58 | 0 | block_boundaries.append({ end_of_last_block, state.instruction_position }); |
59 | 0 | end_of_last_block = state.instruction_position + opcode.size(); |
60 | 0 | } |
61 | 0 | } |
62 | 4 | }; |
63 | 44.0k | for (;;) { |
64 | 44.0k | auto& opcode = bytecode.get_opcode(state); |
65 | | |
66 | 44.0k | switch (opcode.opcode_id()) { |
67 | 0 | case OpCodeId::Jump: |
68 | 0 | check_jump.template operator()<OpCode_Jump>(opcode); |
69 | 0 | break; |
70 | 0 | case OpCodeId::JumpNonEmpty: |
71 | 0 | check_jump.template operator()<OpCode_JumpNonEmpty>(opcode); |
72 | 0 | break; |
73 | 0 | case OpCodeId::ForkJump: |
74 | 0 | check_jump.template operator()<OpCode_ForkJump>(opcode); |
75 | 0 | break; |
76 | 4 | case OpCodeId::ForkStay: |
77 | 4 | check_jump.template operator()<OpCode_ForkStay>(opcode); |
78 | 4 | break; |
79 | 0 | case OpCodeId::FailForks: |
80 | 0 | block_boundaries.append({ end_of_last_block, state.instruction_position }); |
81 | 0 | end_of_last_block = state.instruction_position + opcode.size(); |
82 | 0 | break; |
83 | 0 | case OpCodeId::Repeat: { |
84 | | // Repeat produces two blocks, one containing its repeated expr, and one after that. |
85 | 0 | auto repeat_start = state.instruction_position - static_cast<OpCode_Repeat const&>(opcode).offset(); |
86 | 0 | if (repeat_start > end_of_last_block) |
87 | 0 | block_boundaries.append({ end_of_last_block, repeat_start }); |
88 | 0 | block_boundaries.append({ repeat_start, state.instruction_position }); |
89 | 0 | end_of_last_block = state.instruction_position + opcode.size(); |
90 | 0 | break; |
91 | 0 | } |
92 | 44.0k | default: |
93 | 44.0k | break; |
94 | 44.0k | } |
95 | | |
96 | 44.0k | auto next_ip = state.instruction_position + opcode.size(); |
97 | 44.0k | if (next_ip < bytecode_size) |
98 | 44.0k | state.instruction_position = next_ip; |
99 | 18 | else |
100 | 18 | break; |
101 | 44.0k | } |
102 | | |
103 | 18 | if (end_of_last_block < bytecode_size) |
104 | 5 | block_boundaries.append({ end_of_last_block, bytecode_size }); |
105 | | |
106 | 18 | quick_sort(block_boundaries, [](auto& a, auto& b) { return a.start < b.start; }); |
107 | | |
108 | 18 | return block_boundaries; |
109 | 18 | } |
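A schematic worked example of the block splitting (the opcode sizes and offsets below are made up for illustration, not the exact ByteCode encoding):

    // Pattern /a+b/, roughly:
    //   ip 0: Compare 'a'
    //   ip 4: ForkStay -> 0     back-edge; check_jump ends the block here
    //   ip 6: Compare 'b'       start of the following block
    //
    // check_jump appends { end_of_last_block, ip } at each eligible jump, the final
    // `if (end_of_last_block < bytecode_size)` flushes the tail, and quick_sort puts
    // the boundaries in order:
    //   { 0, 4 }   the repeated expression (RE0)
    //   { 6, 10 }  the following block (RE1)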
110 | | |
111 | | enum class AtomicRewritePreconditionResult { |
112 | | SatisfiedWithProperHeader, |
113 | | SatisfiedWithEmptyHeader, |
114 | | NotSatisfied, |
115 | | }; |
116 | | static AtomicRewritePreconditionResult block_satisfies_atomic_rewrite_precondition(ByteCode const& bytecode, Block const& repeated_block, Block const& following_block) |
117 | 0 | { |
118 | 0 | Vector<Vector<CompareTypeAndValuePair>> repeated_values; |
119 | 0 | HashTable<size_t> active_capture_groups; |
120 | 0 | MatchState state; |
121 | 0 | for (state.instruction_position = repeated_block.start; state.instruction_position < repeated_block.end;) { |
122 | 0 | auto& opcode = bytecode.get_opcode(state); |
123 | 0 | switch (opcode.opcode_id()) { |
124 | 0 | case OpCodeId::Compare: { |
125 | 0 | auto compares = static_cast<OpCode_Compare const&>(opcode).flat_compares(); |
126 | 0 | if (repeated_values.is_empty() && any_of(compares, [](auto& compare) { return compare.type == CharacterCompareType::AnyChar; })) |
127 | 0 | return AtomicRewritePreconditionResult::NotSatisfied; |
128 | 0 | repeated_values.append(move(compares)); |
129 | 0 | break; |
130 | 0 | } |
131 | 0 | case OpCodeId::CheckBegin: |
132 | 0 | case OpCodeId::CheckEnd: |
133 | 0 | if (repeated_values.is_empty()) |
134 | 0 | return AtomicRewritePreconditionResult::SatisfiedWithProperHeader; |
135 | 0 | break; |
136 | 0 | case OpCodeId::CheckBoundary: |
137 | | // FIXME: What should we do with these? For now, let's fail. |
138 | 0 | return AtomicRewritePreconditionResult::NotSatisfied; |
139 | 0 | case OpCodeId::Restore: |
140 | 0 | case OpCodeId::GoBack: |
141 | 0 | return AtomicRewritePreconditionResult::NotSatisfied; |
142 | 0 | case OpCodeId::SaveRightCaptureGroup: |
143 | 0 | active_capture_groups.set(static_cast<OpCode_SaveRightCaptureGroup const&>(opcode).id()); |
144 | 0 | break; |
145 | 0 | case OpCodeId::SaveLeftCaptureGroup: |
146 | 0 | active_capture_groups.set(static_cast<OpCode_SaveLeftCaptureGroup const&>(opcode).id()); |
147 | 0 | break; |
148 | 0 | default: |
149 | 0 | break; |
150 | 0 | } |
151 | | |
152 | 0 | state.instruction_position += opcode.size(); |
153 | 0 | } |
154 | 0 | dbgln_if(REGEX_DEBUG, "Found {} entries in reference", repeated_values.size()); |
155 | 0 | dbgln_if(REGEX_DEBUG, "Found {} active capture groups", active_capture_groups.size()); |
156 | |
157 | 0 | bool following_block_has_at_least_one_compare = false; |
158 | | // Find the first compare in the following block; it must NOT match any of the values in `repeated_values`. |
159 | 0 | for (state.instruction_position = following_block.start; state.instruction_position < following_block.end;) { |
160 | 0 | auto& opcode = bytecode.get_opcode(state); |
161 | 0 | switch (opcode.opcode_id()) { |
162 | | // Note: These have to exist since we're effectively repeating the following block as well |
163 | 0 | case OpCodeId::SaveRightCaptureGroup: |
164 | 0 | active_capture_groups.set(static_cast<OpCode_SaveRightCaptureGroup const&>(opcode).id()); |
165 | 0 | break; |
166 | 0 | case OpCodeId::SaveLeftCaptureGroup: |
167 | 0 | active_capture_groups.set(static_cast<OpCode_SaveLeftCaptureGroup const&>(opcode).id()); |
168 | 0 | break; |
169 | 0 | case OpCodeId::Compare: { |
170 | 0 | following_block_has_at_least_one_compare = true; |
171 | | // We found a compare; let's see what it has. |
172 | 0 | auto compares = static_cast<OpCode_Compare const&>(opcode).flat_compares(); |
173 | 0 | if (compares.is_empty()) |
174 | 0 | break; |
175 | | |
176 | 0 | if (any_of(compares, [&](auto& compare) { |
177 | 0 | return compare.type == CharacterCompareType::AnyChar |
178 | 0 | || (compare.type == CharacterCompareType::Reference && active_capture_groups.contains(compare.value)); |
179 | 0 | })) |
180 | 0 | return AtomicRewritePreconditionResult::NotSatisfied; |
181 | | |
182 | 0 | for (auto& repeated_value : repeated_values) { |
183 | | // FIXME: This is too naive! |
184 | 0 | if (any_of(repeated_value, [](auto& compare) { return compare.type == CharacterCompareType::AnyChar; })) |
185 | 0 | return AtomicRewritePreconditionResult::NotSatisfied; |
186 | | |
187 | 0 | for (auto& repeated_compare : repeated_value) { |
188 | | // FIXME: This is too naive! it will miss _tons_ of cases since it doesn't check ranges! |
189 | 0 | if (any_of(compares, [&](auto& compare) { return compare.type == repeated_compare.type && compare.value == repeated_compare.value; })) |
190 | 0 | return AtomicRewritePreconditionResult::NotSatisfied; |
191 | 0 | } |
192 | 0 | } |
193 | 0 | return AtomicRewritePreconditionResult::SatisfiedWithProperHeader; |
194 | 0 | } |
195 | 0 | case OpCodeId::CheckBegin: |
196 | 0 | case OpCodeId::CheckEnd: |
197 | 0 | return AtomicRewritePreconditionResult::SatisfiedWithProperHeader; // Nothing can match the end! |
198 | 0 | case OpCodeId::CheckBoundary: |
199 | | // FIXME: What should we do with these? For now, consider them a failure. |
200 | 0 | return AtomicRewritePreconditionResult::NotSatisfied; |
201 | 0 | default: |
202 | 0 | break; |
203 | 0 | } |
204 | | |
205 | 0 | state.instruction_position += opcode.size(); |
206 | 0 | } |
207 | | |
208 | 0 | if (following_block_has_at_least_one_compare) |
209 | 0 | return AtomicRewritePreconditionResult::SatisfiedWithProperHeader; |
210 | 0 | return AtomicRewritePreconditionResult::SatisfiedWithEmptyHeader; |
211 | 0 | } |
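Reading the checks above, a few concrete cases (assumed behaviour, for orientation):

    // /a*b/   repeated_values = { 'a' }, follow block starts with Compare 'b'
    //         -> no overlap: SatisfiedWithProperHeader, the rewrite is safe.
    // /a*ab/  follow block starts with 'a', which end(RE0) also matches
    //         -> NotSatisfied: an atomic a* would consume both a's and "aab" would fail.
    // /a*$/   follow block starts with CheckEnd
    //         -> SatisfiedWithProperHeader ("nothing can match the end").
    // /a*\bx/ follow block starts with CheckBoundary
    //         -> NotSatisfied (the FIXME above deliberately punts on boundaries).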
212 | | |
213 | | template<typename Parser> |
214 | | void Regex<Parser>::attempt_rewrite_loops_as_atomic_groups(BasicBlockList const& basic_blocks) |
215 | 18 | { |
216 | 18 | auto& bytecode = parser_result.bytecode; |
217 | 18 | if constexpr (REGEX_DEBUG) { |
218 | 18 | RegexDebug dbg; |
219 | 18 | dbg.print_bytecode(*this); |
220 | 18 | for (auto const& block : basic_blocks) |
221 | 18 | dbgln("block from {} to {}", block.start, block.end); |
222 | 18 | } |
223 | | |
224 | | // A pattern such as: |
225 | | // bb0 | RE0 |
226 | | // | ForkX bb0 |
227 | | // ------------------------- |
228 | | // bb1 | RE1 |
229 | | // can be rewritten as: |
230 | | // ------------------------- |
231 | | // bb0 | RE0 |
232 | | // | ForkReplaceX bb0 |
233 | | // ------------------------- |
234 | | // bb1 | RE1 |
235 | | // provided that first(RE1) not-in end(RE0), which is to say |
236 | | // that RE1 cannot start with whatever RE0 has matched (ever). |
237 | | // |
238 | | // Alternatively, a second form of this pattern can also occur: |
239 | | // bb0 | * |
240 | | // | ForkX bb2 |
241 | | // ------------------------ |
242 | | // bb1 | RE0 |
243 | | // | Jump bb0 |
244 | | // ------------------------ |
245 | | // bb2 | RE1 |
246 | | // which can be transformed (with the same preconditions) to: |
247 | | // bb0 | * |
248 | | // | ForkReplaceX bb2 |
249 | | // ------------------------ |
250 | | // bb1 | RE0 |
251 | | // | Jump bb0 |
252 | | // ------------------------ |
253 | | // bb2 | RE1 |
254 | | |
255 | 18 | enum class AlternateForm { |
256 | 18 | DirectLoopWithoutHeader, // Loop without a proper header: a block forking to itself, i.e. the first form above. |
257 | 18 | DirectLoopWithoutHeaderAndEmptyFollow, // Same as the first form, but with RE1 being empty. |
258 | 18 | DirectLoopWithHeader, // Loop with a proper header, i.e. the second form above. |
259 | 18 | }; |
260 | 18 | struct CandidateBlock { |
261 | 18 | Block forking_block; |
262 | 18 | Optional<Block> new_target_block; |
263 | 18 | AlternateForm form; |
264 | 18 | }; |
265 | 18 | Vector<CandidateBlock> candidate_blocks; |
266 | | |
267 | 18 | auto is_an_eligible_jump = [](OpCode const& opcode, size_t ip, size_t block_start, AlternateForm alternate_form) { |
268 | 13 | switch (opcode.opcode_id()) { |
269 | 0 | case OpCodeId::JumpNonEmpty: { |
270 | 0 | auto const& op = static_cast<OpCode_JumpNonEmpty const&>(opcode); |
271 | 0 | auto form = op.form(); |
272 | 0 | if (form != OpCodeId::Jump && alternate_form == AlternateForm::DirectLoopWithHeader) |
273 | 0 | return false; |
274 | 0 | if (form != OpCodeId::ForkJump && form != OpCodeId::ForkStay && alternate_form == AlternateForm::DirectLoopWithoutHeader) |
275 | 0 | return false; |
276 | 0 | return op.offset() + ip + opcode.size() == block_start; |
277 | 0 | } |
278 | 0 | case OpCodeId::ForkJump: |
279 | 0 | if (alternate_form == AlternateForm::DirectLoopWithHeader) |
280 | 0 | return false; |
281 | 0 | return static_cast<OpCode_ForkJump const&>(opcode).offset() + ip + opcode.size() == block_start; |
282 | 7 | case OpCodeId::ForkStay: |
283 | 7 | if (alternate_form == AlternateForm::DirectLoopWithHeader) |
284 | 3 | return false; |
285 | 4 | return static_cast<OpCode_ForkStay const&>(opcode).offset() + ip + opcode.size() == block_start; |
286 | 0 | case OpCodeId::Jump: |
287 | | // Infinite loop does *not* produce forks. |
288 | 0 | if (alternate_form == AlternateForm::DirectLoopWithoutHeader) |
289 | 0 | return false; |
290 | 0 | if (alternate_form == AlternateForm::DirectLoopWithHeader) |
291 | 0 | return static_cast<OpCode_Jump const&>(opcode).offset() + ip + opcode.size() == block_start; |
292 | 0 | VERIFY_NOT_REACHED(); |
293 | 6 | default: |
294 | 6 | return false; |
295 | 13 | } |
296 | 13 | }; |
297 | 27 | for (size_t i = 0; i < basic_blocks.size(); ++i) { |
298 | 9 | auto forking_block = basic_blocks[i]; |
299 | 9 | Optional<Block> fork_fallback_block; |
300 | 9 | if (i + 1 < basic_blocks.size()) |
301 | 4 | fork_fallback_block = basic_blocks[i + 1]; |
302 | 9 | MatchState state; |
303 | | // Check if the last instruction in this block is a jump to the block itself: |
304 | 9 | { |
305 | 9 | state.instruction_position = forking_block.end; |
306 | 9 | auto& opcode = bytecode.get_opcode(state); |
307 | 9 | if (is_an_eligible_jump(opcode, state.instruction_position, forking_block.start, AlternateForm::DirectLoopWithoutHeader)) { |
308 | | // We've found RE0 (and RE1 is just the following block, if any); let's see if the precondition applies. |
309 | | // If RE1 is empty, there's no first(RE1), so this is an automatic pass. |
310 | 0 | if (!fork_fallback_block.has_value() || fork_fallback_block->end == fork_fallback_block->start) { |
311 | 0 | candidate_blocks.append({ forking_block, fork_fallback_block, AlternateForm::DirectLoopWithoutHeader }); |
312 | 0 | break; |
313 | 0 | } |
314 | | |
315 | 0 | auto precondition = block_satisfies_atomic_rewrite_precondition(bytecode, forking_block, *fork_fallback_block); |
316 | 0 | if (precondition == AtomicRewritePreconditionResult::SatisfiedWithProperHeader) { |
317 | 0 | candidate_blocks.append({ forking_block, fork_fallback_block, AlternateForm::DirectLoopWithoutHeader }); |
318 | 0 | break; |
319 | 0 | } |
320 | 0 | if (precondition == AtomicRewritePreconditionResult::SatisfiedWithEmptyHeader) { |
321 | 0 | candidate_blocks.append({ forking_block, fork_fallback_block, AlternateForm::DirectLoopWithoutHeaderAndEmptyFollow }); |
322 | 0 | break; |
323 | 0 | } |
324 | 0 | } |
325 | 9 | } |
326 | | // Check if the last instruction in the next block is a direct jump back to this block |
327 | 9 | if (fork_fallback_block.has_value()) { |
328 | 4 | state.instruction_position = fork_fallback_block->end; |
329 | 4 | auto& opcode = bytecode.get_opcode(state); |
330 | 4 | if (is_an_eligible_jump(opcode, state.instruction_position, forking_block.start, AlternateForm::DirectLoopWithHeader)) { |
331 | | // We've found bb1 and bb0; let's just make sure that bb0 forks to bb2. |
332 | 0 | state.instruction_position = forking_block.end; |
333 | 0 | auto& opcode = bytecode.get_opcode(state); |
334 | 0 | if (opcode.opcode_id() == OpCodeId::ForkJump || opcode.opcode_id() == OpCodeId::ForkStay) { |
335 | 0 | Optional<Block> block_following_fork_fallback; |
336 | 0 | if (i + 2 < basic_blocks.size()) |
337 | 0 | block_following_fork_fallback = basic_blocks[i + 2]; |
338 | 0 | if (!block_following_fork_fallback.has_value() |
339 | 0 | || block_satisfies_atomic_rewrite_precondition(bytecode, *fork_fallback_block, *block_following_fork_fallback) != AtomicRewritePreconditionResult::NotSatisfied) { |
340 | 0 | candidate_blocks.append({ forking_block, {}, AlternateForm::DirectLoopWithHeader }); |
341 | 0 | break; |
342 | 0 | } |
343 | 0 | } |
344 | 0 | } |
345 | 4 | } |
346 | 9 | } |
347 | | |
348 | 18 | dbgln_if(REGEX_DEBUG, "Found {} candidate blocks", candidate_blocks.size()); |
349 | 18 | if (candidate_blocks.is_empty()) { |
350 | 18 | dbgln_if(REGEX_DEBUG, "Failed to find anything for {}", pattern_value); |
351 | 18 | return; |
352 | 18 | } |
353 | | |
354 | 0 | RedBlackTree<size_t, size_t> needed_patches; |
355 | | |
356 | | // Reverse the blocks, so we can patch the bytecode without messing with the later patches. |
357 | 0 | quick_sort(candidate_blocks, [](auto& a, auto& b) { return b.forking_block.start > a.forking_block.start; }); |
358 | 0 | for (auto& candidate : candidate_blocks) { |
359 | | // Note that both forms share a ForkReplace patch in forking_block. |
360 | | // Patch the ForkX in forking_block to be a ForkReplaceX instead. |
361 | 0 | auto& opcode_id = bytecode[candidate.forking_block.end]; |
362 | 0 | if (opcode_id == (ByteCodeValueType)OpCodeId::ForkStay) { |
363 | 0 | opcode_id = (ByteCodeValueType)OpCodeId::ForkReplaceStay; |
364 | 0 | } else if (opcode_id == (ByteCodeValueType)OpCodeId::ForkJump) { |
365 | 0 | opcode_id = (ByteCodeValueType)OpCodeId::ForkReplaceJump; |
366 | 0 | } else if (opcode_id == (ByteCodeValueType)OpCodeId::JumpNonEmpty) { |
367 | 0 | auto& jump_opcode_id = bytecode[candidate.forking_block.end + 3]; |
368 | 0 | if (jump_opcode_id == (ByteCodeValueType)OpCodeId::ForkStay) |
369 | 0 | jump_opcode_id = (ByteCodeValueType)OpCodeId::ForkReplaceStay; |
370 | 0 | else if (jump_opcode_id == (ByteCodeValueType)OpCodeId::ForkJump) |
371 | 0 | jump_opcode_id = (ByteCodeValueType)OpCodeId::ForkReplaceJump; |
372 | 0 | else |
373 | 0 | VERIFY_NOT_REACHED(); |
374 | 0 | } else { |
375 | 0 | VERIFY_NOT_REACHED(); |
376 | 0 | } |
377 | 0 | } |
378 | |
379 | 0 | if (!needed_patches.is_empty()) { |
380 | 0 | MatchState state; |
381 | 0 | auto bytecode_size = bytecode.size(); |
382 | 0 | state.instruction_position = 0; |
383 | 0 | struct Patch { |
384 | 0 | ssize_t value; |
385 | 0 | size_t offset; |
386 | 0 | bool should_negate { false }; |
387 | 0 | }; |
388 | 0 | for (;;) { |
389 | 0 | if (state.instruction_position >= bytecode_size) |
390 | 0 | break; |
391 | | |
392 | 0 | auto& opcode = bytecode.get_opcode(state); |
393 | 0 | Stack<Patch, 2> patch_points; |
394 | |
395 | 0 | switch (opcode.opcode_id()) { |
396 | 0 | case OpCodeId::Jump: |
397 | 0 | patch_points.push({ static_cast<OpCode_Jump const&>(opcode).offset(), state.instruction_position + 1 }); |
398 | 0 | break; |
399 | 0 | case OpCodeId::JumpNonEmpty: |
400 | 0 | patch_points.push({ static_cast<OpCode_JumpNonEmpty const&>(opcode).offset(), state.instruction_position + 1 }); |
401 | 0 | patch_points.push({ static_cast<OpCode_JumpNonEmpty const&>(opcode).checkpoint(), state.instruction_position + 2 }); |
402 | 0 | break; |
403 | 0 | case OpCodeId::ForkJump: |
404 | 0 | patch_points.push({ static_cast<OpCode_ForkJump const&>(opcode).offset(), state.instruction_position + 1 }); |
405 | 0 | break; |
406 | 0 | case OpCodeId::ForkStay: |
407 | 0 | patch_points.push({ static_cast<OpCode_ForkStay const&>(opcode).offset(), state.instruction_position + 1 }); |
408 | 0 | break; |
409 | 0 | case OpCodeId::Repeat: |
410 | 0 | patch_points.push({ -(ssize_t) static_cast<OpCode_Repeat const&>(opcode).offset(), state.instruction_position + 1, true }); |
411 | 0 | break; |
412 | 0 | default: |
413 | 0 | break; |
414 | 0 | } |
415 | | |
416 | 0 | while (!patch_points.is_empty()) { |
417 | 0 | auto& patch_point = patch_points.top(); |
418 | 0 | auto target_offset = patch_point.value + state.instruction_position + opcode.size(); |
419 | |
420 | 0 | constexpr auto do_patch = [](auto& patch_it, auto& patch_point, auto& target_offset, auto& bytecode, auto ip) { |
421 | 0 | if (patch_it.key() == ip) |
422 | 0 | return; |
423 | | |
424 | 0 | if (patch_point.value < 0 && target_offset <= patch_it.key() && ip > patch_it.key()) |
425 | 0 | bytecode[patch_point.offset] += (patch_point.should_negate ? 1 : -1) * (*patch_it); |
426 | 0 | else if (patch_point.value > 0 && target_offset >= patch_it.key() && ip < patch_it.key()) |
427 | 0 | bytecode[patch_point.offset] += (patch_point.should_negate ? -1 : 1) * (*patch_it); |
428 | 0 | }; |
429 | |
430 | 0 | if (auto patch_it = needed_patches.find_largest_not_above_iterator(target_offset); !patch_it.is_end()) |
431 | 0 | do_patch(patch_it, patch_point, target_offset, bytecode, state.instruction_position); |
432 | 0 | else if (auto patch_it = needed_patches.find_largest_not_above_iterator(state.instruction_position); !patch_it.is_end()) |
433 | 0 | do_patch(patch_it, patch_point, target_offset, bytecode, state.instruction_position); |
434 | |
435 | 0 | patch_points.pop(); |
436 | 0 | } |
437 | |
438 | 0 | state.instruction_position += opcode.size(); |
439 | 0 | } |
440 | 0 | } |
441 | | |
442 | 0 | if constexpr (REGEX_DEBUG) { |
443 | 0 | warnln("Transformed to:"); |
444 | 0 | RegexDebug dbg; |
445 | 0 | dbg.print_bytecode(*this); |
446 | 0 | } |
447 | 0 | } |
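The net effect of the candidate patching above, sketched in terms of the opcode names (the precise ForkReplace semantics live in the bytecode interpreter, so this is a reading of the transform, not its implementation):

    // Before: bb0 | ... RE0 ...          After: bb0 | ... RE0 ...
    //             | ForkStay bb0                    | ForkReplaceStay bb0
    //         bb1 | ... RE1 ...                 bb1 | ... RE1 ...
    //
    // A plain ForkX pushes a new saved state on every loop iteration; ForkReplaceX
    // overwrites the previously saved one, so the loop holds O(1) saved states and
    // cannot be re-entered by backtracking once bb1 has been tried.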
448 | | |
449 | | void Optimizer::append_alternation(ByteCode& target, ByteCode&& left, ByteCode&& right) |
450 | 0 | { |
451 | 0 | Array<ByteCode, 2> alternatives; |
452 | 0 | alternatives[0] = move(left); |
453 | 0 | alternatives[1] = move(right); |
454 | |
455 | 0 | append_alternation(target, alternatives); |
456 | 0 | } |
457 | | |
458 | | void Optimizer::append_alternation(ByteCode& target, Span<ByteCode> alternatives) |
459 | 64 | { |
460 | 64 | if (alternatives.size() == 0) |
461 | 0 | return; |
462 | | |
463 | 64 | if (alternatives.size() == 1) |
464 | 64 | return target.extend(move(alternatives[0])); |
465 | | |
466 | 0 | if (all_of(alternatives, [](auto& x) { return x.is_empty(); })) |
467 | 0 | return; |
468 | | |
469 | 0 | for (auto& entry : alternatives) |
470 | 0 | entry.flatten(); |
471 | |
472 | | #if REGEX_DEBUG |
473 | | ScopeLogger<true> log; |
474 | | warnln("Alternations:"); |
475 | | RegexDebug dbg; |
476 | | for (auto& entry : alternatives) { |
477 | | warnln("----------"); |
478 | | dbg.print_bytecode(entry); |
479 | | } |
480 | | ScopeGuard print_at_end { |
481 | | [&] { |
482 | | warnln("======================"); |
483 | | RegexDebug dbg; |
484 | | dbg.print_bytecode(target); |
485 | | } |
486 | | }; |
487 | | #endif |
488 | |
489 | 0 | Vector<Vector<Detail::Block>> basic_blocks; |
490 | 0 | basic_blocks.ensure_capacity(alternatives.size()); |
491 | |
492 | 0 | for (auto& entry : alternatives) |
493 | 0 | basic_blocks.append(Regex<PosixBasicParser>::split_basic_blocks(entry)); |
494 | |
495 | 0 | size_t left_skip = 0; |
496 | 0 | size_t shared_block_count = basic_blocks.first().size(); |
497 | 0 | for (auto& entry : basic_blocks) |
498 | 0 | shared_block_count = min(shared_block_count, entry.size()); |
499 | |
500 | 0 | MatchState state; |
501 | 0 | for (size_t block_index = 0; block_index < shared_block_count; block_index++) { |
502 | 0 | auto& left_block = basic_blocks.first()[block_index]; |
503 | 0 | auto left_end = block_index + 1 == basic_blocks.first().size() ? left_block.end : basic_blocks.first()[block_index + 1].start; |
504 | 0 | auto can_continue = true; |
505 | 0 | for (size_t i = 1; i < alternatives.size(); ++i) { |
506 | 0 | auto& right_blocks = basic_blocks[i]; |
507 | 0 | auto& right_block = right_blocks[block_index]; |
508 | 0 | auto right_end = block_index + 1 == right_blocks.size() ? right_block.end : right_blocks[block_index + 1].start; |
509 | |
510 | 0 | if (left_end - left_block.start != right_end - right_block.start) { |
511 | 0 | can_continue = false; |
512 | 0 | break; |
513 | 0 | } |
514 | | |
515 | 0 | if (alternatives[0].spans().slice(left_block.start, left_end - left_block.start) != alternatives[i].spans().slice(right_block.start, right_end - right_block.start)) { |
516 | 0 | can_continue = false; |
517 | 0 | break; |
518 | 0 | } |
519 | 0 | } |
520 | 0 | if (!can_continue) |
521 | 0 | break; |
522 | | |
523 | 0 | size_t i = 0; |
524 | 0 | for (auto& entry : alternatives) { |
525 | 0 | auto& blocks = basic_blocks[i]; |
526 | 0 | auto& block = blocks[block_index]; |
527 | 0 | auto end = block_index + 1 == blocks.size() ? block.end : blocks[block_index + 1].start; |
528 | 0 | state.instruction_position = block.start; |
529 | 0 | size_t skip = 0; |
530 | 0 | while (state.instruction_position < end) { |
531 | 0 | auto& opcode = entry.get_opcode(state); |
532 | 0 | state.instruction_position += opcode.size(); |
533 | 0 | skip = state.instruction_position; |
534 | 0 | } |
535 | 0 | left_skip = min(skip, left_skip); |
536 | 0 | } |
537 | 0 | } |
538 | |
539 | 0 | dbgln_if(REGEX_DEBUG, "Skipping {}/{} bytecode entries from {}", left_skip, 0, alternatives[0].size()); |
540 | |
541 | 0 | if (left_skip > 0) { |
542 | 0 | target.extend(alternatives[0].release_slice(basic_blocks.first().first().start, left_skip)); |
543 | 0 | auto first = true; |
544 | 0 | for (auto& entry : alternatives) { |
545 | 0 | if (first) { |
546 | 0 | first = false; |
547 | 0 | continue; |
548 | 0 | } |
549 | 0 | entry = entry.release_slice(left_skip); |
550 | 0 | } |
551 | 0 | } |
552 | |
553 | 0 | if (all_of(alternatives, [](auto& entry) { return entry.is_empty(); })) |
554 | 0 | return; |
555 | | |
556 | 0 | size_t patch_start = target.size(); |
557 | 0 | for (size_t i = 1; i < alternatives.size(); ++i) { |
558 | 0 | target.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkJump)); |
559 | 0 | target.empend(0u); // To be filled later. |
560 | 0 | } |
561 | |
562 | 0 | size_t size_to_jump = 0; |
563 | 0 | bool seen_one_empty = false; |
564 | 0 | for (size_t i = alternatives.size(); i > 0; --i) { |
565 | 0 | auto& entry = alternatives[i - 1]; |
566 | 0 | if (entry.is_empty()) { |
567 | 0 | if (seen_one_empty) |
568 | 0 | continue; |
569 | 0 | seen_one_empty = true; |
570 | 0 | } |
571 | | |
572 | 0 | auto is_first = i == 1; |
573 | 0 | auto instruction_size = entry.size() + (is_first ? 0 : 2); // Jump; -> +2 |
574 | 0 | size_to_jump += instruction_size; |
575 | |
576 | 0 | if (!is_first) |
577 | 0 | target[patch_start + (i - 2) * 2 + 1] = size_to_jump + (alternatives.size() - i) * 2; |
578 | |
579 | 0 | dbgln_if(REGEX_DEBUG, "{} size = {}, cum={}", i - 1, instruction_size, size_to_jump); |
580 | 0 | } |
581 | |
582 | 0 | seen_one_empty = false; |
583 | 0 | for (size_t i = alternatives.size(); i > 0; --i) { |
584 | 0 | auto& chunk = alternatives[i - 1]; |
585 | 0 | if (chunk.is_empty()) { |
586 | 0 | if (seen_one_empty) |
587 | 0 | continue; |
588 | 0 | seen_one_empty = true; |
589 | 0 | } |
590 | | |
591 | 0 | ByteCode* previous_chunk = nullptr; |
592 | 0 | size_t j = i - 1; |
593 | 0 | auto seen_one_empty_before = chunk.is_empty(); |
594 | 0 | while (j >= 1) { |
595 | 0 | --j; |
596 | 0 | auto& candidate_chunk = alternatives[j]; |
597 | 0 | if (candidate_chunk.is_empty()) { |
598 | 0 | if (seen_one_empty_before) |
599 | 0 | continue; |
600 | 0 | } |
601 | 0 | previous_chunk = &candidate_chunk; |
602 | 0 | break; |
603 | 0 | } |
604 | |
605 | 0 | size_to_jump -= chunk.size() + (previous_chunk ? 2 : 0); |
606 | |
607 | 0 | target.extend(move(chunk)); |
608 | 0 | target.empend(static_cast<ByteCodeValueType>(OpCodeId::Jump)); |
609 | 0 | target.empend(size_to_jump); // Jump to the _END label |
610 | 0 | } |
611 | 0 | } |
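Roughly, the layout this function emits for non-trivial alternatives looks like the following (offsets elided; note the chunks are appended by descending index, matching the two descending loops above):

    // ForkJump  --> [alternative]     one fork per extra alternative; the 0u
    // ForkJump  --> [alternative]     placeholders are patched in the first loop
    // [alternative]  Jump --> END
    // [alternative]  Jump --> END
    // [alternative]  Jump --> END
    // END:
    //
    // When the alternatives share leading bytecode (e.g. /abc|abd/ sharing "ab"),
    // the common prefix is detected via split_basic_blocks and emitted once up
    // front (`left_skip`), before any forks.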
612 | | |
613 | | enum class LookupTableInsertionOutcome { |
614 | | Successful, |
615 | | ReplaceWithAnyChar, |
616 | | TemporaryInversionNeeded, |
617 | | PermanentInversionNeeded, |
618 | | CannotPlaceInTable, |
619 | | }; |
620 | | static LookupTableInsertionOutcome insert_into_lookup_table(RedBlackTree<ByteCodeValueType, CharRange>& table, CompareTypeAndValuePair pair) |
621 | 443 | { |
622 | 443 | switch (pair.type) { |
623 | 0 | case CharacterCompareType::Inverse: |
624 | 0 | return LookupTableInsertionOutcome::PermanentInversionNeeded; |
625 | 0 | case CharacterCompareType::TemporaryInverse: |
626 | 0 | return LookupTableInsertionOutcome::TemporaryInversionNeeded; |
627 | 0 | case CharacterCompareType::AnyChar: |
628 | 0 | return LookupTableInsertionOutcome::ReplaceWithAnyChar; |
629 | 0 | case CharacterCompareType::CharClass: |
630 | 0 | return LookupTableInsertionOutcome::CannotPlaceInTable; |
631 | 443 | case CharacterCompareType::Char: |
632 | 443 | table.insert(pair.value, { (u32)pair.value, (u32)pair.value }); |
633 | 443 | break; |
634 | 0 | case CharacterCompareType::CharRange: { |
635 | 0 | CharRange range { pair.value }; |
636 | 0 | table.insert(range.from, range); |
637 | 0 | break; |
638 | 0 | } |
639 | 0 | case CharacterCompareType::Reference: |
640 | 0 | case CharacterCompareType::Property: |
641 | 0 | case CharacterCompareType::GeneralCategory: |
642 | 0 | case CharacterCompareType::Script: |
643 | 0 | case CharacterCompareType::ScriptExtension: |
644 | 0 | return LookupTableInsertionOutcome::CannotPlaceInTable; |
645 | 0 | case CharacterCompareType::Undefined: |
646 | 0 | case CharacterCompareType::RangeExpressionDummy: |
647 | 0 | case CharacterCompareType::String: |
648 | 0 | case CharacterCompareType::LookupTable: |
649 | 0 | VERIFY_NOT_REACHED(); |
650 | 443 | } |
651 | | |
652 | 443 | return LookupTableInsertionOutcome::Successful; |
653 | 443 | } |
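A quick map of inputs to outcomes, straight from the switch above:

    // { Char 'x' }          -> inserts the degenerate range ['x','x']: Successful
    // { CharRange 'a'-'f' } -> inserts ['a','f']: Successful
    // { AnyChar }           -> ReplaceWithAnyChar (a table would be redundant)
    // { TemporaryInverse }  -> TemporaryInversionNeeded (caller swaps tables for one pair)
    // { Inverse }           -> PermanentInversionNeeded (caller swaps tables for good)
    // { Property, ... }     -> CannotPlaceInTable (emitted as a plain compare argument)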
654 | | |
655 | | void Optimizer::append_character_class(ByteCode& target, Vector<CompareTypeAndValuePair>&& pairs) |
656 | 581k | { |
657 | 581k | ByteCode arguments; |
658 | 581k | size_t argument_count = 0; |
659 | | |
660 | 581k | if (pairs.size() <= 1) { |
661 | 581k | for (auto& pair : pairs) { |
662 | 581k | arguments.append(to_underlying(pair.type)); |
663 | 581k | if (pair.type != CharacterCompareType::AnyChar && pair.type != CharacterCompareType::TemporaryInverse && pair.type != CharacterCompareType::Inverse) |
664 | 580k | arguments.append(pair.value); |
665 | 581k | ++argument_count; |
666 | 581k | } |
667 | 581k | } else { |
668 | 18 | RedBlackTree<ByteCodeValueType, CharRange> table; |
669 | 18 | RedBlackTree<ByteCodeValueType, CharRange> inverted_table; |
670 | 18 | auto* current_table = &table; |
671 | 18 | auto* current_inverted_table = &inverted_table; |
672 | 18 | bool invert_for_next_iteration = false; |
673 | 18 | bool is_currently_inverted = false; |
674 | | |
675 | 443 | for (auto& value : pairs) { |
676 | 443 | auto should_invert_after_this_iteration = invert_for_next_iteration; |
677 | 443 | invert_for_next_iteration = false; |
678 | | |
679 | 443 | auto insertion_result = insert_into_lookup_table(*current_table, value); |
680 | 443 | switch (insertion_result) { |
681 | 443 | case LookupTableInsertionOutcome::Successful: |
682 | 443 | break; |
683 | 0 | case LookupTableInsertionOutcome::ReplaceWithAnyChar: { |
684 | 0 | table.clear(); |
685 | 0 | inverted_table.clear(); |
686 | 0 | arguments.append(to_underlying(CharacterCompareType::AnyChar)); |
687 | 0 | ++argument_count; |
688 | 0 | break; |
689 | 0 | } |
690 | 0 | case LookupTableInsertionOutcome::TemporaryInversionNeeded: |
691 | 0 | swap(current_table, current_inverted_table); |
692 | 0 | invert_for_next_iteration = true; |
693 | 0 | is_currently_inverted = !is_currently_inverted; |
694 | 0 | break; |
695 | 0 | case LookupTableInsertionOutcome::PermanentInversionNeeded: |
696 | 0 | swap(current_table, current_inverted_table); |
697 | 0 | is_currently_inverted = !is_currently_inverted; |
698 | 0 | break; |
699 | 0 | case LookupTableInsertionOutcome::CannotPlaceInTable: |
700 | 0 | if (is_currently_inverted) { |
701 | 0 | arguments.append(to_underlying(CharacterCompareType::TemporaryInverse)); |
702 | 0 | ++argument_count; |
703 | 0 | } |
704 | 0 | arguments.append(to_underlying(value.type)); |
705 | 0 | arguments.append(value.value); |
706 | 0 | ++argument_count; |
707 | 0 | break; |
708 | 443 | } |
709 | | |
710 | 443 | if (should_invert_after_this_iteration) { |
711 | 0 | swap(current_table, current_inverted_table); |
712 | 0 | is_currently_inverted = !is_currently_inverted; |
713 | 0 | } |
714 | 443 | } |
715 | 18 | auto append_table = [&](auto& table) { |
716 | 18 | ++argument_count; |
717 | 18 | arguments.append(to_underlying(CharacterCompareType::LookupTable)); |
718 | 18 | auto size_index = arguments.size(); |
719 | 18 | arguments.append(0); |
720 | 18 | Optional<CharRange> active_range; |
721 | 18 | size_t range_count = 0; |
722 | 443 | for (auto& range : table) { |
723 | 443 | if (!active_range.has_value()) { |
724 | 18 | active_range = range; |
725 | 18 | continue; |
726 | 18 | } |
727 | | |
728 | 425 | if (range.from <= active_range->to + 1 && range.to + 1 >= active_range->from) { |
729 | 343 | active_range = CharRange { min(range.from, active_range->from), max(range.to, active_range->to) }; |
730 | 343 | } else { |
731 | 82 | ++range_count; |
732 | 82 | arguments.append(active_range.release_value()); |
733 | 82 | active_range = range; |
734 | 82 | } |
735 | 425 | } |
736 | 18 | if (active_range.has_value()) { |
737 | 18 | ++range_count; |
738 | 18 | arguments.append(active_range.release_value()); |
739 | 18 | } |
740 | 18 | arguments[size_index] = range_count; |
741 | 18 | }; |
742 | | |
743 | 18 | if (!table.is_empty()) |
744 | 18 | append_table(table); |
745 | | |
746 | 18 | if (!inverted_table.is_empty()) { |
747 | 0 | ++argument_count; |
748 | 0 | arguments.append(to_underlying(CharacterCompareType::TemporaryInverse)); |
749 | 0 | append_table(inverted_table); |
750 | 0 | } |
751 | 18 | } |
752 | | |
753 | 581k | target.empend(static_cast<ByteCodeValueType>(OpCodeId::Compare)); |
754 | 581k | target.empend(argument_count); // number of arguments |
755 | 581k | target.empend(arguments.size()); // size of arguments |
756 | 581k | target.extend(move(arguments)); |
757 | 581k | } |
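The range coalescing inside append_table is a standard sorted-interval merge. A self-contained sketch of the same idea in plain C++ (standard types only, not the LibRegex ones):

    #include <algorithm>
    #include <vector>

    struct Range { unsigned from, to; };

    // Merge sorted ranges that touch or overlap, mirroring append_table's
    // `range.from <= active_range->to + 1` test.
    std::vector<Range> coalesce(std::vector<Range> ranges)
    {
        std::sort(ranges.begin(), ranges.end(),
            [](auto& a, auto& b) { return a.from < b.from; });
        std::vector<Range> merged;
        for (auto& range : ranges) {
            if (!merged.empty() && range.from <= merged.back().to + 1)
                merged.back().to = std::max(merged.back().to, range.to);
            else
                merged.push_back(range);
        }
        return merged;
    }

    // coalesce({ {'a','d'}, {'c','f'}, {'x','x'} }) yields { {'a','f'}, {'x','x'} },
    // i.e. the LookupTable for [a-dc-fx] stores just two ranges.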
758 | | |
759 | | template void Regex<PosixBasicParser>::run_optimization_passes(); |
760 | | template void Regex<PosixExtendedParser>::run_optimization_passes(); |
761 | | template void Regex<ECMA262Parser>::run_optimization_passes(); |
762 | | } |