/src/serenity/Userland/Libraries/LibRegex/RegexMatcher.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2020, Emanuel Sprung <emanuel.sprung@gmail.com> |
3 | | * |
4 | | * SPDX-License-Identifier: BSD-2-Clause |
5 | | */ |
6 | | |
7 | | #include <AK/BumpAllocator.h> |
8 | | #include <AK/Debug.h> |
9 | | #include <AK/String.h> |
10 | | #include <AK/StringBuilder.h> |
11 | | #include <LibRegex/RegexMatcher.h> |
12 | | #include <LibRegex/RegexParser.h> |
13 | | |
14 | | #if REGEX_DEBUG |
15 | | # include <LibRegex/RegexDebug.h> |
16 | | #endif |
17 | | |
18 | | namespace regex { |
19 | | |
20 | | #if REGEX_DEBUG |
21 | | static RegexDebug s_regex_dbg(stderr); |
22 | | #endif |
23 | | |
24 | | template<class Parser> |
25 | | regex::Parser::Result Regex<Parser>::parse_pattern(StringView pattern, typename ParserTraits<Parser>::OptionsType regex_options) |
26 | 0 | { |
27 | 0 | regex::Lexer lexer(pattern); |
28 | |
|
29 | 0 | Parser parser(lexer, regex_options); |
30 | 0 | return parser.parse(); |
31 | 0 | } Unexecuted instantiation: regex::Regex<regex::PosixBasicParser>::parse_pattern(AK::StringView, regex::RegexOptions<regex::PosixFlags>) Unexecuted instantiation: regex::Regex<regex::PosixExtendedParser>::parse_pattern(AK::StringView, regex::RegexOptions<regex::PosixFlags>) Unexecuted instantiation: regex::Regex<regex::ECMA262Parser>::parse_pattern(AK::StringView, regex::RegexOptions<regex::ECMAScriptFlags>) |
32 | | |
33 | | template<class Parser> |
34 | | Regex<Parser>::Regex(String pattern, typename ParserTraits<Parser>::OptionsType regex_options) |
35 | | : pattern_value(move(pattern)) |
36 | 4.20k | { |
37 | 4.20k | regex::Lexer lexer(pattern_value); |
38 | | |
39 | 4.20k | Parser parser(lexer, regex_options); |
40 | 4.20k | parser_result = parser.parse(); |
41 | | |
42 | 4.20k | run_optimization_passes(); |
43 | 4.20k | if (parser_result.error == regex::Error::NoError) |
44 | 2.47k | matcher = make<Matcher<Parser>>(this, static_cast<decltype(regex_options.value())>(parser_result.options.value())); |
45 | 4.20k | } regex::Regex<regex::PosixBasicParser>::Regex(AK::String, regex::RegexOptions<regex::PosixFlags>) Line | Count | Source | 36 | 4.20k | { | 37 | 4.20k | regex::Lexer lexer(pattern_value); | 38 | | | 39 | 4.20k | Parser parser(lexer, regex_options); | 40 | 4.20k | parser_result = parser.parse(); | 41 | | | 42 | 4.20k | run_optimization_passes(); | 43 | 4.20k | if (parser_result.error == regex::Error::NoError) | 44 | 2.47k | matcher = make<Matcher<Parser>>(this, static_cast<decltype(regex_options.value())>(parser_result.options.value())); | 45 | 4.20k | } |
Unexecuted instantiation: regex::Regex<regex::PosixExtendedParser>::Regex(AK::String, regex::RegexOptions<regex::PosixFlags>) Unexecuted instantiation: regex::Regex<regex::ECMA262Parser>::Regex(AK::String, regex::RegexOptions<regex::ECMAScriptFlags>) |
46 | | |
47 | | template<class Parser> |
48 | | Regex<Parser>::Regex(regex::Parser::Result parse_result, String pattern, typename ParserTraits<Parser>::OptionsType regex_options) |
49 | | : pattern_value(move(pattern)) |
50 | | , parser_result(move(parse_result)) |
51 | 0 | { |
52 | 0 | run_optimization_passes(); |
53 | 0 | if (parser_result.error == regex::Error::NoError) |
54 | 0 | matcher = make<Matcher<Parser>>(this, regex_options | static_cast<decltype(regex_options.value())>(parse_result.options.value())); |
55 | 0 | } Unexecuted instantiation: regex::Regex<regex::PosixBasicParser>::Regex(regex::Parser::Result, AK::String, regex::RegexOptions<regex::PosixFlags>) Unexecuted instantiation: regex::Regex<regex::PosixExtendedParser>::Regex(regex::Parser::Result, AK::String, regex::RegexOptions<regex::PosixFlags>) Unexecuted instantiation: regex::Regex<regex::ECMA262Parser>::Regex(regex::Parser::Result, AK::String, regex::RegexOptions<regex::ECMAScriptFlags>) |
56 | | |
57 | | template<class Parser> |
58 | | Regex<Parser>::Regex(Regex&& regex) |
59 | | : pattern_value(move(regex.pattern_value)) |
60 | | , parser_result(move(regex.parser_result)) |
61 | | , matcher(move(regex.matcher)) |
62 | | , start_offset(regex.start_offset) |
63 | 0 | { |
64 | 0 | if (matcher) |
65 | 0 | matcher->reset_pattern({}, this); |
66 | 0 | } Unexecuted instantiation: regex::Regex<regex::PosixBasicParser>::Regex(regex::Regex<regex::PosixBasicParser>&&) Unexecuted instantiation: regex::Regex<regex::PosixExtendedParser>::Regex(regex::Regex<regex::PosixExtendedParser>&&) Unexecuted instantiation: regex::Regex<regex::ECMA262Parser>::Regex(regex::Regex<regex::ECMA262Parser>&&) |
67 | | |
68 | | template<class Parser> |
69 | | Regex<Parser>& Regex<Parser>::operator=(Regex&& regex) |
70 | 0 | { |
71 | 0 | pattern_value = move(regex.pattern_value); |
72 | 0 | parser_result = move(regex.parser_result); |
73 | 0 | matcher = move(regex.matcher); |
74 | 0 | if (matcher) |
75 | 0 | matcher->reset_pattern({}, this); |
76 | 0 | start_offset = regex.start_offset; |
77 | 0 | return *this; |
78 | 0 | } Unexecuted instantiation: regex::Regex<regex::PosixBasicParser>::operator=(regex::Regex<regex::PosixBasicParser>&&) Unexecuted instantiation: regex::Regex<regex::PosixExtendedParser>::operator=(regex::Regex<regex::PosixExtendedParser>&&) Unexecuted instantiation: regex::Regex<regex::ECMA262Parser>::operator=(regex::Regex<regex::ECMA262Parser>&&) |
79 | | |
80 | | template<class Parser> |
81 | | typename ParserTraits<Parser>::OptionsType Regex<Parser>::options() const |
82 | 0 | { |
83 | 0 | if (!matcher || parser_result.error != Error::NoError) |
84 | 0 | return {}; |
85 | | |
86 | 0 | return matcher->options(); |
87 | 0 | } Unexecuted instantiation: regex::Regex<regex::PosixBasicParser>::options() const Unexecuted instantiation: regex::Regex<regex::PosixExtendedParser>::options() const Unexecuted instantiation: regex::Regex<regex::ECMA262Parser>::options() const |
88 | | |
89 | | template<class Parser> |
90 | | String Regex<Parser>::error_string(Optional<String> message) const |
91 | 0 | { |
92 | 0 | StringBuilder eb; |
93 | 0 | eb.append("Error during parsing of regular expression:\n"); |
94 | 0 | eb.appendff(" {}\n ", pattern_value); |
95 | 0 | for (size_t i = 0; i < parser_result.error_token.position(); ++i) |
96 | 0 | eb.append(' '); |
97 | |
|
98 | 0 | eb.appendff("^---- {}", message.value_or(get_error_string(parser_result.error))); |
99 | 0 | return eb.build(); |
100 | 0 | } Unexecuted instantiation: regex::Regex<regex::PosixBasicParser>::error_string(AK::Optional<AK::String>) const Unexecuted instantiation: regex::Regex<regex::PosixExtendedParser>::error_string(AK::Optional<AK::String>) const Unexecuted instantiation: regex::Regex<regex::ECMA262Parser>::error_string(AK::Optional<AK::String>) const |
101 | | |
102 | | template<typename Parser> |
103 | | RegexResult Matcher<Parser>::match(RegexStringView view, Optional<typename ParserTraits<Parser>::OptionsType> regex_options) const |
104 | 0 | { |
105 | 0 | AllOptions options = m_regex_options | regex_options.value_or({}).value(); |
106 | |
|
107 | 0 | if constexpr (!IsSame<Parser, ECMA262>) { |
108 | 0 | if (options.has_flag_set(AllFlags::Multiline)) |
109 | 0 | return match(view.lines(), regex_options); // FIXME: how do we know, which line ending a line has (1char or 2char)? This is needed to get the correct match offsets from start of string... |
110 | 0 | } |
111 | | |
112 | 0 | Vector<RegexStringView> views; |
113 | 0 | views.append(view); |
114 | 0 | return match(views, regex_options); |
115 | 0 | } Unexecuted instantiation: regex::Matcher<regex::PosixBasicParser>::match(regex::RegexStringView, AK::Optional<regex::RegexOptions<regex::PosixFlags> >) const Unexecuted instantiation: regex::Matcher<regex::PosixExtendedParser>::match(regex::RegexStringView, AK::Optional<regex::RegexOptions<regex::PosixFlags> >) const Unexecuted instantiation: regex::Matcher<regex::ECMA262Parser>::match(regex::RegexStringView, AK::Optional<regex::RegexOptions<regex::ECMAScriptFlags> >) const |
116 | | |
117 | | template<typename Parser> |
118 | | RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optional<typename ParserTraits<Parser>::OptionsType> regex_options) const |
119 | 0 | { |
120 | | // If the pattern *itself* isn't stateful, reset any changes to start_offset. |
121 | 0 | if (!((AllFlags)m_regex_options.value() & AllFlags::Internal_Stateful)) |
122 | 0 | m_pattern->start_offset = 0; |
123 | |
|
124 | 0 | size_t match_count { 0 }; |
125 | |
|
126 | 0 | MatchInput input; |
127 | 0 | MatchState state; |
128 | 0 | size_t operations = 0; |
129 | |
|
130 | 0 | input.regex_options = m_regex_options | regex_options.value_or({}).value(); |
131 | 0 | input.start_offset = m_pattern->start_offset; |
132 | 0 | size_t lines_to_skip = 0; |
133 | |
|
134 | 0 | bool unicode = input.regex_options.has_flag_set(AllFlags::Unicode); |
135 | 0 | for (auto const& view : views) |
136 | 0 | const_cast<RegexStringView&>(view).set_unicode(unicode); |
137 | |
|
138 | 0 | if (input.regex_options.has_flag_set(AllFlags::Internal_Stateful)) { |
139 | 0 | if (views.size() > 1 && input.start_offset > views.first().length()) { |
140 | 0 | dbgln_if(REGEX_DEBUG, "Started with start={}, goff={}, skip={}", input.start_offset, input.global_offset, lines_to_skip); |
141 | 0 | for (auto const& view : views) { |
142 | 0 | if (input.start_offset < view.length() + 1) |
143 | 0 | break; |
144 | 0 | ++lines_to_skip; |
145 | 0 | input.start_offset -= view.length() + 1; |
146 | 0 | input.global_offset += view.length() + 1; |
147 | 0 | } |
148 | 0 | dbgln_if(REGEX_DEBUG, "Ended with start={}, goff={}, skip={}", input.start_offset, input.global_offset, lines_to_skip); |
149 | 0 | } |
150 | 0 | } |
151 | |
|
152 | 0 | if (c_match_preallocation_count) { |
153 | 0 | state.matches.ensure_capacity(c_match_preallocation_count); |
154 | 0 | state.capture_group_matches.ensure_capacity(c_match_preallocation_count); |
155 | 0 | auto& capture_groups_count = m_pattern->parser_result.capture_groups_count; |
156 | |
|
157 | 0 | for (size_t j = 0; j < c_match_preallocation_count; ++j) { |
158 | 0 | state.matches.empend(); |
159 | 0 | state.capture_group_matches.unchecked_append({}); |
160 | 0 | state.capture_group_matches.at(j).ensure_capacity(capture_groups_count); |
161 | 0 | for (size_t k = 0; k < capture_groups_count; ++k) |
162 | 0 | state.capture_group_matches.at(j).unchecked_append({}); |
163 | 0 | } |
164 | 0 | } |
165 | |
|
166 | 0 | auto append_match = [](auto& input, auto& state, auto& start_position) { |
167 | 0 | if (state.matches.size() == input.match_index) |
168 | 0 | state.matches.empend(); |
169 | |
|
170 | 0 | VERIFY(start_position + state.string_position - start_position <= input.view.length()); |
171 | 0 | if (input.regex_options.has_flag_set(AllFlags::StringCopyMatches)) { |
172 | 0 | state.matches.at(input.match_index) = { input.view.substring_view(start_position, state.string_position - start_position).to_string(), input.line, start_position, input.global_offset + start_position }; |
173 | 0 | } else { // let the view point to the original string ... |
174 | 0 | state.matches.at(input.match_index) = { input.view.substring_view(start_position, state.string_position - start_position), input.line, start_position, input.global_offset + start_position }; |
175 | 0 | } |
176 | 0 | }; Unexecuted instantiation: auto regex::Matcher<regex::PosixBasicParser>::match(AK::Vector<regex::RegexStringView, 0ul> const&, AK::Optional<regex::RegexOptions<regex::PosixFlags> >) const::{lambda(auto:1&, auto:2&, auto:3&)#1}::operator()<regex::MatchInput, regex::MatchState, unsigned long>(regex::MatchInput&, regex::MatchState&, unsigned long&) const Unexecuted instantiation: auto regex::Matcher<regex::PosixExtendedParser>::match(AK::Vector<regex::RegexStringView, 0ul> const&, AK::Optional<regex::RegexOptions<regex::PosixFlags> >) const::{lambda(auto:1&, auto:2&, auto:3&)#1}::operator()<regex::MatchInput, regex::MatchState, unsigned long>(regex::MatchInput&, regex::MatchState&, unsigned long&) const Unexecuted instantiation: auto regex::Matcher<regex::ECMA262Parser>::match(AK::Vector<regex::RegexStringView, 0ul> const&, AK::Optional<regex::RegexOptions<regex::ECMAScriptFlags> >) const::{lambda(auto:1&, auto:2&, auto:3&)#1}::operator()<regex::MatchInput, regex::MatchState, unsigned long>(regex::MatchInput&, regex::MatchState&, unsigned long&) const |
177 | |
|
178 | | #if REGEX_DEBUG |
179 | | s_regex_dbg.print_header(); |
180 | | #endif |
181 | |
|
182 | 0 | bool continue_search = input.regex_options.has_flag_set(AllFlags::Global) || input.regex_options.has_flag_set(AllFlags::Multiline); |
183 | 0 | if (input.regex_options.has_flag_set(AllFlags::Sticky)) |
184 | 0 | continue_search = false; |
185 | |
|
186 | 0 | auto single_match_only = input.regex_options.has_flag_set(AllFlags::SingleMatch); |
187 | |
|
188 | 0 | for (auto const& view : views) { |
189 | 0 | if (lines_to_skip != 0) { |
190 | 0 | ++input.line; |
191 | 0 | --lines_to_skip; |
192 | 0 | continue; |
193 | 0 | } |
194 | 0 | input.view = view; |
195 | 0 | dbgln_if(REGEX_DEBUG, "[match] Starting match with view ({}): _{}_", view.length(), view); |
196 | |
|
197 | 0 | auto view_length = view.length(); |
198 | 0 | size_t view_index = m_pattern->start_offset; |
199 | 0 | state.string_position = view_index; |
200 | 0 | state.string_position_in_code_units = view_index; |
201 | 0 | bool succeeded = false; |
202 | |
|
203 | 0 | if (view_index == view_length && m_pattern->parser_result.match_length_minimum == 0) { |
204 | | // Run the code until it tries to consume something. |
205 | | // This allows non-consuming code to run on empty strings, for instance |
206 | | // e.g. "Exit" |
207 | 0 | size_t temp_operations = operations; |
208 | |
|
209 | 0 | input.column = match_count; |
210 | 0 | input.match_index = match_count; |
211 | |
|
212 | 0 | state.string_position = view_index; |
213 | 0 | state.string_position_in_code_units = view_index; |
214 | 0 | state.instruction_position = 0; |
215 | 0 | state.repetition_marks.clear(); |
216 | |
|
217 | 0 | auto success = execute(input, state, temp_operations); |
218 | | // This success is acceptable only if it doesn't read anything from the input (input length is 0). |
219 | 0 | if (success && (state.string_position <= view_index)) { |
220 | 0 | operations = temp_operations; |
221 | 0 | if (!match_count) { |
222 | | // Nothing was *actually* matched, so append an empty match. |
223 | 0 | append_match(input, state, view_index); |
224 | 0 | ++match_count; |
225 | 0 | } |
226 | 0 | } |
227 | 0 | } |
228 | |
|
229 | 0 | for (; view_index <= view_length; ++view_index) { |
230 | 0 | if (view_index == view_length && input.regex_options.has_flag_set(AllFlags::Multiline)) |
231 | 0 | break; |
232 | | |
233 | 0 | auto& match_length_minimum = m_pattern->parser_result.match_length_minimum; |
234 | | // FIXME: More performant would be to know the remaining minimum string |
235 | | // length needed to match from the current position onwards within |
236 | | // the vm. Add new OpCode for MinMatchLengthFromSp with the value of |
237 | | // the remaining string length from the current path. The value though |
238 | | // has to be filled in reverse. That implies a second run over bytecode |
239 | | // after generation has finished. |
240 | 0 | if (match_length_minimum && match_length_minimum > view_length - view_index) |
241 | 0 | break; |
242 | | |
243 | 0 | input.column = match_count; |
244 | 0 | input.match_index = match_count; |
245 | |
|
246 | 0 | state.string_position = view_index; |
247 | 0 | state.string_position_in_code_units = view_index; |
248 | 0 | state.instruction_position = 0; |
249 | 0 | state.repetition_marks.clear(); |
250 | |
|
251 | 0 | auto success = execute(input, state, operations); |
252 | 0 | if (success) { |
253 | 0 | succeeded = true; |
254 | |
|
255 | 0 | if (input.regex_options.has_flag_set(AllFlags::MatchNotEndOfLine) && state.string_position == input.view.length()) { |
256 | 0 | if (!continue_search) |
257 | 0 | break; |
258 | 0 | continue; |
259 | 0 | } |
260 | 0 | if (input.regex_options.has_flag_set(AllFlags::MatchNotBeginOfLine) && view_index == 0) { |
261 | 0 | if (!continue_search) |
262 | 0 | break; |
263 | 0 | continue; |
264 | 0 | } |
265 | | |
266 | 0 | dbgln_if(REGEX_DEBUG, "state.string_position={}, view_index={}", state.string_position, view_index); |
267 | 0 | dbgln_if(REGEX_DEBUG, "[match] Found a match (length={}): '{}'", state.string_position - view_index, input.view.substring_view(view_index, state.string_position - view_index)); |
268 | |
|
269 | 0 | ++match_count; |
270 | |
|
271 | 0 | if (continue_search) { |
272 | 0 | append_match(input, state, view_index); |
273 | |
|
274 | 0 | bool has_zero_length = state.string_position == view_index; |
275 | 0 | view_index = state.string_position - (has_zero_length ? 0 : 1); |
276 | 0 | if (single_match_only) |
277 | 0 | break; |
278 | 0 | continue; |
279 | 0 | } |
280 | 0 | if (input.regex_options.has_flag_set(AllFlags::Internal_Stateful)) { |
281 | 0 | append_match(input, state, view_index); |
282 | 0 | break; |
283 | 0 | } |
284 | 0 | if (state.string_position < view_length) { |
285 | 0 | return { false, 0, {}, {}, {}, operations }; |
286 | 0 | } |
287 | | |
288 | 0 | append_match(input, state, view_index); |
289 | 0 | break; |
290 | 0 | } |
291 | | |
292 | 0 | if (!continue_search) |
293 | 0 | break; |
294 | 0 | } |
295 | | |
296 | 0 | ++input.line; |
297 | 0 | input.global_offset += view.length() + 1; // +1 includes the line break character |
298 | |
|
299 | 0 | if (input.regex_options.has_flag_set(AllFlags::Internal_Stateful)) |
300 | 0 | m_pattern->start_offset = state.string_position; |
301 | |
|
302 | 0 | if (succeeded && !continue_search) |
303 | 0 | break; |
304 | 0 | } |
305 | | |
306 | 0 | RegexResult result { |
307 | 0 | match_count != 0, |
308 | 0 | match_count, |
309 | 0 | move(state.matches), |
310 | 0 | move(state.capture_group_matches), |
311 | 0 | operations, |
312 | 0 | m_pattern->parser_result.capture_groups_count, |
313 | 0 | m_pattern->parser_result.named_capture_groups_count, |
314 | 0 | }; |
315 | |
|
316 | 0 | if (match_count) { |
317 | | // Make sure there are as many capture matches as there are actual matches. |
318 | 0 | if (result.capture_group_matches.size() < match_count) |
319 | 0 | result.capture_group_matches.resize(match_count); |
320 | 0 | for (auto& matches : result.capture_group_matches) |
321 | 0 | matches.resize(m_pattern->parser_result.capture_groups_count + 1); |
322 | 0 | if (!input.regex_options.has_flag_set(AllFlags::SkipTrimEmptyMatches)) { |
323 | 0 | for (auto& matches : result.capture_group_matches) |
324 | 0 | matches.template remove_all_matching([](auto& match) { return match.view.is_null(); }); Unexecuted instantiation: auto regex::Matcher<regex::PosixBasicParser>::match(AK::Vector<regex::RegexStringView, 0ul> const&, AK::Optional<regex::RegexOptions<regex::PosixFlags> >) const::{lambda(auto:1&)#1}::operator()<regex::Match>(regex::Match&) const Unexecuted instantiation: auto regex::Matcher<regex::PosixExtendedParser>::match(AK::Vector<regex::RegexStringView, 0ul> const&, AK::Optional<regex::RegexOptions<regex::PosixFlags> >) const::{lambda(auto:1&)#1}::operator()<regex::Match>(regex::Match&) const Unexecuted instantiation: auto regex::Matcher<regex::ECMA262Parser>::match(AK::Vector<regex::RegexStringView, 0ul> const&, AK::Optional<regex::RegexOptions<regex::ECMAScriptFlags> >) const::{lambda(auto:1&)#1}::operator()<regex::Match>(regex::Match&) const |
325 | 0 | } |
326 | 0 | } else { |
327 | 0 | result.capture_group_matches.clear_with_capacity(); |
328 | 0 | } |
329 | |
|
330 | 0 | return result; |
331 | 0 | } Unexecuted instantiation: regex::Matcher<regex::PosixBasicParser>::match(AK::Vector<regex::RegexStringView, 0ul> const&, AK::Optional<regex::RegexOptions<regex::PosixFlags> >) const Unexecuted instantiation: regex::Matcher<regex::PosixExtendedParser>::match(AK::Vector<regex::RegexStringView, 0ul> const&, AK::Optional<regex::RegexOptions<regex::PosixFlags> >) const Unexecuted instantiation: regex::Matcher<regex::ECMA262Parser>::match(AK::Vector<regex::RegexStringView, 0ul> const&, AK::Optional<regex::RegexOptions<regex::ECMAScriptFlags> >) const |
332 | | |
333 | | template<typename T> |
334 | | class BumpAllocatedLinkedList { |
335 | | public: |
336 | 0 | BumpAllocatedLinkedList() = default; |
337 | | |
338 | | ALWAYS_INLINE void append(T value) |
339 | 0 | { |
340 | 0 | auto node_ptr = m_allocator.allocate(move(value)); |
341 | 0 | VERIFY(node_ptr); |
342 | | |
343 | 0 | if (!m_first) { |
344 | 0 | m_first = node_ptr; |
345 | 0 | m_last = node_ptr; |
346 | 0 | return; |
347 | 0 | } |
348 | | |
349 | 0 | node_ptr->previous = m_last; |
350 | 0 | m_last->next = node_ptr; |
351 | 0 | m_last = node_ptr; |
352 | 0 | } |
353 | | |
354 | | ALWAYS_INLINE T take_last() |
355 | 0 | { |
356 | 0 | VERIFY(m_last); |
357 | 0 | T value = move(m_last->value); |
358 | 0 | if (m_last == m_first) { |
359 | 0 | m_last = nullptr; |
360 | 0 | m_first = nullptr; |
361 | 0 | } else { |
362 | 0 | m_last = m_last->previous; |
363 | 0 | m_last->next = nullptr; |
364 | 0 | } |
365 | 0 | return value; |
366 | 0 | } |
367 | | |
368 | | ALWAYS_INLINE T& last() |
369 | 0 | { |
370 | 0 | return m_last->value; |
371 | 0 | } |
372 | | |
373 | | ALWAYS_INLINE bool is_empty() const |
374 | 0 | { |
375 | 0 | return m_first == nullptr; |
376 | 0 | } |
377 | | |
378 | 0 | auto reverse_begin() { return ReverseIterator(m_last); } |
379 | 0 | auto reverse_end() { return ReverseIterator(); } |
380 | | |
381 | | private: |
382 | | struct Node { |
383 | | T value; |
384 | | Node* next { nullptr }; |
385 | | Node* previous { nullptr }; |
386 | | }; |
387 | | |
388 | | struct ReverseIterator { |
389 | | ReverseIterator() = default; |
390 | | explicit ReverseIterator(Node* node) |
391 | | : m_node(node) |
392 | 0 | { |
393 | 0 | } |
394 | | |
395 | 0 | T* operator->() { return &m_node->value; } |
396 | 0 | T& operator*() { return m_node->value; } |
397 | 0 | bool operator==(ReverseIterator const& it) const { return m_node == it.m_node; } |
398 | | ReverseIterator& operator++() |
399 | 0 | { |
400 | 0 | if (m_node) |
401 | 0 | m_node = m_node->previous; |
402 | 0 | return *this; |
403 | 0 | } |
404 | | |
405 | | private: |
406 | | Node* m_node; |
407 | | }; |
408 | | |
409 | | UniformBumpAllocator<Node, true, 2 * MiB> m_allocator; |
410 | | Node* m_first { nullptr }; |
411 | | Node* m_last { nullptr }; |
412 | | }; |
413 | | |
414 | | template<class Parser> |
415 | | bool Matcher<Parser>::execute(MatchInput const& input, MatchState& state, size_t& operations) const |
416 | 0 | { |
417 | 0 | BumpAllocatedLinkedList<MatchState> states_to_try_next; |
418 | 0 | size_t recursion_level = 0; |
419 | |
|
420 | 0 | auto& bytecode = m_pattern->parser_result.bytecode; |
421 | |
|
422 | 0 | for (;;) { |
423 | 0 | auto& opcode = bytecode.get_opcode(state); |
424 | 0 | ++operations; |
425 | |
|
426 | | #if REGEX_DEBUG |
427 | | s_regex_dbg.print_opcode("VM", opcode, state, recursion_level, false); |
428 | | #endif |
429 | |
|
430 | 0 | ExecutionResult result; |
431 | 0 | if (input.fail_counter > 0) { |
432 | 0 | --input.fail_counter; |
433 | 0 | result = ExecutionResult::Failed_ExecuteLowPrioForks; |
434 | 0 | } else { |
435 | 0 | result = opcode.execute(input, state); |
436 | 0 | } |
437 | |
|
438 | | #if REGEX_DEBUG |
439 | | s_regex_dbg.print_result(opcode, bytecode, input, state, result); |
440 | | #endif |
441 | |
|
442 | 0 | state.instruction_position += opcode.size(); |
443 | |
|
444 | 0 | switch (result) { |
445 | 0 | case ExecutionResult::Fork_PrioLow: { |
446 | 0 | bool found = false; |
447 | 0 | if (input.fork_to_replace.has_value()) { |
448 | 0 | for (auto it = states_to_try_next.reverse_begin(); it != states_to_try_next.reverse_end(); ++it) { |
449 | 0 | if (it->initiating_fork == input.fork_to_replace.value()) { |
450 | 0 | (*it) = state; |
451 | 0 | it->instruction_position = state.fork_at_position; |
452 | 0 | it->initiating_fork = *input.fork_to_replace; |
453 | 0 | it->capture_group_matches = state.capture_group_matches; |
454 | 0 | it->matches = state.matches; |
455 | 0 | found = true; |
456 | 0 | break; |
457 | 0 | } |
458 | 0 | } |
459 | 0 | input.fork_to_replace.clear(); |
460 | 0 | } |
461 | 0 | if (!found) { |
462 | 0 | states_to_try_next.append(state); |
463 | 0 | states_to_try_next.last().initiating_fork = state.instruction_position - opcode.size(); |
464 | 0 | states_to_try_next.last().instruction_position = state.fork_at_position; |
465 | 0 | } |
466 | 0 | continue; |
467 | 0 | } |
468 | 0 | case ExecutionResult::Fork_PrioHigh: { |
469 | 0 | bool found = false; |
470 | 0 | if (input.fork_to_replace.has_value()) { |
471 | 0 | for (auto it = states_to_try_next.reverse_begin(); it != states_to_try_next.reverse_end(); ++it) { |
472 | 0 | if (it->initiating_fork == input.fork_to_replace.value()) { |
473 | 0 | (*it) = state; |
474 | 0 | it->initiating_fork = *input.fork_to_replace; |
475 | 0 | found = true; |
476 | 0 | break; |
477 | 0 | } |
478 | 0 | } |
479 | 0 | input.fork_to_replace.clear(); |
480 | 0 | } |
481 | 0 | if (!found) { |
482 | 0 | states_to_try_next.append(state); |
483 | 0 | states_to_try_next.last().initiating_fork = state.instruction_position - opcode.size(); |
484 | 0 | } |
485 | 0 | state.instruction_position = state.fork_at_position; |
486 | 0 | ++recursion_level; |
487 | 0 | continue; |
488 | 0 | } |
489 | 0 | case ExecutionResult::Continue: |
490 | 0 | continue; |
491 | 0 | case ExecutionResult::Succeeded: |
492 | 0 | return true; |
493 | 0 | case ExecutionResult::Failed: |
494 | 0 | if (!states_to_try_next.is_empty()) { |
495 | 0 | state = states_to_try_next.take_last(); |
496 | 0 | continue; |
497 | 0 | } |
498 | 0 | return false; |
499 | 0 | case ExecutionResult::Failed_ExecuteLowPrioForks: { |
500 | 0 | if (states_to_try_next.is_empty()) { |
501 | 0 | return false; |
502 | 0 | } |
503 | 0 | state = states_to_try_next.take_last(); |
504 | 0 | ++recursion_level; |
505 | 0 | continue; |
506 | 0 | } |
507 | 0 | } |
508 | 0 | } |
509 | | |
510 | 0 | VERIFY_NOT_REACHED(); |
511 | 0 | } Unexecuted instantiation: regex::Matcher<regex::PosixBasicParser>::execute(regex::MatchInput const&, regex::MatchState&, unsigned long&) const Unexecuted instantiation: regex::Matcher<regex::PosixExtendedParser>::execute(regex::MatchInput const&, regex::MatchState&, unsigned long&) const Unexecuted instantiation: regex::Matcher<regex::ECMA262Parser>::execute(regex::MatchInput const&, regex::MatchState&, unsigned long&) const |
512 | | |
513 | | template class Matcher<PosixBasicParser>; |
514 | | template class Regex<PosixBasicParser>; |
515 | | |
516 | | template class Matcher<PosixExtendedParser>; |
517 | | template class Regex<PosixExtendedParser>; |
518 | | |
519 | | template class Matcher<ECMA262Parser>; |
520 | | template class Regex<ECMA262Parser>; |
521 | | } |