/src/serenity/Userland/Libraries/LibRegex/RegexMatcher.cpp
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2020, Emanuel Sprung <emanuel.sprung@gmail.com> |
3 | | * |
4 | | * SPDX-License-Identifier: BSD-2-Clause |
5 | | */ |
6 | | |
7 | | #include <AK/BumpAllocator.h> |
8 | | #include <AK/ByteString.h> |
9 | | #include <AK/Debug.h> |
10 | | #include <AK/StringBuilder.h> |
11 | | #include <LibRegex/RegexMatcher.h> |
12 | | #include <LibRegex/RegexParser.h> |
13 | | |
14 | | #if REGEX_DEBUG |
15 | | # include <LibRegex/RegexDebug.h> |
16 | | #endif |
17 | | |
18 | | namespace regex { |
19 | | |
20 | | #if REGEX_DEBUG |
21 | | static RegexDebug s_regex_dbg(stderr); |
22 | | #endif |
23 | | |
24 | | template<class Parser> |
25 | | regex::Parser::Result Regex<Parser>::parse_pattern(StringView pattern, typename ParserTraits<Parser>::OptionsType regex_options) |
26 | 0 | { |
27 | 0 | regex::Lexer lexer(pattern); |
28 | |
|
29 | 0 | Parser parser(lexer, regex_options); |
30 | 0 | return parser.parse(); |
31 | 0 | } Unexecuted instantiation: regex::Regex<regex::PosixBasicParser>::parse_pattern(AK::StringView, regex::RegexOptions<regex::PosixFlags>) Unexecuted instantiation: regex::Regex<regex::PosixExtendedParser>::parse_pattern(AK::StringView, regex::RegexOptions<regex::PosixFlags>) Unexecuted instantiation: regex::Regex<regex::ECMA262Parser>::parse_pattern(AK::StringView, regex::RegexOptions<regex::ECMAScriptFlags>) |
32 | | |
33 | | template<class Parser> |
34 | | Regex<Parser>::Regex(ByteString pattern, typename ParserTraits<Parser>::OptionsType regex_options) |
35 | 7.81k | : pattern_value(move(pattern)) |
36 | 7.81k | { |
37 | 7.81k | regex::Lexer lexer(pattern_value); |
38 | | |
39 | 7.81k | Parser parser(lexer, regex_options); |
40 | 7.81k | parser_result = parser.parse(); |
41 | | |
42 | 7.81k | run_optimization_passes(); |
43 | 7.81k | if (parser_result.error == regex::Error::NoError) |
44 | 5.04k | matcher = make<Matcher<Parser>>(this, static_cast<decltype(regex_options.value())>(parser_result.options.value())); |
45 | 7.81k | } regex::Regex<regex::PosixBasicParser>::Regex(AK::ByteString, regex::RegexOptions<regex::PosixFlags>) Line | Count | Source | 35 | 4.82k | : pattern_value(move(pattern)) | 36 | 4.82k | { | 37 | 4.82k | regex::Lexer lexer(pattern_value); | 38 | | | 39 | 4.82k | Parser parser(lexer, regex_options); | 40 | 4.82k | parser_result = parser.parse(); | 41 | | | 42 | 4.82k | run_optimization_passes(); | 43 | 4.82k | if (parser_result.error == regex::Error::NoError) | 44 | 3.26k | matcher = make<Matcher<Parser>>(this, static_cast<decltype(regex_options.value())>(parser_result.options.value())); | 45 | 4.82k | } |
regex::Regex<regex::PosixExtendedParser>::Regex(AK::ByteString, regex::RegexOptions<regex::PosixFlags>) Line | Count | Source | 35 | 686 | : pattern_value(move(pattern)) | 36 | 686 | { | 37 | 686 | regex::Lexer lexer(pattern_value); | 38 | | | 39 | 686 | Parser parser(lexer, regex_options); | 40 | 686 | parser_result = parser.parse(); | 41 | | | 42 | 686 | run_optimization_passes(); | 43 | 686 | if (parser_result.error == regex::Error::NoError) | 44 | 269 | matcher = make<Matcher<Parser>>(this, static_cast<decltype(regex_options.value())>(parser_result.options.value())); | 45 | 686 | } |
regex::Regex<regex::ECMA262Parser>::Regex(AK::ByteString, regex::RegexOptions<regex::ECMAScriptFlags>) Line | Count | Source | 35 | 2.30k | : pattern_value(move(pattern)) | 36 | 2.30k | { | 37 | 2.30k | regex::Lexer lexer(pattern_value); | 38 | | | 39 | 2.30k | Parser parser(lexer, regex_options); | 40 | 2.30k | parser_result = parser.parse(); | 41 | | | 42 | 2.30k | run_optimization_passes(); | 43 | 2.30k | if (parser_result.error == regex::Error::NoError) | 44 | 1.50k | matcher = make<Matcher<Parser>>(this, static_cast<decltype(regex_options.value())>(parser_result.options.value())); | 45 | 2.30k | } |
|
46 | | |
47 | | template<class Parser> |
48 | | Regex<Parser>::Regex(regex::Parser::Result parse_result, ByteString pattern, typename ParserTraits<Parser>::OptionsType regex_options) |
49 | 0 | : pattern_value(move(pattern)) |
50 | 0 | , parser_result(move(parse_result)) |
51 | 0 | { |
52 | 0 | run_optimization_passes(); |
53 | 0 | if (parser_result.error == regex::Error::NoError) |
54 | 0 | matcher = make<Matcher<Parser>>(this, regex_options | static_cast<decltype(regex_options.value())>(parse_result.options.value())); |
55 | 0 | } Unexecuted instantiation: regex::Regex<regex::PosixBasicParser>::Regex(regex::Parser::Result, AK::ByteString, regex::RegexOptions<regex::PosixFlags>) Unexecuted instantiation: regex::Regex<regex::PosixExtendedParser>::Regex(regex::Parser::Result, AK::ByteString, regex::RegexOptions<regex::PosixFlags>) Unexecuted instantiation: regex::Regex<regex::ECMA262Parser>::Regex(regex::Parser::Result, AK::ByteString, regex::RegexOptions<regex::ECMAScriptFlags>) |
56 | | |
57 | | template<class Parser> |
58 | | Regex<Parser>::Regex(Regex&& regex) |
59 | 5.28k | : pattern_value(move(regex.pattern_value)) |
60 | 5.28k | , parser_result(move(regex.parser_result)) |
61 | 5.28k | , matcher(move(regex.matcher)) |
62 | 5.28k | , start_offset(regex.start_offset) |
63 | 5.28k | { |
64 | 5.28k | if (matcher) |
65 | 4.49k | matcher->reset_pattern({}, this); |
66 | 5.28k | } Unexecuted instantiation: regex::Regex<regex::PosixBasicParser>::Regex(regex::Regex<regex::PosixBasicParser>&&) Unexecuted instantiation: regex::Regex<regex::PosixExtendedParser>::Regex(regex::Regex<regex::PosixExtendedParser>&&) regex::Regex<regex::ECMA262Parser>::Regex(regex::Regex<regex::ECMA262Parser>&&) Line | Count | Source | 59 | 5.28k | : pattern_value(move(regex.pattern_value)) | 60 | 5.28k | , parser_result(move(regex.parser_result)) | 61 | 5.28k | , matcher(move(regex.matcher)) | 62 | 5.28k | , start_offset(regex.start_offset) | 63 | 5.28k | { | 64 | 5.28k | if (matcher) | 65 | 4.49k | matcher->reset_pattern({}, this); | 66 | 5.28k | } |
|
67 | | |
68 | | template<class Parser> |
69 | | Regex<Parser>& Regex<Parser>::operator=(Regex&& regex) |
70 | 0 | { |
71 | 0 | pattern_value = move(regex.pattern_value); |
72 | 0 | parser_result = move(regex.parser_result); |
73 | 0 | matcher = move(regex.matcher); |
74 | 0 | if (matcher) |
75 | 0 | matcher->reset_pattern({}, this); |
76 | 0 | start_offset = regex.start_offset; |
77 | 0 | return *this; |
78 | 0 | } Unexecuted instantiation: regex::Regex<regex::PosixBasicParser>::operator=(regex::Regex<regex::PosixBasicParser>&&) Unexecuted instantiation: regex::Regex<regex::PosixExtendedParser>::operator=(regex::Regex<regex::PosixExtendedParser>&&) Unexecuted instantiation: regex::Regex<regex::ECMA262Parser>::operator=(regex::Regex<regex::ECMA262Parser>&&) |
79 | | |
80 | | template<class Parser> |
81 | | typename ParserTraits<Parser>::OptionsType Regex<Parser>::options() const |
82 | 0 | { |
83 | 0 | if (!matcher || parser_result.error != Error::NoError) |
84 | 0 | return {}; |
85 | | |
86 | 0 | return matcher->options(); |
87 | 0 | } Unexecuted instantiation: regex::Regex<regex::PosixBasicParser>::options() const Unexecuted instantiation: regex::Regex<regex::PosixExtendedParser>::options() const Unexecuted instantiation: regex::Regex<regex::ECMA262Parser>::options() const |
88 | | |
89 | | template<class Parser> |
90 | | ByteString Regex<Parser>::error_string(Optional<ByteString> message) const |
91 | 0 | { |
92 | 0 | StringBuilder eb; |
93 | 0 | eb.append("Error during parsing of regular expression:\n"sv); |
94 | 0 | eb.appendff(" {}\n ", pattern_value); |
95 | 0 | for (size_t i = 0; i < parser_result.error_token.position(); ++i) |
96 | 0 | eb.append(' '); |
97 | |
|
98 | 0 | eb.appendff("^---- {}", message.value_or(get_error_string(parser_result.error))); |
99 | 0 | return eb.to_byte_string(); |
100 | 0 | } Unexecuted instantiation: regex::Regex<regex::PosixBasicParser>::error_string(AK::Optional<AK::ByteString>) const Unexecuted instantiation: regex::Regex<regex::PosixExtendedParser>::error_string(AK::Optional<AK::ByteString>) const Unexecuted instantiation: regex::Regex<regex::ECMA262Parser>::error_string(AK::Optional<AK::ByteString>) const |
101 | | |
102 | | template<typename Parser> |
103 | | RegexResult Matcher<Parser>::match(RegexStringView view, Optional<typename ParserTraits<Parser>::OptionsType> regex_options) const |
104 | 34.3M | { |
105 | 34.3M | AllOptions options = m_regex_options | regex_options.value_or({}).value(); |
106 | | |
107 | 34.3M | if constexpr (!IsSame<Parser, ECMA262>) { |
108 | 0 | if (options.has_flag_set(AllFlags::Multiline)) |
109 | 0 | return match(view.lines(), regex_options); // FIXME: how do we know, which line ending a line has (1char or 2char)? This is needed to get the correct match offsets from start of string... |
110 | 0 | } |
111 | | |
112 | 0 | Vector<RegexStringView> views; |
113 | 34.3M | views.append(view); |
114 | 34.3M | return match(views, regex_options); |
115 | 34.3M | } Unexecuted instantiation: regex::Matcher<regex::PosixBasicParser>::match(regex::RegexStringView, AK::Optional<regex::RegexOptions<regex::PosixFlags> >) const Unexecuted instantiation: regex::Matcher<regex::PosixExtendedParser>::match(regex::RegexStringView, AK::Optional<regex::RegexOptions<regex::PosixFlags> >) const regex::Matcher<regex::ECMA262Parser>::match(regex::RegexStringView, AK::Optional<regex::RegexOptions<regex::ECMAScriptFlags> >) const Line | Count | Source | 104 | 34.3M | { | 105 | 34.3M | AllOptions options = m_regex_options | regex_options.value_or({}).value(); | 106 | | | 107 | | if constexpr (!IsSame<Parser, ECMA262>) { | 108 | | if (options.has_flag_set(AllFlags::Multiline)) | 109 | | return match(view.lines(), regex_options); // FIXME: how do we know, which line ending a line has (1char or 2char)? This is needed to get the correct match offsets from start of string... | 110 | | } | 111 | | | 112 | 34.3M | Vector<RegexStringView> views; | 113 | 34.3M | views.append(view); | 114 | 34.3M | return match(views, regex_options); | 115 | 34.3M | } |
|
116 | | |
117 | | template<typename Parser> |
118 | | RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optional<typename ParserTraits<Parser>::OptionsType> regex_options) const |
119 | 34.3M | { |
120 | | // If the pattern *itself* isn't stateful, reset any changes to start_offset. |
121 | 34.3M | if (!((AllFlags)m_regex_options.value() & AllFlags::Internal_Stateful)) |
122 | 34.3M | m_pattern->start_offset = 0; |
123 | | |
124 | 34.3M | size_t match_count { 0 }; |
125 | | |
126 | 34.3M | MatchInput input; |
127 | 34.3M | MatchState state; |
128 | 34.3M | size_t operations = 0; |
129 | | |
130 | 34.3M | input.regex_options = m_regex_options | regex_options.value_or({}).value(); |
131 | 34.3M | input.start_offset = m_pattern->start_offset; |
132 | 34.3M | size_t lines_to_skip = 0; |
133 | | |
134 | 34.3M | bool unicode = input.regex_options.has_flag_set(AllFlags::Unicode); |
135 | 34.3M | for (auto const& view : views) |
136 | 34.3M | const_cast<RegexStringView&>(view).set_unicode(unicode); |
137 | | |
138 | 34.3M | if (input.regex_options.has_flag_set(AllFlags::Internal_Stateful)) { |
139 | 0 | if (views.size() > 1 && input.start_offset > views.first().length()) { |
140 | 0 | dbgln_if(REGEX_DEBUG, "Started with start={}, goff={}, skip={}", input.start_offset, input.global_offset, lines_to_skip); |
141 | 0 | for (auto const& view : views) { |
142 | 0 | if (input.start_offset < view.length() + 1) |
143 | 0 | break; |
144 | 0 | ++lines_to_skip; |
145 | 0 | input.start_offset -= view.length() + 1; |
146 | 0 | input.global_offset += view.length() + 1; |
147 | 0 | } |
148 | 0 | dbgln_if(REGEX_DEBUG, "Ended with start={}, goff={}, skip={}", input.start_offset, input.global_offset, lines_to_skip); |
149 | 0 | } |
150 | 0 | } |
151 | | |
152 | 34.3M | if (c_match_preallocation_count) { |
153 | 0 | state.matches.ensure_capacity(c_match_preallocation_count); |
154 | 0 | state.capture_group_matches.ensure_capacity(c_match_preallocation_count); |
155 | 0 | auto& capture_groups_count = m_pattern->parser_result.capture_groups_count; |
156 | |
|
157 | 0 | for (size_t j = 0; j < c_match_preallocation_count; ++j) { |
158 | 0 | state.matches.empend(); |
159 | 0 | state.capture_group_matches.empend(); |
160 | 0 | state.capture_group_matches.mutable_at(j).ensure_capacity(capture_groups_count); |
161 | 0 | for (size_t k = 0; k < capture_groups_count; ++k) |
162 | 0 | state.capture_group_matches.mutable_at(j).unchecked_append({}); |
163 | 0 | } |
164 | 0 | } |
165 | | |
166 | 34.3M | auto append_match = [](auto& input, auto& state, auto& start_position) { |
167 | 1.07M | if (state.matches.size() == input.match_index) |
168 | 1.07M | state.matches.empend(); |
169 | | |
170 | 1.07M | VERIFY(start_position + state.string_position - start_position <= input.view.length()); |
171 | 1.07M | if (input.regex_options.has_flag_set(AllFlags::StringCopyMatches)) { |
172 | 0 | state.matches.mutable_at(input.match_index) = { input.view.substring_view(start_position, state.string_position - start_position).to_byte_string(), input.line, start_position, input.global_offset + start_position }; |
173 | 1.07M | } else { // let the view point to the original string ... |
174 | 1.07M | state.matches.mutable_at(input.match_index) = { input.view.substring_view(start_position, state.string_position - start_position), input.line, start_position, input.global_offset + start_position }; |
175 | 1.07M | } |
176 | 1.07M | }; Unexecuted instantiation: auto regex::Matcher<regex::PosixBasicParser>::match(AK::Vector<regex::RegexStringView, 0ul> const&, AK::Optional<regex::RegexOptions<regex::PosixFlags> >) const::{lambda(auto:1&, auto:2&, auto:3&)#1}::operator()<regex::MatchInput, regex::MatchState, unsigned long>(regex::MatchInput&, regex::MatchState&, unsigned long&) constUnexecuted instantiation: auto regex::Matcher<regex::PosixExtendedParser>::match(AK::Vector<regex::RegexStringView, 0ul> const&, AK::Optional<regex::RegexOptions<regex::PosixFlags> >) const::{lambda(auto:1&, auto:2&, auto:3&)#1}::operator()<regex::MatchInput, regex::MatchState, unsigned long>(regex::MatchInput&, regex::MatchState&, unsigned long&) constauto regex::Matcher<regex::ECMA262Parser>::match(AK::Vector<regex::RegexStringView, 0ul> const&, AK::Optional<regex::RegexOptions<regex::ECMAScriptFlags> >) const::{lambda(auto:1&, auto:2&, auto:3&)#1}::operator()<regex::MatchInput, regex::MatchState, unsigned long>(regex::MatchInput&, regex::MatchState&, unsigned long&) constLine | Count | Source | 166 | 1.07M | auto append_match = [](auto& input, auto& state, auto& start_position) { | 167 | 1.07M | if (state.matches.size() == input.match_index) | 168 | 1.07M | state.matches.empend(); | 169 | | | 170 | 1.07M | VERIFY(start_position + state.string_position - start_position <= input.view.length()); | 171 | 1.07M | if (input.regex_options.has_flag_set(AllFlags::StringCopyMatches)) { | 172 | 0 | state.matches.mutable_at(input.match_index) = { input.view.substring_view(start_position, state.string_position - start_position).to_byte_string(), input.line, start_position, input.global_offset + start_position }; | 173 | 1.07M | } else { // let the view point to the original string ... | 174 | 1.07M | state.matches.mutable_at(input.match_index) = { input.view.substring_view(start_position, state.string_position - start_position), input.line, start_position, input.global_offset + start_position }; | 175 | 1.07M | } | 176 | 1.07M | }; |
|
177 | | |
178 | | #if REGEX_DEBUG |
179 | | s_regex_dbg.print_header(); |
180 | | #endif |
181 | | |
182 | 34.3M | bool continue_search = input.regex_options.has_flag_set(AllFlags::Global) || input.regex_options.has_flag_set(AllFlags::Multiline); |
183 | 34.3M | if (input.regex_options.has_flag_set(AllFlags::Sticky)) |
184 | 0 | continue_search = false; |
185 | | |
186 | 34.3M | auto single_match_only = input.regex_options.has_flag_set(AllFlags::SingleMatch); |
187 | | |
188 | 34.3M | for (auto const& view : views) { |
189 | 34.3M | if (lines_to_skip != 0) { |
190 | 0 | ++input.line; |
191 | 0 | --lines_to_skip; |
192 | 0 | continue; |
193 | 0 | } |
194 | 34.3M | input.view = view; |
195 | 34.3M | dbgln_if(REGEX_DEBUG, "[match] Starting match with view ({}): _{}_", view.length(), view); |
196 | | |
197 | 34.3M | auto view_length = view.length(); |
198 | 34.3M | size_t view_index = m_pattern->start_offset; |
199 | 34.3M | state.string_position = view_index; |
200 | 34.3M | state.string_position_in_code_units = view_index; |
201 | 34.3M | bool succeeded = false; |
202 | | |
203 | 34.3M | if (view_index == view_length && m_pattern->parser_result.match_length_minimum == 0) { |
204 | | // Run the code until it tries to consume something. |
205 | | // This allows non-consuming code to run on empty strings, for instance |
206 | | // e.g. "Exit" |
207 | 0 | size_t temp_operations = operations; |
208 | |
|
209 | 0 | input.column = match_count; |
210 | 0 | input.match_index = match_count; |
211 | |
|
212 | 0 | state.string_position = view_index; |
213 | 0 | state.string_position_in_code_units = view_index; |
214 | 0 | state.instruction_position = 0; |
215 | 0 | state.repetition_marks.clear(); |
216 | |
|
217 | 0 | auto success = execute(input, state, temp_operations); |
218 | | // This success is acceptable only if it doesn't read anything from the input (input length is 0). |
219 | 0 | if (success && (state.string_position <= view_index)) { |
220 | 0 | operations = temp_operations; |
221 | 0 | if (!match_count) { |
222 | | // Nothing was *actually* matched, so append an empty match. |
223 | 0 | append_match(input, state, view_index); |
224 | 0 | ++match_count; |
225 | | |
226 | | // This prevents a regex pattern like ".*" from matching the empty string |
227 | | // multiple times, once in this block and once in the following for loop. |
228 | 0 | if (view_index == 0 && view_length == 0) |
229 | 0 | ++view_index; |
230 | 0 | } |
231 | 0 | } |
232 | 0 | } |
233 | | |
234 | 34.3M | for (; view_index <= view_length; ++view_index) { |
235 | 34.3M | if (view_index == view_length && input.regex_options.has_flag_set(AllFlags::Multiline)) |
236 | 0 | break; |
237 | | |
238 | 34.3M | auto& match_length_minimum = m_pattern->parser_result.match_length_minimum; |
239 | | // FIXME: More performant would be to know the remaining minimum string |
240 | | // length needed to match from the current position onwards within |
241 | | // the vm. Add new OpCode for MinMatchLengthFromSp with the value of |
242 | | // the remaining string length from the current path. The value though |
243 | | // has to be filled in reverse. That implies a second run over bytecode |
244 | | // after generation has finished. |
245 | 34.3M | if (match_length_minimum && match_length_minimum > view_length - view_index) |
246 | 29.7M | break; |
247 | | |
248 | 4.59M | input.column = match_count; |
249 | 4.59M | input.match_index = match_count; |
250 | | |
251 | 4.59M | state.string_position = view_index; |
252 | 4.59M | state.string_position_in_code_units = view_index; |
253 | 4.59M | state.instruction_position = 0; |
254 | 4.59M | state.repetition_marks.clear(); |
255 | | |
256 | 4.59M | auto success = execute(input, state, operations); |
257 | 4.59M | if (success) { |
258 | 1.07M | succeeded = true; |
259 | | |
260 | 1.07M | if (input.regex_options.has_flag_set(AllFlags::MatchNotEndOfLine) && state.string_position == input.view.length()) { |
261 | 0 | if (!continue_search) |
262 | 0 | break; |
263 | 0 | continue; |
264 | 0 | } |
265 | 1.07M | if (input.regex_options.has_flag_set(AllFlags::MatchNotBeginOfLine) && view_index == 0) { |
266 | 0 | if (!continue_search) |
267 | 0 | break; |
268 | 0 | continue; |
269 | 0 | } |
270 | | |
271 | 1.07M | dbgln_if(REGEX_DEBUG, "state.string_position={}, view_index={}", state.string_position, view_index); |
272 | 1.07M | dbgln_if(REGEX_DEBUG, "[match] Found a match (length={}): '{}'", state.string_position - view_index, input.view.substring_view(view_index, state.string_position - view_index)); |
273 | | |
274 | 1.07M | ++match_count; |
275 | | |
276 | 1.07M | if (continue_search) { |
277 | 0 | append_match(input, state, view_index); |
278 | |
|
279 | 0 | bool has_zero_length = state.string_position == view_index; |
280 | 0 | view_index = state.string_position - (has_zero_length ? 0 : 1); |
281 | 0 | if (single_match_only) |
282 | 0 | break; |
283 | 0 | continue; |
284 | 0 | } |
285 | 1.07M | if (input.regex_options.has_flag_set(AllFlags::Internal_Stateful)) { |
286 | 0 | append_match(input, state, view_index); |
287 | 0 | break; |
288 | 0 | } |
289 | 1.07M | if (state.string_position < view_length) { |
290 | 0 | return { false, 0, {}, {}, {}, operations }; |
291 | 0 | } |
292 | | |
293 | 1.07M | append_match(input, state, view_index); |
294 | 1.07M | break; |
295 | 1.07M | } |
296 | | |
297 | 3.52M | if (!continue_search) |
298 | 3.52M | break; |
299 | 3.52M | } |
300 | | |
301 | 34.3M | ++input.line; |
302 | 34.3M | input.global_offset += view.length() + 1; // +1 includes the line break character |
303 | | |
304 | 34.3M | if (input.regex_options.has_flag_set(AllFlags::Internal_Stateful)) |
305 | 0 | m_pattern->start_offset = state.string_position; |
306 | | |
307 | 34.3M | if (succeeded && !continue_search) |
308 | 1.07M | break; |
309 | 34.3M | } |
310 | | |
311 | 34.3M | RegexResult result { |
312 | 34.3M | match_count != 0, |
313 | 34.3M | match_count, |
314 | 34.3M | move(state.matches).release(), |
315 | 34.3M | move(state.capture_group_matches).release(), |
316 | 34.3M | operations, |
317 | 34.3M | m_pattern->parser_result.capture_groups_count, |
318 | 34.3M | m_pattern->parser_result.named_capture_groups_count, |
319 | 34.3M | }; |
320 | | |
321 | 34.3M | if (match_count) { |
322 | | // Make sure there are as many capture matches as there are actual matches. |
323 | 1.07M | if (result.capture_group_matches.size() < match_count) |
324 | 0 | result.capture_group_matches.resize(match_count); |
325 | 1.07M | for (auto& matches : result.capture_group_matches) |
326 | 1.07M | matches.resize(m_pattern->parser_result.capture_groups_count + 1); |
327 | 1.07M | if (!input.regex_options.has_flag_set(AllFlags::SkipTrimEmptyMatches)) { |
328 | 1.07M | for (auto& matches : result.capture_group_matches) |
329 | 5.27M | matches.remove_all_matching([](auto& match) { return match.view.is_null(); });Unexecuted instantiation: auto regex::Matcher<regex::PosixBasicParser>::match(AK::Vector<regex::RegexStringView, 0ul> const&, AK::Optional<regex::RegexOptions<regex::PosixFlags> >) const::{lambda(auto:1&)#1}::operator()<regex::Match>(regex::Match&) constUnexecuted instantiation: auto regex::Matcher<regex::PosixExtendedParser>::match(AK::Vector<regex::RegexStringView, 0ul> const&, AK::Optional<regex::RegexOptions<regex::PosixFlags> >) const::{lambda(auto:1&)#1}::operator()<regex::Match>(regex::Match&) constauto regex::Matcher<regex::ECMA262Parser>::match(AK::Vector<regex::RegexStringView, 0ul> const&, AK::Optional<regex::RegexOptions<regex::ECMAScriptFlags> >) const::{lambda(auto:1&)#1}::operator()<regex::Match>(regex::Match&) constLine | Count | Source | 329 | 5.27M | matches.remove_all_matching([](auto& match) { return match.view.is_null(); }); |
|
330 | 1.07M | } |
331 | 33.2M | } else { |
332 | 33.2M | result.capture_group_matches.clear_with_capacity(); |
333 | 33.2M | } |
334 | | |
335 | 34.3M | return result; |
336 | 34.3M | } Unexecuted instantiation: regex::Matcher<regex::PosixBasicParser>::match(AK::Vector<regex::RegexStringView, 0ul> const&, AK::Optional<regex::RegexOptions<regex::PosixFlags> >) const Unexecuted instantiation: regex::Matcher<regex::PosixExtendedParser>::match(AK::Vector<regex::RegexStringView, 0ul> const&, AK::Optional<regex::RegexOptions<regex::PosixFlags> >) const regex::Matcher<regex::ECMA262Parser>::match(AK::Vector<regex::RegexStringView, 0ul> const&, AK::Optional<regex::RegexOptions<regex::ECMAScriptFlags> >) const Line | Count | Source | 119 | 34.3M | { | 120 | | // If the pattern *itself* isn't stateful, reset any changes to start_offset. | 121 | 34.3M | if (!((AllFlags)m_regex_options.value() & AllFlags::Internal_Stateful)) | 122 | 34.3M | m_pattern->start_offset = 0; | 123 | | | 124 | 34.3M | size_t match_count { 0 }; | 125 | | | 126 | 34.3M | MatchInput input; | 127 | 34.3M | MatchState state; | 128 | 34.3M | size_t operations = 0; | 129 | | | 130 | 34.3M | input.regex_options = m_regex_options | regex_options.value_or({}).value(); | 131 | 34.3M | input.start_offset = m_pattern->start_offset; | 132 | 34.3M | size_t lines_to_skip = 0; | 133 | | | 134 | 34.3M | bool unicode = input.regex_options.has_flag_set(AllFlags::Unicode); | 135 | 34.3M | for (auto const& view : views) | 136 | 34.3M | const_cast<RegexStringView&>(view).set_unicode(unicode); | 137 | | | 138 | 34.3M | if (input.regex_options.has_flag_set(AllFlags::Internal_Stateful)) { | 139 | 0 | if (views.size() > 1 && input.start_offset > views.first().length()) { | 140 | 0 | dbgln_if(REGEX_DEBUG, "Started with start={}, goff={}, skip={}", input.start_offset, input.global_offset, lines_to_skip); | 141 | 0 | for (auto const& view : views) { | 142 | 0 | if (input.start_offset < view.length() + 1) | 143 | 0 | break; | 144 | 0 | ++lines_to_skip; | 145 | 0 | input.start_offset -= view.length() + 1; | 146 | 0 | input.global_offset += view.length() + 1; | 147 | 0 | } | 148 | 0 | dbgln_if(REGEX_DEBUG, "Ended with start={}, goff={}, skip={}", input.start_offset, input.global_offset, lines_to_skip); | 149 | 0 | } | 150 | 0 | } | 151 | | | 152 | 34.3M | if (c_match_preallocation_count) { | 153 | 0 | state.matches.ensure_capacity(c_match_preallocation_count); | 154 | 0 | state.capture_group_matches.ensure_capacity(c_match_preallocation_count); | 155 | 0 | auto& capture_groups_count = m_pattern->parser_result.capture_groups_count; | 156 | |
| 157 | 0 | for (size_t j = 0; j < c_match_preallocation_count; ++j) { | 158 | 0 | state.matches.empend(); | 159 | 0 | state.capture_group_matches.empend(); | 160 | 0 | state.capture_group_matches.mutable_at(j).ensure_capacity(capture_groups_count); | 161 | 0 | for (size_t k = 0; k < capture_groups_count; ++k) | 162 | 0 | state.capture_group_matches.mutable_at(j).unchecked_append({}); | 163 | 0 | } | 164 | 0 | } | 165 | | | 166 | 34.3M | auto append_match = [](auto& input, auto& state, auto& start_position) { | 167 | 34.3M | if (state.matches.size() == input.match_index) | 168 | 34.3M | state.matches.empend(); | 169 | | | 170 | 34.3M | VERIFY(start_position + state.string_position - start_position <= input.view.length()); | 171 | 34.3M | if (input.regex_options.has_flag_set(AllFlags::StringCopyMatches)) { | 172 | 34.3M | state.matches.mutable_at(input.match_index) = { input.view.substring_view(start_position, state.string_position - start_position).to_byte_string(), input.line, start_position, input.global_offset + start_position }; | 173 | 34.3M | } else { // let the view point to the original string ... | 174 | 34.3M | state.matches.mutable_at(input.match_index) = { input.view.substring_view(start_position, state.string_position - start_position), input.line, start_position, input.global_offset + start_position }; | 175 | 34.3M | } | 176 | 34.3M | }; | 177 | | | 178 | | #if REGEX_DEBUG | 179 | | s_regex_dbg.print_header(); | 180 | | #endif | 181 | | | 182 | 34.3M | bool continue_search = input.regex_options.has_flag_set(AllFlags::Global) || input.regex_options.has_flag_set(AllFlags::Multiline); | 183 | 34.3M | if (input.regex_options.has_flag_set(AllFlags::Sticky)) | 184 | 0 | continue_search = false; | 185 | | | 186 | 34.3M | auto single_match_only = input.regex_options.has_flag_set(AllFlags::SingleMatch); | 187 | | | 188 | 34.3M | for (auto const& view : views) { | 189 | 34.3M | if (lines_to_skip != 0) { | 190 | 0 | ++input.line; | 191 | 0 | --lines_to_skip; | 192 | 0 | continue; | 193 | 0 | } | 194 | 34.3M | input.view = view; | 195 | 34.3M | dbgln_if(REGEX_DEBUG, "[match] Starting match with view ({}): _{}_", view.length(), view); | 196 | | | 197 | 34.3M | auto view_length = view.length(); | 198 | 34.3M | size_t view_index = m_pattern->start_offset; | 199 | 34.3M | state.string_position = view_index; | 200 | 34.3M | state.string_position_in_code_units = view_index; | 201 | 34.3M | bool succeeded = false; | 202 | | | 203 | 34.3M | if (view_index == view_length && m_pattern->parser_result.match_length_minimum == 0) { | 204 | | // Run the code until it tries to consume something. | 205 | | // This allows non-consuming code to run on empty strings, for instance | 206 | | // e.g. "Exit" | 207 | 0 | size_t temp_operations = operations; | 208 | |
| 209 | 0 | input.column = match_count; | 210 | 0 | input.match_index = match_count; | 211 | |
| 212 | 0 | state.string_position = view_index; | 213 | 0 | state.string_position_in_code_units = view_index; | 214 | 0 | state.instruction_position = 0; | 215 | 0 | state.repetition_marks.clear(); | 216 | |
| 217 | 0 | auto success = execute(input, state, temp_operations); | 218 | | // This success is acceptable only if it doesn't read anything from the input (input length is 0). | 219 | 0 | if (success && (state.string_position <= view_index)) { | 220 | 0 | operations = temp_operations; | 221 | 0 | if (!match_count) { | 222 | | // Nothing was *actually* matched, so append an empty match. | 223 | 0 | append_match(input, state, view_index); | 224 | 0 | ++match_count; | 225 | | | 226 | | // This prevents a regex pattern like ".*" from matching the empty string | 227 | | // multiple times, once in this block and once in the following for loop. | 228 | 0 | if (view_index == 0 && view_length == 0) | 229 | 0 | ++view_index; | 230 | 0 | } | 231 | 0 | } | 232 | 0 | } | 233 | | | 234 | 34.3M | for (; view_index <= view_length; ++view_index) { | 235 | 34.3M | if (view_index == view_length && input.regex_options.has_flag_set(AllFlags::Multiline)) | 236 | 0 | break; | 237 | | | 238 | 34.3M | auto& match_length_minimum = m_pattern->parser_result.match_length_minimum; | 239 | | // FIXME: More performant would be to know the remaining minimum string | 240 | | // length needed to match from the current position onwards within | 241 | | // the vm. Add new OpCode for MinMatchLengthFromSp with the value of | 242 | | // the remaining string length from the current path. The value though | 243 | | // has to be filled in reverse. That implies a second run over bytecode | 244 | | // after generation has finished. | 245 | 34.3M | if (match_length_minimum && match_length_minimum > view_length - view_index) | 246 | 29.7M | break; | 247 | | | 248 | 4.59M | input.column = match_count; | 249 | 4.59M | input.match_index = match_count; | 250 | | | 251 | 4.59M | state.string_position = view_index; | 252 | 4.59M | state.string_position_in_code_units = view_index; | 253 | 4.59M | state.instruction_position = 0; | 254 | 4.59M | state.repetition_marks.clear(); | 255 | | | 256 | 4.59M | auto success = execute(input, state, operations); | 257 | 4.59M | if (success) { | 258 | 1.07M | succeeded = true; | 259 | | | 260 | 1.07M | if (input.regex_options.has_flag_set(AllFlags::MatchNotEndOfLine) && state.string_position == input.view.length()) { | 261 | 0 | if (!continue_search) | 262 | 0 | break; | 263 | 0 | continue; | 264 | 0 | } | 265 | 1.07M | if (input.regex_options.has_flag_set(AllFlags::MatchNotBeginOfLine) && view_index == 0) { | 266 | 0 | if (!continue_search) | 267 | 0 | break; | 268 | 0 | continue; | 269 | 0 | } | 270 | | | 271 | 1.07M | dbgln_if(REGEX_DEBUG, "state.string_position={}, view_index={}", state.string_position, view_index); | 272 | 1.07M | dbgln_if(REGEX_DEBUG, "[match] Found a match (length={}): '{}'", state.string_position - view_index, input.view.substring_view(view_index, state.string_position - view_index)); | 273 | | | 274 | 1.07M | ++match_count; | 275 | | | 276 | 1.07M | if (continue_search) { | 277 | 0 | append_match(input, state, view_index); | 278 | |
| 279 | 0 | bool has_zero_length = state.string_position == view_index; | 280 | 0 | view_index = state.string_position - (has_zero_length ? 0 : 1); | 281 | 0 | if (single_match_only) | 282 | 0 | break; | 283 | 0 | continue; | 284 | 0 | } | 285 | 1.07M | if (input.regex_options.has_flag_set(AllFlags::Internal_Stateful)) { | 286 | 0 | append_match(input, state, view_index); | 287 | 0 | break; | 288 | 0 | } | 289 | 1.07M | if (state.string_position < view_length) { | 290 | 0 | return { false, 0, {}, {}, {}, operations }; | 291 | 0 | } | 292 | | | 293 | 1.07M | append_match(input, state, view_index); | 294 | 1.07M | break; | 295 | 1.07M | } | 296 | | | 297 | 3.52M | if (!continue_search) | 298 | 3.52M | break; | 299 | 3.52M | } | 300 | | | 301 | 34.3M | ++input.line; | 302 | 34.3M | input.global_offset += view.length() + 1; // +1 includes the line break character | 303 | | | 304 | 34.3M | if (input.regex_options.has_flag_set(AllFlags::Internal_Stateful)) | 305 | 0 | m_pattern->start_offset = state.string_position; | 306 | | | 307 | 34.3M | if (succeeded && !continue_search) | 308 | 1.07M | break; | 309 | 34.3M | } | 310 | | | 311 | 34.3M | RegexResult result { | 312 | 34.3M | match_count != 0, | 313 | 34.3M | match_count, | 314 | 34.3M | move(state.matches).release(), | 315 | 34.3M | move(state.capture_group_matches).release(), | 316 | 34.3M | operations, | 317 | 34.3M | m_pattern->parser_result.capture_groups_count, | 318 | 34.3M | m_pattern->parser_result.named_capture_groups_count, | 319 | 34.3M | }; | 320 | | | 321 | 34.3M | if (match_count) { | 322 | | // Make sure there are as many capture matches as there are actual matches. | 323 | 1.07M | if (result.capture_group_matches.size() < match_count) | 324 | 0 | result.capture_group_matches.resize(match_count); | 325 | 1.07M | for (auto& matches : result.capture_group_matches) | 326 | 1.07M | matches.resize(m_pattern->parser_result.capture_groups_count + 1); | 327 | 1.07M | if (!input.regex_options.has_flag_set(AllFlags::SkipTrimEmptyMatches)) { | 328 | 1.07M | for (auto& matches : result.capture_group_matches) | 329 | 1.07M | matches.remove_all_matching([](auto& match) { return match.view.is_null(); }); | 330 | 1.07M | } | 331 | 33.2M | } else { | 332 | 33.2M | result.capture_group_matches.clear_with_capacity(); | 333 | 33.2M | } | 334 | | | 335 | 34.3M | return result; | 336 | 34.3M | } |
|
337 | | |
338 | | template<typename T> |
339 | | class BumpAllocatedLinkedList { |
340 | | public: |
341 | 4.59M | BumpAllocatedLinkedList() = default; |
342 | | |
343 | | ALWAYS_INLINE void append(T value) |
344 | 60.9M | { |
345 | 60.9M | auto node_ptr = m_allocator.allocate(move(value)); |
346 | 60.9M | VERIFY(node_ptr); |
347 | | |
348 | 60.9M | if (!m_first) { |
349 | 10.9M | m_first = node_ptr; |
350 | 10.9M | m_last = node_ptr; |
351 | 10.9M | return; |
352 | 10.9M | } |
353 | | |
354 | 50.0M | node_ptr->previous = m_last; |
355 | 50.0M | m_last->next = node_ptr; |
356 | 50.0M | m_last = node_ptr; |
357 | 50.0M | } |
358 | | |
359 | | ALWAYS_INLINE T take_last() |
360 | 21.6M | { |
361 | 21.6M | VERIFY(m_last); |
362 | 21.6M | T value = move(m_last->value); |
363 | 21.6M | if (m_last == m_first) { |
364 | 10.9M | m_last = nullptr; |
365 | 10.9M | m_first = nullptr; |
366 | 10.9M | } else { |
367 | 10.7M | m_last = m_last->previous; |
368 | 10.7M | m_last->next = nullptr; |
369 | 10.7M | } |
370 | 21.6M | return value; |
371 | 21.6M | } |
372 | | |
373 | | ALWAYS_INLINE T& last() |
374 | 114M | { |
375 | 114M | return m_last->value; |
376 | 114M | } |
377 | | |
378 | | ALWAYS_INLINE bool is_empty() const |
379 | 25.2M | { |
380 | 25.2M | return m_first == nullptr; |
381 | 25.2M | } |
382 | | |
383 | 55.2k | auto reverse_begin() { return ReverseIterator(m_last); } |
384 | 321M | auto reverse_end() { return ReverseIterator(); } |
385 | | |
386 | | private: |
387 | | struct Node { |
388 | | T value; |
389 | | Node* next { nullptr }; |
390 | | Node* previous { nullptr }; |
391 | | }; |
392 | | |
393 | | struct ReverseIterator { |
394 | | ReverseIterator() = default; |
395 | | explicit ReverseIterator(Node* node) |
396 | 55.2k | : m_node(node) |
397 | 55.2k | { |
398 | 55.2k | } |
399 | | |
400 | 321M | T* operator->() { return &m_node->value; } |
401 | 223 | T& operator*() { return m_node->value; } |
402 | 321M | bool operator==(ReverseIterator const& it) const { return m_node == it.m_node; } |
403 | | ReverseIterator& operator++() |
404 | 321M | { |
405 | 321M | if (m_node) |
406 | 321M | m_node = m_node->previous; |
407 | 321M | return *this; |
408 | 321M | } |
409 | | |
410 | | private: |
411 | | Node* m_node; |
412 | | }; |
413 | | |
414 | | UniformBumpAllocator<Node, true, 2 * MiB> m_allocator; |
415 | | Node* m_first { nullptr }; |
416 | | Node* m_last { nullptr }; |
417 | | }; |
418 | | |
419 | | template<class Parser> |
420 | | bool Matcher<Parser>::execute(MatchInput const& input, MatchState& state, size_t& operations) const |
421 | 4.59M | { |
422 | 4.59M | if (m_pattern->parser_result.optimization_data.pure_substring_search.has_value() && input.view.is_string_view()) { |
423 | | // Yay, we can do a simple substring search! |
424 | 0 | auto& needle = m_pattern->parser_result.optimization_data.pure_substring_search.value(); |
425 | 0 | if (needle.length() + state.string_position > input.view.length()) |
426 | 0 | return false; |
427 | | |
428 | 0 | auto haystack = input.view.string_view().substring_view(state.string_position); |
429 | 0 | if (input.regex_options.has_flag_set(AllFlags::Insensitive)) { |
430 | 0 | if (!haystack.substring_view(0, needle.length()).equals_ignoring_ascii_case(needle)) |
431 | 0 | return false; |
432 | 0 | } else { |
433 | 0 | if (!haystack.starts_with(needle)) |
434 | 0 | return false; |
435 | 0 | } |
436 | | |
437 | 0 | state.string_position += needle.length(); |
438 | 0 | state.string_position_in_code_units += needle.length(); |
439 | 0 | return true; |
440 | 0 | } |
441 | | |
442 | 4.59M | BumpAllocatedLinkedList<MatchState> states_to_try_next; |
443 | | #if REGEX_DEBUG |
444 | | size_t recursion_level = 0; |
445 | | #endif |
446 | | |
447 | 4.59M | auto& bytecode = m_pattern->parser_result.bytecode; |
448 | | |
449 | 278M | for (;;) { |
450 | 278M | auto& opcode = bytecode.get_opcode(state); |
451 | 278M | ++operations; |
452 | | |
453 | | #if REGEX_DEBUG |
454 | | s_regex_dbg.print_opcode("VM", opcode, state, recursion_level, false); |
455 | | #endif |
456 | | |
457 | 278M | ExecutionResult result; |
458 | 278M | if (input.fail_counter > 0) { |
459 | 0 | --input.fail_counter; |
460 | 0 | result = ExecutionResult::Failed_ExecuteLowPrioForks; |
461 | 278M | } else { |
462 | 278M | result = opcode.execute(input, state); |
463 | 278M | } |
464 | | |
465 | | #if REGEX_DEBUG |
466 | | s_regex_dbg.print_result(opcode, bytecode, input, state, result); |
467 | | #endif |
468 | | |
469 | 278M | state.instruction_position += opcode.size(); |
470 | | |
471 | 278M | switch (result) { |
472 | 54.0M | case ExecutionResult::Fork_PrioLow: { |
473 | 54.0M | bool found = false; |
474 | 54.0M | if (input.fork_to_replace.has_value()) { |
475 | 321M | for (auto it = states_to_try_next.reverse_begin(); it != states_to_try_next.reverse_end(); ++it) { |
476 | 321M | if (it->initiating_fork == input.fork_to_replace.value()) { |
477 | 223 | (*it) = state; |
478 | 223 | it->instruction_position = state.fork_at_position; |
479 | 223 | it->initiating_fork = *input.fork_to_replace; |
480 | 223 | found = true; |
481 | 223 | break; |
482 | 223 | } |
483 | 321M | } |
484 | 55.2k | input.fork_to_replace.clear(); |
485 | 55.2k | } |
486 | 54.0M | if (!found) { |
487 | 54.0M | states_to_try_next.append(state); |
488 | 54.0M | states_to_try_next.last().initiating_fork = state.instruction_position - opcode.size(); |
489 | 54.0M | states_to_try_next.last().instruction_position = state.fork_at_position; |
490 | 54.0M | } |
491 | 54.0M | continue; |
492 | 0 | } |
493 | 6.94M | case ExecutionResult::Fork_PrioHigh: { |
494 | 6.94M | bool found = false; |
495 | 6.94M | if (input.fork_to_replace.has_value()) { |
496 | 0 | for (auto it = states_to_try_next.reverse_begin(); it != states_to_try_next.reverse_end(); ++it) { |
497 | 0 | if (it->initiating_fork == input.fork_to_replace.value()) { |
498 | 0 | (*it) = state; |
499 | 0 | it->initiating_fork = *input.fork_to_replace; |
500 | 0 | found = true; |
501 | 0 | break; |
502 | 0 | } |
503 | 0 | } |
504 | 0 | input.fork_to_replace.clear(); |
505 | 0 | } |
506 | 6.94M | if (!found) { |
507 | 6.94M | states_to_try_next.append(state); |
508 | 6.94M | states_to_try_next.last().initiating_fork = state.instruction_position - opcode.size(); |
509 | 6.94M | } |
510 | 6.94M | state.instruction_position = state.fork_at_position; |
511 | | #if REGEX_DEBUG |
512 | | ++recursion_level; |
513 | | #endif |
514 | 6.94M | continue; |
515 | 0 | } |
516 | 190M | case ExecutionResult::Continue: |
517 | 190M | continue; |
518 | 1.07M | case ExecutionResult::Succeeded: |
519 | 1.07M | return true; |
520 | 0 | case ExecutionResult::Failed: |
521 | 0 | if (!states_to_try_next.is_empty()) { |
522 | 0 | state = states_to_try_next.take_last(); |
523 | 0 | continue; |
524 | 0 | } |
525 | 0 | return false; |
526 | 25.2M | case ExecutionResult::Failed_ExecuteLowPrioForks: { |
527 | 25.2M | if (states_to_try_next.is_empty()) { |
528 | 3.52M | return false; |
529 | 3.52M | } |
530 | 21.6M | state = states_to_try_next.take_last(); |
531 | | #if REGEX_DEBUG |
532 | | ++recursion_level; |
533 | | #endif |
534 | 21.6M | continue; |
535 | 25.2M | } |
536 | 278M | } |
537 | 278M | } |
538 | | |
539 | 0 | VERIFY_NOT_REACHED(); |
540 | 0 | } Unexecuted instantiation: regex::Matcher<regex::PosixBasicParser>::execute(regex::MatchInput const&, regex::MatchState&, unsigned long&) const Unexecuted instantiation: regex::Matcher<regex::PosixExtendedParser>::execute(regex::MatchInput const&, regex::MatchState&, unsigned long&) const regex::Matcher<regex::ECMA262Parser>::execute(regex::MatchInput const&, regex::MatchState&, unsigned long&) const Line | Count | Source | 421 | 4.59M | { | 422 | 4.59M | if (m_pattern->parser_result.optimization_data.pure_substring_search.has_value() && input.view.is_string_view()) { | 423 | | // Yay, we can do a simple substring search! | 424 | 0 | auto& needle = m_pattern->parser_result.optimization_data.pure_substring_search.value(); | 425 | 0 | if (needle.length() + state.string_position > input.view.length()) | 426 | 0 | return false; | 427 | | | 428 | 0 | auto haystack = input.view.string_view().substring_view(state.string_position); | 429 | 0 | if (input.regex_options.has_flag_set(AllFlags::Insensitive)) { | 430 | 0 | if (!haystack.substring_view(0, needle.length()).equals_ignoring_ascii_case(needle)) | 431 | 0 | return false; | 432 | 0 | } else { | 433 | 0 | if (!haystack.starts_with(needle)) | 434 | 0 | return false; | 435 | 0 | } | 436 | | | 437 | 0 | state.string_position += needle.length(); | 438 | 0 | state.string_position_in_code_units += needle.length(); | 439 | 0 | return true; | 440 | 0 | } | 441 | | | 442 | 4.59M | BumpAllocatedLinkedList<MatchState> states_to_try_next; | 443 | | #if REGEX_DEBUG | 444 | | size_t recursion_level = 0; | 445 | | #endif | 446 | | | 447 | 4.59M | auto& bytecode = m_pattern->parser_result.bytecode; | 448 | | | 449 | 278M | for (;;) { | 450 | 278M | auto& opcode = bytecode.get_opcode(state); | 451 | 278M | ++operations; | 452 | | | 453 | | #if REGEX_DEBUG | 454 | | s_regex_dbg.print_opcode("VM", opcode, state, recursion_level, false); | 455 | | #endif | 456 | | | 457 | 278M | ExecutionResult result; | 458 | 278M | if (input.fail_counter > 0) { | 459 | 0 | --input.fail_counter; | 460 | 0 | result = ExecutionResult::Failed_ExecuteLowPrioForks; | 461 | 278M | } else { | 462 | 278M | result = opcode.execute(input, state); | 463 | 278M | } | 464 | | | 465 | | #if REGEX_DEBUG | 466 | | s_regex_dbg.print_result(opcode, bytecode, input, state, result); | 467 | | #endif | 468 | | | 469 | 278M | state.instruction_position += opcode.size(); | 470 | | | 471 | 278M | switch (result) { | 472 | 54.0M | case ExecutionResult::Fork_PrioLow: { | 473 | 54.0M | bool found = false; | 474 | 54.0M | if (input.fork_to_replace.has_value()) { | 475 | 321M | for (auto it = states_to_try_next.reverse_begin(); it != states_to_try_next.reverse_end(); ++it) { | 476 | 321M | if (it->initiating_fork == input.fork_to_replace.value()) { | 477 | 223 | (*it) = state; | 478 | 223 | it->instruction_position = state.fork_at_position; | 479 | 223 | it->initiating_fork = *input.fork_to_replace; | 480 | 223 | found = true; | 481 | 223 | break; | 482 | 223 | } | 483 | 321M | } | 484 | 55.2k | input.fork_to_replace.clear(); | 485 | 55.2k | } | 486 | 54.0M | if (!found) { | 487 | 54.0M | states_to_try_next.append(state); | 488 | 54.0M | states_to_try_next.last().initiating_fork = state.instruction_position - opcode.size(); | 489 | 54.0M | states_to_try_next.last().instruction_position = state.fork_at_position; | 490 | 54.0M | } | 491 | 54.0M | continue; | 492 | 0 | } | 493 | 6.94M | case ExecutionResult::Fork_PrioHigh: { | 494 | 6.94M | bool found = false; | 495 | 6.94M | if (input.fork_to_replace.has_value()) { | 496 | 0 | for (auto it = states_to_try_next.reverse_begin(); it != states_to_try_next.reverse_end(); ++it) { | 497 | 0 | if (it->initiating_fork == input.fork_to_replace.value()) { | 498 | 0 | (*it) = state; | 499 | 0 | it->initiating_fork = *input.fork_to_replace; | 500 | 0 | found = true; | 501 | 0 | break; | 502 | 0 | } | 503 | 0 | } | 504 | 0 | input.fork_to_replace.clear(); | 505 | 0 | } | 506 | 6.94M | if (!found) { | 507 | 6.94M | states_to_try_next.append(state); | 508 | 6.94M | states_to_try_next.last().initiating_fork = state.instruction_position - opcode.size(); | 509 | 6.94M | } | 510 | 6.94M | state.instruction_position = state.fork_at_position; | 511 | | #if REGEX_DEBUG | 512 | | ++recursion_level; | 513 | | #endif | 514 | 6.94M | continue; | 515 | 0 | } | 516 | 190M | case ExecutionResult::Continue: | 517 | 190M | continue; | 518 | 1.07M | case ExecutionResult::Succeeded: | 519 | 1.07M | return true; | 520 | 0 | case ExecutionResult::Failed: | 521 | 0 | if (!states_to_try_next.is_empty()) { | 522 | 0 | state = states_to_try_next.take_last(); | 523 | 0 | continue; | 524 | 0 | } | 525 | 0 | return false; | 526 | 25.2M | case ExecutionResult::Failed_ExecuteLowPrioForks: { | 527 | 25.2M | if (states_to_try_next.is_empty()) { | 528 | 3.52M | return false; | 529 | 3.52M | } | 530 | 21.6M | state = states_to_try_next.take_last(); | 531 | | #if REGEX_DEBUG | 532 | | ++recursion_level; | 533 | | #endif | 534 | 21.6M | continue; | 535 | 25.2M | } | 536 | 278M | } | 537 | 278M | } | 538 | | | 539 | 0 | VERIFY_NOT_REACHED(); | 540 | 0 | } |
|
541 | | |
542 | | template class Matcher<PosixBasicParser>; |
543 | | template class Regex<PosixBasicParser>; |
544 | | |
545 | | template class Matcher<PosixExtendedParser>; |
546 | | template class Regex<PosixExtendedParser>; |
547 | | |
548 | | template class Matcher<ECMA262Parser>; |
549 | | template class Regex<ECMA262Parser>; |
550 | | } |