Coverage Report

Created: 2025-03-04 07:22

/src/serenity/Userland/Libraries/LibRegex/RegexMatcher.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2020, Emanuel Sprung <emanuel.sprung@gmail.com>
3
 *
4
 * SPDX-License-Identifier: BSD-2-Clause
5
 */
6
7
#pragma once
8
9
#include "RegexByteCode.h"
10
#include "RegexMatch.h"
11
#include "RegexOptions.h"
12
#include "RegexParser.h"
13
14
#include <AK/Forward.h>
15
#include <AK/GenericLexer.h>
16
#include <AK/HashMap.h>
17
#include <AK/Types.h>
18
#include <AK/Utf32View.h>
19
#include <AK/Vector.h>
20
#include <ctype.h>
21
22
#include <stdio.h>
23
24
namespace regex {
25
26
namespace Detail {
27
28
// A range described by a `start` and an `end` position plus a free-form label.
// Regex<Parser>::BasicBlockList is a Vector of these, and split_basic_blocks(ByteCode const&) returns one.
// NOTE(review): start/end are presumably bytecode offsets, and whether `end` is inclusive is not visible here - confirm.
struct Block {
29
    size_t start;
30
    size_t end;
31
    StringView comment { "N/A"sv }; // Label defaults to "N/A" when none is supplied.
32
};
33
34
}
35
36
// Tuning constants. Neither is referenced in the visible part of this header.
// NOTE(review): presumably consumed by the matcher implementation (recursion limit and the number of
// match slots to preallocate) - confirm against the implementation file.
static constexpr size_t const c_max_recursion = 5000;
37
static constexpr size_t const c_match_preallocation_count = 0;
38
39
// Outcome of a match/search call. A default-constructed value (success == false, everything empty)
// is what Regex<Parser>::match() returns when there is no matcher or the pattern failed to parse.
struct RegexResult final {
40
    bool success { false }; // Reported by the bool-returning match()/search()/has_match() helpers.
41
    size_t count { 0 };
42
    Vector<Match> matches; // One entry per match; Regex<Parser>::replace() walks these in order.
43
    // Indexed as [match index][group index]; replace() maps the 1-based "\N" reference to group index N - 1.
    Vector<Vector<Match>> capture_group_matches;
44
    size_t n_operations { 0 };
45
    size_t n_capture_groups { 0 }; // Upper bound replace() accepts for a "\N" reference.
46
    size_t n_named_capture_groups { 0 };
47
};
48
49
template<class Parser>
50
class Regex;
51
52
template<class Parser>
53
class Matcher final {
54
55
public:
56
    // Creates a matcher bound to `pattern`. The pointer is stored as-is (this class never deletes it),
    // and the stored options fall back to a default-constructed OptionsType when none are passed.
    Matcher(Regex<Parser> const* pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
57
13.2k
        : m_pattern(pattern)
58
13.2k
        , m_regex_options(regex_options.value_or({}))
59
13.2k
    {
60
13.2k
    }
regex::Matcher<regex::PosixBasicParser>::Matcher(regex::Regex<regex::PosixBasicParser> const*, AK::Optional<regex::RegexOptions<regex::PosixFlags> >)
Line
Count
Source
57
131
        : m_pattern(pattern)
58
131
        , m_regex_options(regex_options.value_or({}))
59
131
    {
60
131
    }
regex::Matcher<regex::PosixExtendedParser>::Matcher(regex::Regex<regex::PosixExtendedParser> const*, AK::Optional<regex::RegexOptions<regex::PosixFlags> >)
Line
Count
Source
57
234
        : m_pattern(pattern)
58
234
        , m_regex_options(regex_options.value_or({}))
59
234
    {
60
234
    }
regex::Matcher<regex::ECMA262Parser>::Matcher(regex::Regex<regex::ECMA262Parser> const*, AK::Optional<regex::RegexOptions<regex::ECMAScriptFlags> >)
Line
Count
Source
57
12.8k
        : m_pattern(pattern)
58
12.8k
        , m_regex_options(regex_options.value_or({}))
59
12.8k
    {
60
12.8k
    }
61
    ~Matcher() = default;
62
63
    RegexResult match(RegexStringView, Optional<typename ParserTraits<Parser>::OptionsType> = {}) const;
64
    RegexResult match(Vector<RegexStringView> const&, Optional<typename ParserTraits<Parser>::OptionsType> = {}) const;
65
66
    // Returns a copy of the options this matcher was constructed with.
    typename ParserTraits<Parser>::OptionsType options() const
67
0
    {
68
0
        return m_regex_options;
69
0
    }
Unexecuted instantiation: regex::Matcher<regex::PosixBasicParser>::options() const
Unexecuted instantiation: regex::Matcher<regex::PosixExtendedParser>::options() const
Unexecuted instantiation: regex::Matcher<regex::ECMA262Parser>::options() const
70
71
    // Rebinds this matcher to a different Regex object; the stored options are left untouched.
    // NOTE(review): the Badge parameter presumably restricts callers to Regex<Parser> itself - confirm against AK/Badge.h.
    void reset_pattern(Badge<Regex<Parser>>, Regex<Parser> const* pattern)
72
38.5k
    {
73
38.5k
        m_pattern = pattern;
74
38.5k
    }
Unexecuted instantiation: regex::Matcher<regex::PosixBasicParser>::reset_pattern(AK::Badge<regex::Regex<regex::PosixBasicParser> >, regex::Regex<regex::PosixBasicParser> const*)
Unexecuted instantiation: regex::Matcher<regex::PosixExtendedParser>::reset_pattern(AK::Badge<regex::Regex<regex::PosixExtendedParser> >, regex::Regex<regex::PosixExtendedParser> const*)
regex::Matcher<regex::ECMA262Parser>::reset_pattern(AK::Badge<regex::Regex<regex::ECMA262Parser> >, regex::Regex<regex::ECMA262Parser> const*)
Line
Count
Source
72
38.5k
    {
73
38.5k
        m_pattern = pattern;
74
38.5k
    }
75
76
private:
77
    bool execute(MatchInput const& input, MatchState& state, size_t& operations) const;
78
79
    Regex<Parser> const* m_pattern;
80
    typename ParserTraits<Parser>::OptionsType const m_regex_options;
81
};
82
83
template<class Parser>
84
class Regex final {
85
public:
86
    ByteString pattern_value;
87
    regex::Parser::Result parser_result;
88
    OwnPtr<Matcher<Parser>> matcher { nullptr };
89
    mutable size_t start_offset { 0 };
90
91
    static regex::Parser::Result parse_pattern(StringView pattern, typename ParserTraits<Parser>::OptionsType regex_options = {});
92
93
    explicit Regex(ByteString pattern, typename ParserTraits<Parser>::OptionsType regex_options = {});
94
    Regex(regex::Parser::Result parse_result, ByteString pattern, typename ParserTraits<Parser>::OptionsType regex_options = {});
95
56.8k
    ~Regex() = default;
regex::Regex<regex::ECMA262Parser>::~Regex()
Line
Count
Source
95
55.9k
    ~Regex() = default;
regex::Regex<regex::PosixBasicParser>::~Regex()
Line
Count
Source
95
218
    ~Regex() = default;
regex::Regex<regex::PosixExtendedParser>::~Regex()
Line
Count
Source
95
662
    ~Regex() = default;
96
    Regex(Regex&&);
97
    Regex& operator=(Regex&&);
98
99
    typename ParserTraits<Parser>::OptionsType options() const;
100
    ByteString error_string(Optional<ByteString> message = {}) const;
101
102
    // Matches `view` against this pattern and returns the full result.
    // Returns a default RegexResult (success == false) when no matcher exists or the pattern failed to parse.
    RegexResult match(RegexStringView view, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
103
35.0M
    {
104
35.0M
        if (!matcher || parser_result.error != Error::NoError)
105
0
            return {};
106
35.0M
        return matcher->match(view, regex_options);
107
35.0M
    }
regex::Regex<regex::ECMA262Parser>::match(regex::RegexStringView, AK::Optional<regex::RegexOptions<regex::ECMAScriptFlags> >) const
Line
Count
Source
103
35.0M
    {
104
35.0M
        if (!matcher || parser_result.error != Error::NoError)
105
0
            return {};
106
35.0M
        return matcher->match(view, regex_options);
107
35.0M
    }
Unexecuted instantiation: regex::Regex<regex::PosixBasicParser>::match(regex::RegexStringView, AK::Optional<regex::RegexOptions<regex::PosixFlags> >) const
Unexecuted instantiation: regex::Regex<regex::PosixExtendedParser>::match(regex::RegexStringView, AK::Optional<regex::RegexOptions<regex::PosixFlags> >) const
108
109
    // Multi-view overload: matches the list `views` against this pattern and returns the full result.
    // Returns a default RegexResult (success == false) when no matcher exists or the pattern failed to parse.
    RegexResult match(Vector<RegexStringView> const& views, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
110
0
    {
111
0
        if (!matcher || parser_result.error != Error::NoError)
112
0
            return {};
113
0
        return matcher->match(views, regex_options);
114
0
    }
Unexecuted instantiation: regex::Regex<regex::PosixBasicParser>::match(AK::Vector<regex::RegexStringView, 0ul> const&, AK::Optional<regex::RegexOptions<regex::PosixFlags> >) const
Unexecuted instantiation: regex::Regex<regex::PosixExtendedParser>::match(AK::Vector<regex::RegexStringView, 0ul> const&, AK::Optional<regex::RegexOptions<regex::PosixFlags> >) const
Unexecuted instantiation: regex::Regex<regex::ECMA262Parser>::match(AK::Vector<regex::RegexStringView, 0ul> const&, AK::Optional<regex::RegexOptions<regex::ECMAScriptFlags> >) const
115
116
    // Returns a copy of `view` in which every match reported by the matcher is replaced by the
    // expansion of `replacement_pattern`. Inside the replacement pattern:
    //   - two consecutive backslashes emit one literal backslash;
    //   - a backslash followed by a decimal number N (1 <= N <= n_capture_groups) emits capture group N
    //     of the match currently being replaced;
    //   - any other backslash sequence (including N == 0 or N out of range) is emitted verbatim.
    // Returns an empty ByteString when no matcher exists or the pattern failed to parse, and the
    // unmodified input when nothing matched.
    ByteString replace(RegexStringView view, StringView replacement_pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
117
0
    {
118
0
        if (!matcher || parser_result.error != Error::NoError)
119
0
            return {};
120
121
0
        StringBuilder builder;
122
0
        // Position in `view` up to which input has already been copied or replaced.
        // Deliberately not called `start_offset`, which would shadow the mutable member of that name.
        size_t copy_offset = 0;
123
0
        RegexResult result = matcher->match(view, regex_options);
124
0
        if (!result.success)
125
0
            return view.to_byte_string();
126
127
0
        for (size_t i = 0; i < result.matches.size(); ++i) {
128
0
            auto& match = result.matches[i];
129
0
            // Copy the unmatched text between the previous match and this one.
            builder.append(view.substring_view(copy_offset, match.global_offset - copy_offset).to_byte_string());
130
0
            copy_offset = match.global_offset + match.view.length();
131
0
            GenericLexer lexer(replacement_pattern);
132
0
            while (!lexer.is_eof()) {
133
0
                if (lexer.consume_specific('\\')) {
134
0
                    if (lexer.consume_specific('\\')) {
135
0
                        builder.append('\\');
136
0
                        continue;
137
0
                    }
138
0
                    auto number = lexer.consume_while(isdigit);
139
0
                    // Group references are 1-based. Reject 0 here: `index - 1` below is unsigned and would
                    // wrap around, indexing far outside the capture group vector.
                    if (auto index = number.to_number<unsigned>(); index.has_value() && index.value() > 0 && result.n_capture_groups >= index.value()) {
140
0
                        builder.append(result.capture_group_matches[i][index.value() - 1].view.to_byte_string());
141
0
                    } else {
142
0
                        // Not a usable group reference: keep the sequence as written.
                        builder.appendff("\\{}", number);
143
0
                    }
144
0
                } else {
145
0
                    builder.append(lexer.consume_while([](auto ch) { return ch != '\\'; }));
Unexecuted instantiation: auto regex::Regex<regex::PosixBasicParser>::replace(regex::RegexStringView, AK::StringView, AK::Optional<regex::RegexOptions<regex::PosixFlags> >) const::{lambda(auto:1)#1}::operator()<char>(char) const
Unexecuted instantiation: auto regex::Regex<regex::PosixExtendedParser>::replace(regex::RegexStringView, AK::StringView, AK::Optional<regex::RegexOptions<regex::PosixFlags> >) const::{lambda(auto:1)#1}::operator()<char>(char) const
Unexecuted instantiation: auto regex::Regex<regex::ECMA262Parser>::replace(regex::RegexStringView, AK::StringView, AK::Optional<regex::RegexOptions<regex::ECMAScriptFlags> >) const::{lambda(auto:1)#1}::operator()<char>(char) const
146
0
                }
147
0
            }
148
0
        }
149
150
0
        // Copy whatever follows the last match.
        builder.append(view.substring_view(copy_offset, view.length() - copy_offset).to_byte_string());
151
152
0
        return builder.to_byte_string();
153
0
    }
Unexecuted instantiation: regex::Regex<regex::PosixBasicParser>::replace(regex::RegexStringView, AK::StringView, AK::Optional<regex::RegexOptions<regex::PosixFlags> >) const
Unexecuted instantiation: regex::Regex<regex::PosixExtendedParser>::replace(regex::RegexStringView, AK::StringView, AK::Optional<regex::RegexOptions<regex::PosixFlags> >) const
Unexecuted instantiation: regex::Regex<regex::ECMA262Parser>::replace(regex::RegexStringView, AK::StringView, AK::Optional<regex::RegexOptions<regex::ECMAScriptFlags> >) const
154
155
    // FIXME: Add a replace(Vector<RegexStringView> const&, ...) overload.
156
157
    // Runs the matcher over `view` with the Global flag forced on and Internal_Stateful forced off.
    // If the caller set both MatchNotBeginOfLine and MatchNotEndOfLine, both are cleared first.
    // NOTE(review): the rationale for clearing that flag pair is not visible in this header.
    // Returns a default RegexResult (success == false) when no matcher exists or the pattern failed to parse.
    RegexResult search(RegexStringView view, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
158
0
    {
159
0
        if (!matcher || parser_result.error != Error::NoError)
160
0
            return {};
161
162
0
        AllOptions options = (AllOptions)regex_options.value_or({});
163
0
        if ((options & AllFlags::MatchNotBeginOfLine) && (options & AllFlags::MatchNotEndOfLine)) {
164
0
            options.reset_flag(AllFlags::MatchNotEndOfLine);
165
0
            options.reset_flag(AllFlags::MatchNotBeginOfLine);
166
0
        }
167
0
        options.reset_flag(AllFlags::Internal_Stateful);
168
0
        options |= AllFlags::Global;
169
170
0
        return matcher->match(view, options);
171
0
    }
Unexecuted instantiation: regex::Regex<regex::PosixBasicParser>::search(regex::RegexStringView, AK::Optional<regex::RegexOptions<regex::PosixFlags> >) const
Unexecuted instantiation: regex::Regex<regex::PosixExtendedParser>::search(regex::RegexStringView, AK::Optional<regex::RegexOptions<regex::PosixFlags> >) const
Unexecuted instantiation: regex::Regex<regex::ECMA262Parser>::search(regex::RegexStringView, AK::Optional<regex::RegexOptions<regex::ECMAScriptFlags> >) const
172
173
    // Multi-view overload: runs the matcher over `views` with the Global flag forced on and
    // Internal_Stateful forced off. If the caller set both MatchNotBeginOfLine and MatchNotEndOfLine,
    // both are cleared first.
    // Returns a default RegexResult (success == false) when no matcher exists or the pattern failed to parse.
    RegexResult search(Vector<RegexStringView> const& views, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
174
0
    {
175
0
        if (!matcher || parser_result.error != Error::NoError)
176
0
            return {};
177
178
0
        AllOptions options = (AllOptions)regex_options.value_or({});
179
0
        if ((options & AllFlags::MatchNotBeginOfLine) && (options & AllFlags::MatchNotEndOfLine)) {
180
0
            options.reset_flag(AllFlags::MatchNotEndOfLine);
181
0
            options.reset_flag(AllFlags::MatchNotBeginOfLine);
182
0
        }
183
0
        options.reset_flag(AllFlags::Internal_Stateful);
184
0
        options |= AllFlags::Global;
185
186
0
        return matcher->match(views, options);
187
0
    }
Unexecuted instantiation: regex::Regex<regex::PosixBasicParser>::search(AK::Vector<regex::RegexStringView, 0ul> const&, AK::Optional<regex::RegexOptions<regex::PosixFlags> >) const
Unexecuted instantiation: regex::Regex<regex::PosixExtendedParser>::search(AK::Vector<regex::RegexStringView, 0ul> const&, AK::Optional<regex::RegexOptions<regex::PosixFlags> >) const
Unexecuted instantiation: regex::Regex<regex::ECMA262Parser>::search(AK::Vector<regex::RegexStringView, 0ul> const&, AK::Optional<regex::RegexOptions<regex::ECMAScriptFlags> >) const
188
189
    // Out-parameter form of match(): overwrites `m` with the full result and returns m.success.
    bool match(RegexStringView view, RegexResult& m, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
190
0
    {
191
0
        m = match(view, regex_options);
192
0
        return m.success;
193
0
    }
Unexecuted instantiation: regex::Regex<regex::PosixBasicParser>::match(regex::RegexStringView, regex::RegexResult&, AK::Optional<regex::RegexOptions<regex::PosixFlags> >) const
Unexecuted instantiation: regex::Regex<regex::PosixExtendedParser>::match(regex::RegexStringView, regex::RegexResult&, AK::Optional<regex::RegexOptions<regex::PosixFlags> >) const
Unexecuted instantiation: regex::Regex<regex::ECMA262Parser>::match(regex::RegexStringView, regex::RegexResult&, AK::Optional<regex::RegexOptions<regex::ECMAScriptFlags> >) const
194
195
    // Out-parameter form of the multi-view match(): overwrites `m` with the full result and returns m.success.
    bool match(Vector<RegexStringView> const& views, RegexResult& m, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
196
0
    {
197
0
        m = match(views, regex_options);
198
0
        return m.success;
199
0
    }
Unexecuted instantiation: regex::Regex<regex::PosixBasicParser>::match(AK::Vector<regex::RegexStringView, 0ul> const&, regex::RegexResult&, AK::Optional<regex::RegexOptions<regex::PosixFlags> >) const
Unexecuted instantiation: regex::Regex<regex::PosixExtendedParser>::match(AK::Vector<regex::RegexStringView, 0ul> const&, regex::RegexResult&, AK::Optional<regex::RegexOptions<regex::PosixFlags> >) const
Unexecuted instantiation: regex::Regex<regex::ECMA262Parser>::match(AK::Vector<regex::RegexStringView, 0ul> const&, regex::RegexResult&, AK::Optional<regex::RegexOptions<regex::ECMAScriptFlags> >) const
200
201
    // Out-parameter form of search(): overwrites `m` with the full result and returns m.success.
    bool search(RegexStringView view, RegexResult& m, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
202
0
    {
203
0
        m = search(view, regex_options);
204
0
        return m.success;
205
0
    }
Unexecuted instantiation: regex::Regex<regex::PosixBasicParser>::search(regex::RegexStringView, regex::RegexResult&, AK::Optional<regex::RegexOptions<regex::PosixFlags> >) const
Unexecuted instantiation: regex::Regex<regex::PosixExtendedParser>::search(regex::RegexStringView, regex::RegexResult&, AK::Optional<regex::RegexOptions<regex::PosixFlags> >) const
Unexecuted instantiation: regex::Regex<regex::ECMA262Parser>::search(regex::RegexStringView, regex::RegexResult&, AK::Optional<regex::RegexOptions<regex::ECMAScriptFlags> >) const
206
207
    // Out-parameter form of the multi-view search(): overwrites `m` with the full result and returns m.success.
    bool search(Vector<RegexStringView> const& views, RegexResult& m, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
208
0
    {
209
0
        m = search(views, regex_options);
210
0
        return m.success;
211
0
    }
Unexecuted instantiation: regex::Regex<regex::PosixBasicParser>::search(AK::Vector<regex::RegexStringView, 0ul> const&, regex::RegexResult&, AK::Optional<regex::RegexOptions<regex::PosixFlags> >) const
Unexecuted instantiation: regex::Regex<regex::PosixExtendedParser>::search(AK::Vector<regex::RegexStringView, 0ul> const&, regex::RegexResult&, AK::Optional<regex::RegexOptions<regex::PosixFlags> >) const
Unexecuted instantiation: regex::Regex<regex::ECMA262Parser>::search(AK::Vector<regex::RegexStringView, 0ul> const&, regex::RegexResult&, AK::Optional<regex::RegexOptions<regex::ECMAScriptFlags> >) const
212
213
    // Reports only whether `view` matches. The matcher is called with SkipSubExprResults added to the
    // caller's options, and everything in the result except `success` is discarded.
    // Returns false when no matcher exists or the pattern failed to parse.
    bool has_match(RegexStringView view, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
214
0
    {
215
0
        if (!matcher || parser_result.error != Error::NoError)
216
0
            return false;
217
0
        RegexResult result = matcher->match(view, AllOptions { regex_options.value_or({}) } | AllFlags::SkipSubExprResults);
218
0
        return result.success;
219
0
    }
Unexecuted instantiation: regex::Regex<regex::PosixBasicParser>::has_match(regex::RegexStringView, AK::Optional<regex::RegexOptions<regex::PosixFlags> >) const
Unexecuted instantiation: regex::Regex<regex::PosixExtendedParser>::has_match(regex::RegexStringView, AK::Optional<regex::RegexOptions<regex::PosixFlags> >) const
Unexecuted instantiation: regex::Regex<regex::ECMA262Parser>::has_match(regex::RegexStringView, AK::Optional<regex::RegexOptions<regex::ECMAScriptFlags> >) const
220
221
    // Multi-view overload: reports only whether `views` match. The matcher is called with
    // SkipSubExprResults added to the caller's options, and everything except `success` is discarded.
    // Returns false when no matcher exists or the pattern failed to parse.
    bool has_match(Vector<RegexStringView> const& views, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
222
0
    {
223
0
        if (!matcher || parser_result.error != Error::NoError)
224
0
            return false;
225
0
        RegexResult result = matcher->match(views, AllOptions { regex_options.value_or({}) } | AllFlags::SkipSubExprResults);
226
0
        return result.success;
227
0
    }
Unexecuted instantiation: regex::Regex<regex::PosixBasicParser>::has_match(AK::Vector<regex::RegexStringView, 0ul> const&, AK::Optional<regex::RegexOptions<regex::PosixFlags> >) const
Unexecuted instantiation: regex::Regex<regex::PosixExtendedParser>::has_match(AK::Vector<regex::RegexStringView, 0ul> const&, AK::Optional<regex::RegexOptions<regex::PosixFlags> >) const
Unexecuted instantiation: regex::Regex<regex::ECMA262Parser>::has_match(AK::Vector<regex::RegexStringView, 0ul> const&, AK::Optional<regex::RegexOptions<regex::ECMAScriptFlags> >) const
228
229
    using BasicBlockList = Vector<Detail::Block>;
230
    static BasicBlockList split_basic_blocks(ByteCode const&);
231
232
private:
233
    void run_optimization_passes();
234
    void attempt_rewrite_loops_as_atomic_groups(BasicBlockList const&);
235
    bool attempt_rewrite_entire_match_as_substring_search(BasicBlockList const&);
236
};
237
238
// free standing functions for match, search and has_match
239
// Free-function form of Regex<Parser>::match(view, regex_options); returns the full result.
template<class Parser>
240
RegexResult match(RegexStringView view, Regex<Parser>& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
241
{
242
    return pattern.match(view, regex_options);
243
}
244
245
// Free-function form of the multi-view Regex<Parser>::match(views, regex_options); returns the full result.
template<class Parser>
246
RegexResult match(Vector<RegexStringView> const& view, Regex<Parser>& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
247
{
248
    return pattern.match(view, regex_options);
249
}
250
251
// Free-function form of Regex<Parser>::match(view, result, regex_options).
// Stores the full outcome in `result` and returns whether the match succeeded.
// The out-parameter must be forwarded: the two-argument member overload returns a RegexResult,
// which is not convertible to bool and would leave the caller's result object untouched.
template<class Parser>
252
bool match(RegexStringView view, Regex<Parser>& pattern, RegexResult& result, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
253
{
254
    return pattern.match(view, result, regex_options);
255
}
256
257
// Free-function form of the multi-view Regex<Parser>::match(views, result, regex_options).
// Stores the full outcome in `result` and returns whether the match succeeded.
// The out-parameter must be forwarded: the two-argument member overload returns a RegexResult,
// which is not convertible to bool and would leave the caller's result object untouched.
template<class Parser>
258
bool match(Vector<RegexStringView> const& view, Regex<Parser>& pattern, RegexResult& result, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
259
{
260
    return pattern.match(view, result, regex_options);
261
}
262
263
// Free-function form of Regex<Parser>::search(view, regex_options); returns the full result.
template<class Parser>
264
RegexResult search(RegexStringView view, Regex<Parser>& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
265
{
266
    return pattern.search(view, regex_options);
267
}
268
269
// Free-function form of the multi-view Regex<Parser>::search(views, regex_options); returns the full result.
template<class Parser>
270
RegexResult search(Vector<RegexStringView> const& views, Regex<Parser>& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
271
{
272
    return pattern.search(views, regex_options);
273
}
274
275
// Free-function form of Regex<Parser>::search(view, result, regex_options).
// Stores the full outcome in `result` and returns whether the search succeeded.
// The out-parameter must be forwarded: the two-argument member overload returns a RegexResult,
// which is not convertible to bool and would leave the caller's result object untouched.
template<class Parser>
276
bool search(RegexStringView view, Regex<Parser>& pattern, RegexResult& result, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
277
{
278
    return pattern.search(view, result, regex_options);
279
}
280
281
// Free-function form of the multi-view Regex<Parser>::search(views, result, regex_options).
// Stores the full outcome in `result` and returns whether the search succeeded.
// The out-parameter must be forwarded: the two-argument member overload returns a RegexResult,
// which is not convertible to bool and would leave the caller's result object untouched.
template<class Parser>
282
bool search(Vector<RegexStringView> const& views, Regex<Parser>& pattern, RegexResult& result, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
283
{
284
    return pattern.search(views, result, regex_options);
285
}
286
287
// Free-function form of Regex<Parser>::has_match(view, regex_options); returns only whether a match exists.
template<class Parser>
288
bool has_match(RegexStringView view, Regex<Parser>& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
289
{
290
    return pattern.has_match(view, regex_options);
291
}
292
293
// Free-function form of the multi-view Regex<Parser>::has_match(views, regex_options); returns only whether a match exists.
template<class Parser>
294
bool has_match(Vector<RegexStringView> const& views, Regex<Parser>& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
295
{
296
    return pattern.has_match(views, regex_options);
297
}
298
}
299
300
using regex::has_match;
301
using regex::match;
302
using regex::Regex;
303
using regex::RegexResult;