Coverage Report

Created: 2026-02-14 08:01

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/serenity/Userland/Libraries/LibXML/Parser/Parser.h
Line
Count
Source
1
/*
2
 * Copyright (c) 2022, Ali Mohammad Pur <mpfard@serenityos.org>
3
 *
4
 * SPDX-License-Identifier: BSD-2-Clause
5
 */
6
7
#pragma once
8
9
#include <AK/ByteString.h>
10
#include <AK/Debug.h>
11
#include <AK/Function.h>
12
#include <AK/GenericLexer.h>
13
#include <AK/HashMap.h>
14
#include <AK/OwnPtr.h>
15
#include <AK/SourceLocation.h>
16
#include <AK/TemporaryChange.h>
17
#include <LibXML/DOM/Document.h>
18
#include <LibXML/DOM/DocumentTypeDeclaration.h>
19
#include <LibXML/DOM/Node.h>
20
#include <LibXML/Forward.h>
21
22
namespace XML {
23
24
// Error payload naming what the parser wanted to see at the failure point.
// Elsewhere in this header it is rendered as "Expected {}" with `expected` substituted.
struct Expectation {
25
    StringView expected;
26
};
27
28
// A single parse failure: a lexer position plus the reason.
struct ParseError {
29
    // Lexer position attached to the error; value-initialized when not supplied.
    LineTrackingLexer::Position position {};
30
    // Either a free-form message (ByteString) or a structured Expectation.
    Variant<ByteString, Expectation> error;
31
};
32
33
// Callback interface accepted by Parser::parse_with_listener().
// Every hook has an empty default body, so a subclass only overrides the events it cares about.
// NOTE(review): the exact moment each hook is invoked is decided by the parser implementation,
// which is not visible in this header — confirm against Parser.cpp.
struct Listener {
34
0
    virtual ~Listener() { }
35
36
0
    virtual void set_source(ByteString) { }
37
0
    virtual void set_doctype(XML::Doctype) { }
38
0
    virtual void document_start() { }
39
0
    virtual void document_end() { }
40
0
    // Receives the element name and its attribute map (attribute name -> value).
    virtual void element_start(Name const&, HashMap<Name, ByteString> const&) { }
41
0
    virtual void element_end(Name const&) { }
42
0
    virtual void text(StringView) { }
43
0
    virtual void comment(StringView) { }
44
0
    virtual void error(ParseError const&) { }
45
};
46
47
class Parser {
48
public:
49
    // Tunables for a Parser instance. The defaults below apply when the single-argument
    // constructor is used, because m_options is then default-constructed.
    // NOTE(review): how each flag influences parsing is implemented outside this header.
    struct Options {
50
        bool preserve_cdata { true };
51
        bool preserve_comments { false };
52
        bool treat_errors_as_fatal { true };
53
        // Optional callback given a SystemID and an optional PublicID; it yields either raw
        // text (ByteString) or already-parsed markup declarations, or an error. Empty by default.
        Function<ErrorOr<Variant<ByteString, Vector<MarkupDeclaration>>>(SystemID const&, Optional<PublicID> const&)> resolve_external_resource {};
54
    };
55
56
    // Constructs a parser over `source` with caller-supplied options.
    // `source` is a view: it is stored as-is and also handed to the lexer; `options` is moved in.
    // NOTE(review): since only a StringView is kept, the underlying buffer presumably has to
    // outlive the parser — confirm with callers.
    Parser(StringView source, Options options)
57
        : m_source(source)
58
        , m_lexer(source)
59
        , m_options(move(options))
60
0
    {
61
0
    }
62
63
    // Constructs a parser over `source` using default-constructed Options.
    // Marked explicit so a StringView is never silently converted into a Parser.
    explicit Parser(StringView source)
64
8.57k
        : m_source(source)
65
8.57k
        , m_lexer(source)
66
8.57k
    {
67
8.57k
    }
68
69
    // Public entry points. Each reports failure as a ParseError; the bodies live outside this header.
    // parse() yields a Document; parse_with_listener() yields nothing and takes a Listener instead.
    ErrorOr<Document, ParseError> parse();
70
    ErrorOr<void, ParseError> parse_with_listener(Listener&);
71
72
0
    // Read-only view of the errors accumulated in m_parse_errors (appended by parse_error()
    // while the current rule is marked as accepted).
    Vector<ParseError> const& parse_error_causes() const { return m_parse_errors; }
73
74
    // Yields a list of markup declarations on success.
    ErrorOr<Vector<MarkupDeclaration>, ParseError> parse_external_subset();
75
76
private:
77
    // A reference to an entity identified by name; one of the two alternatives that
    // parse_reference() can return and the input to resolve_reference().
    struct EntityReference {
78
        Name name;
79
    };
80
81
    // Internal driver and node/text helpers; declared here, defined outside this header.
    // NOTE(review): presumably these build the tree rooted at m_root_node and track
    // m_entered_node — confirm against Parser.cpp.
    ErrorOr<void, ParseError> parse_internal();
82
    void append_node(NonnullOwnPtr<Node>);
83
    void append_text(StringView, LineTrackingLexer::Position);
84
    void append_comment(StringView, LineTrackingLexer::Position);
85
    void enter_node(Node&);
86
    void leave_node();
87
88
    // Tells resolve_reference() where the entity reference was encountered.
    enum class ReferencePlacement {
89
        AttributeValue,
90
        Content,
91
    };
92
    // Resolves an entity reference to a ByteString, or fails with a ParseError.
    ErrorOr<ByteString, ParseError> resolve_reference(EntityReference const&, ReferencePlacement);
93
94
    // Two-state flag used instead of a bare bool so call sites read as Required::Yes / Required::No.
    enum class Required {
95
        No,
96
        Yes,
97
    };
98
    // Defaults to Required::No.
    // NOTE(review): presumably Required::Yes turns absent whitespace into an error — confirm.
    ErrorOr<void, ParseError> skip_whitespace(Required = Required::No);
99
100
    // Grammar-rule parsers. Each one returns its parsed value (or void) on success and a
    // ParseError on failure; all are defined outside this header.
    // NOTE(review): the names look like they follow the productions of the XML specification
    // (prolog, element, Misc, XMLDecl, ...) — confirm against Parser.cpp and the spec.
    ErrorOr<void, ParseError> parse_prolog();
100
    ErrorOr<void, ParseError> parse_element();
101
    ErrorOr<void, ParseError> parse_misc();
102
    ErrorOr<void, ParseError> parse_xml_decl();
103
    ErrorOr<void, ParseError> parse_doctype_decl();
104
    ErrorOr<void, ParseError> parse_version_info();
105
    ErrorOr<void, ParseError> parse_encoding_decl();
106
    ErrorOr<void, ParseError> parse_standalone_document_decl();
107
    ErrorOr<void, ParseError> parse_eq();
108
    ErrorOr<void, ParseError> parse_comment();
109
    ErrorOr<void, ParseError> parse_processing_instruction();
110
    ErrorOr<Name, ParseError> parse_processing_instruction_target();
111
    ErrorOr<Name, ParseError> parse_name();
112
    ErrorOr<NonnullOwnPtr<Node>, ParseError> parse_empty_element_tag();
113
    ErrorOr<NonnullOwnPtr<Node>, ParseError> parse_start_tag();
114
    ErrorOr<Name, ParseError> parse_end_tag();
115
    ErrorOr<void, ParseError> parse_content();
116
    ErrorOr<Attribute, ParseError> parse_attribute();
117
    ErrorOr<ByteString, ParseError> parse_attribute_value();
118
    // Yields either an unresolved EntityReference or an already-materialized ByteString.
    ErrorOr<Variant<EntityReference, ByteString>, ParseError> parse_reference();
119
    ErrorOr<StringView, ParseError> parse_char_data();
120
    ErrorOr<Vector<MarkupDeclaration>, ParseError> parse_internal_subset();
121
    ErrorOr<Optional<MarkupDeclaration>, ParseError> parse_markup_declaration();
122
    ErrorOr<Optional<ByteString>, ParseError> parse_declaration_separator();
123
    ErrorOr<Vector<MarkupDeclaration>, ParseError> parse_external_subset_declaration();
124
    ErrorOr<ElementDeclaration, ParseError> parse_element_declaration();
125
    ErrorOr<AttributeListDeclaration, ParseError> parse_attribute_list_declaration();
126
    ErrorOr<EntityDeclaration, ParseError> parse_entity_declaration();
127
    ErrorOr<NotationDeclaration, ParseError> parse_notation_declaration();
128
    ErrorOr<Name, ParseError> parse_parameter_entity_reference();
129
    ErrorOr<ElementDeclaration::ContentSpec, ParseError> parse_content_spec();
130
    ErrorOr<AttributeListDeclaration::Definition, ParseError> parse_attribute_definition();
131
    ErrorOr<StringView, ParseError> parse_nm_token();
132
    ErrorOr<EntityDeclaration, ParseError> parse_general_entity_declaration();
133
    ErrorOr<EntityDeclaration, ParseError> parse_parameter_entity_declaration();
134
    ErrorOr<PublicID, ParseError> parse_public_id();
135
    ErrorOr<SystemID, ParseError> parse_system_id();
136
    ErrorOr<ExternalID, ParseError> parse_external_id();
137
    ErrorOr<ByteString, ParseError> parse_entity_value();
138
    ErrorOr<Name, ParseError> parse_notation_data_declaration();
139
    ErrorOr<StringView, ParseError> parse_public_id_literal();
140
    ErrorOr<StringView, ParseError> parse_system_id_literal();
141
    ErrorOr<StringView, ParseError> parse_cdata_section();
142
    ErrorOr<ByteString, ParseError> parse_attribute_value_inner(StringView disallow);
143
    ErrorOr<void, ParseError> parse_text_declaration();
145
146
    // Low-level matching helpers; declared here, defined outside this header.
    // The StringView overload takes the text to match. The templated overloads take a predicate
    // callable as bool(char) plus a human-readable `description`.
    // NOTE(review): `description` is presumably what ends up in the resulting Expectation — confirm.
    ErrorOr<void, ParseError> expect(StringView);
147
    template<typename Pred>
148
    requires(IsCallableWithArguments<Pred, bool, char>) ErrorOr<StringView, ParseError> expect(Pred, StringView description);
149
    template<typename Pred>
150
    // `allow_empty` defaults to false.
    requires(IsCallableWithArguments<Pred, bool, char>) ErrorOr<StringView, ParseError> expect_many(Pred, StringView description, bool allow_empty = false);
151
152
    // Nesting depth used to indent XML_PARSER_DEBUG trace output. enter_rule() increments it and
    // the guard it returns decrements it. Being static, it is shared by every Parser instance.
    static size_t s_debug_indent_level;
153
    // Captures the current lexer position and returns an ArmedScopeGuard whose callback rewinds
    // the lexer back to that position and emits a debug "FAIL" trace line.
    // [[nodiscard]] because dropping the returned guard immediately would defeat its purpose.
    // NOTE(review): callers presumably disarm the guard once their rule succeeds, so the rewind
    // only happens on failure — confirm against ArmedScopeGuard and the call sites.
    [[nodiscard]] auto rollback_point(SourceLocation location = SourceLocation::current())
154
256M
    {
155
256M
        return ArmedScopeGuard {
156
256M
            [this, position = m_lexer.tell(), location] {
157
67.5M
                // Step back by exactly the distance consumed since the guard was created.
                m_lexer.retreat(m_lexer.tell() - position);
158
67.5M
                // Keeps `location` referenced even if the debug log below compiles to nothing.
                (void)location;
159
67.5M
                // Trace shows up to 16 characters of the remaining input, with newlines escaped.
                dbgln_if(XML_PARSER_DEBUG, "{:->{}}FAIL @ {} -- \x1b[31m{}\x1b[0m", " ", s_debug_indent_level * 2, location, m_lexer.remaining().substring_view(0, min(16, m_lexer.tell_remaining())).replace("\n"sv, "\\n"sv, ReplaceMode::All));
160
67.5M
            }
161
256M
        };
162
256M
    }
163
164
    // Marks the current rule as accepted by setting m_current_rule.accept to true through a
    // TemporaryChange. While the flag is true, parse_error() also records errors in m_parse_errors.
    // NOTE(review): TemporaryChange presumably restores the previous value when the returned
    // object is destroyed, hence [[nodiscard]] — confirm against AK/TemporaryChange.h.
    [[nodiscard]] auto accept_rule()
165
77.0M
    {
166
77.0M
        return TemporaryChange { m_current_rule.accept, true };
167
77.0M
    }
168
    // Records entry into a grammar rule and returns a ScopeGuard that undoes it.
    // On entry: logs "Enter", bumps the debug indent, saves the current rule and replaces it with
    // { function name from `location`, accept = false }.
    // In the guard's callback: restores the saved rule, lowers the indent and logs "Leave".
    // `location` defaults to SourceLocation::current(); NOTE(review): presumably this resolves to
    // the calling function, making the rule name the caller's name — confirm.
    [[nodiscard]] auto enter_rule(SourceLocation location = SourceLocation::current())
169
177M
    {
170
177M
        dbgln_if(XML_PARSER_DEBUG, "{:->{}}Enter {}", " ", s_debug_indent_level * 2, location);
171
177M
        ++s_debug_indent_level;
172
177M
        // Copy of the outer rule, captured by value below so it can be reinstated later.
        auto rule = m_current_rule;
173
177M
        m_current_rule = { location.function_name(), false };
174
177M
        return ScopeGuard {
175
177M
            [location, rule, this] {
176
177M
                m_current_rule = rule;
177
177M
                --s_debug_indent_level;
178
177M
                // Keeps `location` referenced even if the debug log below compiles to nothing.
                (void)location;
179
177M
                dbgln_if(XML_PARSER_DEBUG, "{:->{}}Leave {}", " ", s_debug_indent_level * 2, location);
180
177M
            }
181
177M
        };
182
177M
    }
183
184
    // Builds a ParseError from the forwarded arguments and returns it.
    // If the current rule is marked as accepted, a second, human-readable copy is also appended to
    // m_parse_errors in the form "<rule>: <message>".
    // The rows without a source statement inside this body are per-instantiation coverage
    // breakdowns emitted by the report, not part of the function.
    template<typename... Ts>
185
    ParseError parse_error(Ts&&... args)
186
35.7M
    {
187
35.7M
        auto error = ParseError { forward<Ts>(args)... };
188
35.7M
        // Only errors raised while the rule is in the accepted state are recorded.
        if (m_current_rule.accept) {
189
2.20M
            // Falls back to "<?>" when no rule name has been set.
            auto rule_name = m_current_rule.rule.value_or("<?>"sv);
190
2.20M
            if (rule_name.starts_with("parse_"sv))
191
207k
                // 6 == length of the "parse_" prefix being stripped.
                rule_name = rule_name.substring_view(6);
192
193
2.20M
            // Flatten the variant to text: messages pass through, expectations become "Expected ...".
            auto error_string = error.error.visit(
194
2.20M
                [](ByteString const& error) -> ByteString { return error; },
Unexecuted instantiation: XML::Parser::parse_error<AK::LineTrackingLexer::Position, XML::Expectation>(AK::LineTrackingLexer::Position&&, XML::Expectation&&)::{lambda(AK::ByteString const&)#1}::operator()(AK::ByteString const&) const
XML::Parser::parse_error<AK::LineTrackingLexer::Position, AK::ByteString>(AK::LineTrackingLexer::Position&&, AK::ByteString&&)::{lambda(AK::ByteString const&)#1}::operator()(AK::ByteString const&) const
Line
Count
Source
194
1.70M
                [](ByteString const& error) -> ByteString { return error; },
195
2.20M
                [](XML::Expectation const& expectation) -> ByteString { return ByteString::formatted("Expected {}", expectation.expected); });
XML::Parser::parse_error<AK::LineTrackingLexer::Position, XML::Expectation>(AK::LineTrackingLexer::Position&&, XML::Expectation&&)::{lambda(XML::Expectation const&)#1}::operator()(XML::Expectation const&) const
Line
Count
Source
195
504k
                [](XML::Expectation const& expectation) -> ByteString { return ByteString::formatted("Expected {}", expectation.expected); });
Unexecuted instantiation: XML::Parser::parse_error<AK::LineTrackingLexer::Position, AK::ByteString>(AK::LineTrackingLexer::Position&&, AK::ByteString&&)::{lambda(XML::Expectation const&)#1}::operator()(XML::Expectation const&) const
196
2.20M
            m_parse_errors.append({
197
2.20M
                error.position,
198
2.20M
                ByteString::formatted("{}: {}", rule_name, error_string),
199
2.20M
            });
200
2.20M
        }
201
35.7M
        // The original (unprefixed) error is what the caller gets back.
        return error;
202
35.7M
    }
XML::ParseError XML::Parser::parse_error<AK::LineTrackingLexer::Position, XML::Expectation>(AK::LineTrackingLexer::Position&&, XML::Expectation&&)
Line
Count
Source
186
11.7M
    {
187
11.7M
        auto error = ParseError { forward<Ts>(args)... };
188
11.7M
        if (m_current_rule.accept) {
189
504k
            auto rule_name = m_current_rule.rule.value_or("<?>"sv);
190
504k
            if (rule_name.starts_with("parse_"sv))
191
1.65k
                rule_name = rule_name.substring_view(6);
192
193
504k
            auto error_string = error.error.visit(
194
504k
                [](ByteString const& error) -> ByteString { return error; },
195
504k
                [](XML::Expectation const& expectation) -> ByteString { return ByteString::formatted("Expected {}", expectation.expected); });
196
504k
            m_parse_errors.append({
197
504k
                error.position,
198
504k
                ByteString::formatted("{}: {}", rule_name, error_string),
199
504k
            });
200
504k
        }
201
11.7M
        return error;
202
11.7M
    }
XML::ParseError XML::Parser::parse_error<AK::LineTrackingLexer::Position, AK::ByteString>(AK::LineTrackingLexer::Position&&, AK::ByteString&&)
Line
Count
Source
186
24.0M
    {
187
24.0M
        auto error = ParseError { forward<Ts>(args)... };
188
24.0M
        if (m_current_rule.accept) {
189
1.70M
            auto rule_name = m_current_rule.rule.value_or("<?>"sv);
190
1.70M
            if (rule_name.starts_with("parse_"sv))
191
205k
                rule_name = rule_name.substring_view(6);
192
193
1.70M
            auto error_string = error.error.visit(
194
1.70M
                [](ByteString const& error) -> ByteString { return error; },
195
1.70M
                [](XML::Expectation const& expectation) -> ByteString { return ByteString::formatted("Expected {}", expectation.expected); });
196
1.70M
            m_parse_errors.append({
197
1.70M
                error.position,
198
1.70M
                ByteString::formatted("{}: {}", rule_name, error_string),
199
1.70M
            });
200
1.70M
        }
201
24.0M
        return error;
202
24.0M
    }
203
204
    // Parser state. m_source and m_lexer are both initialized from the constructor's `source`.
    StringView m_source;
205
    LineTrackingLexer m_lexer;
206
    Options m_options;
207
    // Null unless set. NOTE(review): presumably assigned by parse_with_listener() — confirm.
    Listener* m_listener { nullptr };
208
209
    OwnPtr<Node> m_root_node;
210
    // Non-owning pointer; null by default.
    Node* m_entered_node { nullptr };
211
    // Defaults to Version::Version11.
    Version m_version { Version::Version11 };
212
    bool m_in_compatibility_mode { false };
213
    ByteString m_encoding;
214
    bool m_standalone { false };
215
    HashMap<Name, ByteString> m_processing_instructions;
216
    // The grammar rule currently being parsed. enter_rule() swaps it in and out, accept_rule()
    // flips `accept`, and parse_error() reads both fields.
    struct AcceptedRule {
217
        // Name of the rule; unset until enter_rule() assigns a function name.
        Optional<StringView> rule {};
218
        // When true, parse_error() additionally records the error in m_parse_errors.
        bool accept { false };
219
    } m_current_rule {};
220
221
    // Errors recorded by parse_error(); exposed through parse_error_causes().
    Vector<ParseError> m_parse_errors;
222
223
    Optional<Doctype> m_doctype;
224
};
225
}
226
227
// Formatter specialization so an XML::ParseError can be used directly as a format argument.
// Output shape: "<message> at line: <line>, col: <column> (offset <offset>)".
template<>
228
struct AK::Formatter<XML::ParseError> : public AK::Formatter<FormatString> {
229
    ErrorOr<void> format(FormatBuilder& builder, XML::ParseError const& error)
230
0
    {
231
0
        // Flatten the variant to text: messages pass through, expectations become "Expected ...".
        auto error_string = error.error.visit(
232
0
            [](ByteString const& error) -> ByteString { return error; },
233
0
            [](XML::Expectation const& expectation) -> ByteString { return ByteString::formatted("Expected {}", expectation.expected); });
234
0
        // Delegate to the FormatString base formatter with the position fields appended.
        return Formatter<FormatString>::format(builder, "{} at line: {}, col: {} (offset {})"sv, error_string, error.position.line, error.position.column, error.position.offset);
235
0
    }
236
};