/src/serenity/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.h
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2020-2022, Andreas Kling <kling@serenityos.org> |
3 | | * |
4 | | * SPDX-License-Identifier: BSD-2-Clause |
5 | | */ |
6 | | |
7 | | #pragma once |
8 | | |
9 | | #include <LibGfx/Color.h> |
10 | | #include <LibJS/Heap/Cell.h> |
11 | | #include <LibWeb/DOM/Node.h> |
12 | | #include <LibWeb/HTML/Parser/HTMLTokenizer.h> |
13 | | #include <LibWeb/HTML/Parser/ListOfActiveFormattingElements.h> |
14 | | #include <LibWeb/HTML/Parser/StackOfOpenElements.h> |
15 | | #include <LibWeb/MimeSniff/MimeType.h> |
16 | | |
17 | | namespace Web::HTML { |
18 | | |
19 | | #define ENUMERATE_INSERTION_MODES /* X-macro list of insertion modes: define __ENUMERATE_INSERTION_MODE(mode), expand this list, then #undef it */ \ |
20 | 0 | __ENUMERATE_INSERTION_MODE(Initial) \ |
21 | 0 | __ENUMERATE_INSERTION_MODE(BeforeHTML) \ |
22 | 0 | __ENUMERATE_INSERTION_MODE(BeforeHead) \ |
23 | 0 | __ENUMERATE_INSERTION_MODE(InHead) \ |
24 | 0 | __ENUMERATE_INSERTION_MODE(InHeadNoscript) \ |
25 | 0 | __ENUMERATE_INSERTION_MODE(AfterHead) \ |
26 | 0 | __ENUMERATE_INSERTION_MODE(InBody) \ |
27 | 0 | __ENUMERATE_INSERTION_MODE(Text) \ |
28 | 0 | __ENUMERATE_INSERTION_MODE(InTable) \ |
29 | 0 | __ENUMERATE_INSERTION_MODE(InTableText) \ |
30 | 0 | __ENUMERATE_INSERTION_MODE(InCaption) \ |
31 | 0 | __ENUMERATE_INSERTION_MODE(InColumnGroup) \ |
32 | 0 | __ENUMERATE_INSERTION_MODE(InTableBody) \ |
33 | 0 | __ENUMERATE_INSERTION_MODE(InRow) \ |
34 | 0 | __ENUMERATE_INSERTION_MODE(InCell) \ |
35 | 0 | __ENUMERATE_INSERTION_MODE(InSelect) \ |
36 | 0 | __ENUMERATE_INSERTION_MODE(InSelectInTable) \ |
37 | 0 | __ENUMERATE_INSERTION_MODE(InTemplate) \ |
38 | 0 | __ENUMERATE_INSERTION_MODE(AfterBody) \ |
39 | 0 | __ENUMERATE_INSERTION_MODE(InFrameset) \ |
40 | 0 | __ENUMERATE_INSERTION_MODE(AfterFrameset) \ |
41 | 0 | __ENUMERATE_INSERTION_MODE(AfterAfterBody) \ |
42 | 0 | __ENUMERATE_INSERTION_MODE(AfterAfterFrameset) |
43 | | |
| | // HTML parser object. It holds an HTMLTokenizer by value together with the tree-construction |
| | // state declared below (insertion mode, stack of open elements, active formatting elements, ...). |
| | // It derives from JS::Cell; instances are obtained through the static create*() factories, and |
| | // both constructors are private. |
| | // NOTE(review): all non-inline members are defined outside this header, so the notes below only |
| | // state what the declarations themselves show; anything else is marked as an assumption. |
44 | | class HTMLParser final : public JS::Cell { |
45 | | JS_CELL(HTMLParser, JS::Cell); |
46 | | JS_DECLARE_ALLOCATOR(HTMLParser); |
47 | | |
| | // HTMLTokenizer is granted access to the parser's private members. |
48 | | friend class HTMLTokenizer; |
49 | | |
50 | | public: |
51 | | ~HTMLParser(); |
52 | | |
| | // Factories; each returns a non-null GC pointer. They differ in the input they accept: |
| | // none (create_for_scripting), raw bytes plus an optional MIME type |
| | // (create_with_uncertain_encoding), or text together with an encoding name (create). |
53 | | static JS::NonnullGCPtr<HTMLParser> create_for_scripting(DOM::Document&); |
54 | | static JS::NonnullGCPtr<HTMLParser> create_with_uncertain_encoding(DOM::Document&, ByteBuffer const& input, Optional<MimeSniff::MimeType> maybe_mime_type = {}); |
55 | | static JS::NonnullGCPtr<HTMLParser> create(DOM::Document&, StringView input, StringView encoding); |
56 | | |
| | // Runs the parser. StopAtInsertionPoint defaults to No in both overloads. |
| | // The second overload additionally takes a URL (presumably the document URL; confirm in the .cpp). |
57 | | void run(HTMLTokenizer::StopAtInsertionPoint = HTMLTokenizer::StopAtInsertionPoint::No); |
58 | | void run(URL::URL const&, HTMLTokenizer::StopAtInsertionPoint = HTMLTokenizer::StopAtInsertionPoint::No); |
59 | | |
| | // Static, so it can be invoked without a parser: the parser argument is nullable and defaults to nullptr. |
60 | | static void the_end(JS::NonnullGCPtr<DOM::Document>, JS::GCPtr<HTMLParser> = nullptr); |
61 | | |
62 | | DOM::Document& document(); |
| | // Opt-in switch for parse_html_fragment(); the default there is No. |
63 | | enum class AllowDeclarativeShadowRoots { |
64 | | No, |
65 | | Yes, |
66 | | }; |
| | // Parses a markup string relative to context_element and returns handles to DOM nodes |
| | // (presumably the nodes produced by the fragment parse; confirm in the .cpp). |
67 | | static Vector<JS::Handle<DOM::Node>> parse_html_fragment(DOM::Element& context_element, StringView, AllowDeclarativeShadowRoots = AllowDeclarativeShadowRoots::No); |
| | // Required argument of serialize_html_fragment(); it has no default. |
68 | | enum class SerializableShadowRoots { |
69 | | No, |
70 | | Yes, |
71 | | }; |
| | // Serializes a node to a String. The serialization mode defaults to Inner. |
72 | | static String serialize_html_fragment(DOM::Node const&, SerializableShadowRoots, Vector<JS::Handle<DOM::ShadowRoot>> const&, DOM::FragmentSerializationMode = DOM::FragmentSerializationMode::Inner); |
73 | | |
| | // One enumerator per ENUMERATE_INSERTION_MODES entry, in list order (Initial comes first). |
74 | | enum class InsertionMode { |
75 | | #define __ENUMERATE_INSERTION_MODE(mode) mode, |
76 | | ENUMERATE_INSERTION_MODES |
77 | | #undef __ENUMERATE_INSERTION_MODE |
78 | | }; |
79 | | |
| | // Returns the current value of m_insertion_mode. |
80 | 0 | InsertionMode insertion_mode() const { return m_insertion_mode; } |
81 | | |
82 | | static bool is_special_tag(FlyString const& tag_name, Optional<FlyString> const& namespace_); |
83 | | |
| | // Mutable access to the tokenizer embedded in this parser. |
84 | 0 | HTMLTokenizer& tokenizer() { return m_tokenizer; } |
85 | | |
86 | | // https://html.spec.whatwg.org/multipage/parsing.html#abort-a-parser |
87 | | void abort(); |
88 | | |
| | // Read-only views of the m_aborted and m_stop_parsing flags. |
89 | 0 | bool aborted() const { return m_aborted; } |
90 | 0 | bool stopped() const { return m_stop_parsing; } |
91 | | |
| | // Returns the current value of m_script_nesting_level. |
92 | 0 | size_t script_nesting_level() const { return m_script_nesting_level; } |
93 | | |
94 | | private: |
| | // Private constructors; their parameter lists mirror create() and create_for_scripting(). |
95 | | HTMLParser(DOM::Document&, StringView input, StringView encoding); |
96 | | HTMLParser(DOM::Document&); |
97 | | |
| | // Override of the JS::Cell visitor hook. |
98 | | virtual void visit_edges(Cell::Visitor&) override; |
99 | | |
| | // Returns a C string (presumably the name of the current insertion mode; confirm in the .cpp). |
100 | | char const* insertion_mode_name() const; |
101 | | |
102 | | DOM::QuirksMode which_quirks_mode(HTMLToken const&) const; |
103 | | |
| | // Token handlers: there are 23 of them, and their names line up one-to-one with the |
| | // 23 InsertionMode enumerators. Each takes the token by mutable reference. |
104 | | void handle_initial(HTMLToken&); |
105 | | void handle_before_html(HTMLToken&); |
106 | | void handle_before_head(HTMLToken&); |
107 | | void handle_in_head(HTMLToken&); |
108 | | void handle_in_head_noscript(HTMLToken&); |
109 | | void handle_after_head(HTMLToken&); |
110 | | void handle_in_body(HTMLToken&); |
111 | | void handle_after_body(HTMLToken&); |
112 | | void handle_after_after_body(HTMLToken&); |
113 | | void handle_text(HTMLToken&); |
114 | | void handle_in_table(HTMLToken&); |
115 | | void handle_in_table_body(HTMLToken&); |
116 | | void handle_in_row(HTMLToken&); |
117 | | void handle_in_cell(HTMLToken&); |
118 | | void handle_in_table_text(HTMLToken&); |
119 | | void handle_in_select_in_table(HTMLToken&); |
120 | | void handle_in_select(HTMLToken&); |
121 | | void handle_in_caption(HTMLToken&); |
122 | | void handle_in_column_group(HTMLToken&); |
123 | | void handle_in_template(HTMLToken&); |
124 | | void handle_in_frameset(HTMLToken&); |
125 | | void handle_after_frameset(HTMLToken&); |
126 | | void handle_after_after_frameset(HTMLToken&); |
127 | | |
| | // Sets m_stop_parsing; nothing in this header resets it to false. |
128 | 0 | void stop_parsing() { m_stop_parsing = true; } |
129 | | |
| | // The optional tag name is an exception passed to the algorithm (defaults to an empty FlyString). |
130 | | void generate_implied_end_tags(FlyString const& exception = {}); |
131 | | void generate_all_implied_end_tags_thoroughly(); |
132 | | JS::NonnullGCPtr<DOM::Element> create_element_for(HTMLToken const&, Optional<FlyString> const& namespace_, DOM::Node& intended_parent); |
133 | | |
| | // Result of find_appropriate_place_for_inserting_node(). Both pointers are nullable GC pointers |
| | // (presumably insert_before_sibling is null when appending; confirm in the .cpp). |
134 | | struct AdjustedInsertionLocation { |
135 | | JS::GCPtr<DOM::Node> parent; |
136 | | JS::GCPtr<DOM::Node> insert_before_sibling; |
137 | | }; |
138 | | |
139 | | AdjustedInsertionLocation find_appropriate_place_for_inserting_node(JS::GCPtr<DOM::Element> override_target = nullptr); |
140 | | |
141 | | void insert_an_element_at_the_adjusted_insertion_location(JS::NonnullGCPtr<DOM::Element>); |
142 | | |
| | // Returns a raw pointer, so the result may be null; callers should check it. |
143 | | DOM::Text* find_character_insertion_node(); |
144 | | void flush_character_insertions(); |
| | // Required argument of insert_foreign_element(); it has no default. |
145 | | enum class OnlyAddToElementStack { |
146 | | No, |
147 | | Yes, |
148 | | }; |
149 | | JS::NonnullGCPtr<DOM::Element> insert_foreign_element(HTMLToken const&, Optional<FlyString> const& namespace_, OnlyAddToElementStack); |
150 | | JS::NonnullGCPtr<DOM::Element> insert_html_element(HTMLToken const&); |
151 | | DOM::Element& current_node(); |
152 | | DOM::Element& adjusted_current_node(); |
153 | | DOM::Element& node_before_current_node(); |
| | // Takes a single u32 value (presumably a code point; confirm in the .cpp). |
154 | | void insert_character(u32 data); |
155 | | void insert_comment(HTMLToken&); |
156 | | void reconstruct_the_active_formatting_elements(); |
157 | | void close_a_p_element(); |
158 | | void process_using_the_rules_for(InsertionMode, HTMLToken&); |
159 | | void process_using_the_rules_for_foreign_content(HTMLToken&); |
160 | | void parse_generic_raw_text_element(HTMLToken&); |
161 | | void increment_script_nesting_level(); |
162 | | void decrement_script_nesting_level(); |
163 | | void reset_the_insertion_mode_appropriately(); |
164 | | |
| | // Token adjustment helpers; each takes the token by mutable reference. |
| | // adjust_foreign_attributes() is the only static one. |
165 | | void adjust_mathml_attributes(HTMLToken&); |
166 | | void adjust_svg_tag_names(HTMLToken&); |
167 | | void adjust_svg_attributes(HTMLToken&); |
168 | | static void adjust_foreign_attributes(HTMLToken&); |
169 | | |
| | // Return value of run_the_adoption_agency_algorithm(). This is an unscoped enum and is left |
| | // that way here, because code outside this header may name its enumerators unqualified. |
170 | | enum AdoptionAgencyAlgorithmOutcome { |
171 | | DoNothing, |
172 | | RunAnyOtherEndTagSteps, |
173 | | }; |
174 | | |
175 | | AdoptionAgencyAlgorithmOutcome run_the_adoption_agency_algorithm(HTMLToken&); |
176 | | void clear_the_stack_back_to_a_table_context(); |
177 | | void clear_the_stack_back_to_a_table_body_context(); |
178 | | void clear_the_stack_back_to_a_table_row_context(); |
179 | | void close_the_cell(); |
180 | | |
| | // Both insertion-mode fields start out as InsertionMode::Initial. |
181 | | InsertionMode m_insertion_mode { InsertionMode::Initial }; |
182 | | InsertionMode m_original_insertion_mode { InsertionMode::Initial }; |
183 | | |
184 | | StackOfOpenElements m_stack_of_open_elements; |
185 | | Vector<InsertionMode> m_stack_of_template_insertion_modes; |
186 | | ListOfActiveFormattingElements m_list_of_active_formatting_elements; |
187 | | |
| | // Stored by value; exposed through tokenizer(). |
188 | | HTMLTokenizer m_tokenizer; |
189 | | |
| | // Boolean parser state. Note that m_frameset_ok is the only flag in this group that starts as true. |
190 | | bool m_foster_parenting { false }; |
191 | | bool m_frameset_ok { true }; |
192 | | bool m_parsing_fragment { false }; |
193 | | |
194 | | // https://html.spec.whatwg.org/multipage/parsing.html#scripting-flag |
195 | | // The scripting flag is set to "enabled" if scripting was enabled for the Document with which the parser is associated when the parser was created, and "disabled" otherwise. |
196 | | bool m_scripting_enabled { true }; |
197 | | |
198 | | bool m_invoked_via_document_write { false }; |
199 | | bool m_aborted { false }; |
200 | | bool m_parser_pause_flag { false }; |
201 | | bool m_stop_parsing { false }; |
202 | | size_t m_script_nesting_level { 0 }; |
203 | | |
204 | | JS::Realm& realm(); |
205 | | |
| | // Nullable GC pointers to DOM objects the parser refers to. |
206 | | JS::GCPtr<DOM::Document> m_document; |
207 | | JS::GCPtr<HTMLHeadElement> m_head_element; |
208 | | JS::GCPtr<HTMLFormElement> m_form_element; |
209 | | JS::GCPtr<DOM::Element> m_context_element; |
210 | | |
211 | | Vector<HTMLToken> m_pending_table_character_tokens; |
212 | | |
| | // Character-insertion state: a nullable text node plus a string builder (presumably the |
| | // buffer used by insert_character() / flush_character_insertions(); confirm in the .cpp). |
213 | | JS::GCPtr<DOM::Text> m_character_insertion_node; |
214 | | StringBuilder m_character_insertion_builder; |
215 | | }; |
216 | | |
217 | | RefPtr<CSS::CSSStyleValue> parse_dimension_value(StringView); /* string -> CSS style value; RefPtr is nullable, presumably null when the string is not a valid dimension (confirm in the .cpp) */ |
218 | | RefPtr<CSS::CSSStyleValue> parse_nonzero_dimension_value(StringView); /* same return type; judging by the name it presumably also rejects zero (confirm in the .cpp) */ |
219 | | Optional<Color> parse_legacy_color_value(StringView); /* string -> Color; an empty Optional presumably signals an unparsable value (confirm in the .cpp) */ |
220 | | |
221 | | } |