Coverage Report

Created: 2025-05-08 06:13

/src/html5ever/rcdom/lib.rs
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2014-2017 The html5ever Project Developers. See the
2
// COPYRIGHT file at the top-level directory of this distribution.
3
//
4
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7
// option. This file may not be copied, modified, or distributed
8
// except according to those terms.
9
10
//! A simple reference-counted DOM.
11
//!
12
//! This is sufficient as a static parse tree, but don't build a
13
//! web browser using it. :)
14
//!
15
//! A DOM is a [tree structure] with ordered children that can be represented in an XML-like
16
//! format. For example, the following graph
17
//!
18
//! ```text
19
//! div
20
//!  +- "text node"
21
//!  +- span
22
//! ```
23
//! in HTML would be serialized as
24
//!
25
//! ```html
26
//! <div>text node<span></span></div>
27
//! ```
28
//!
29
//! See the [document object model article on wikipedia][dom wiki] for more information.
30
//!
31
//! This implementation stores the information associated with each node once, and then hands out
32
//! refs to children. The nodes themselves are reference-counted to avoid copying - you can create
33
//! a new ref and then a node will outlive the document. Nodes own their children, but only have
34
//! weak references to their parents.
35
//!
36
//! [tree structure]: https://en.wikipedia.org/wiki/Tree_(data_structure)
37
//! [dom wiki]: https://en.wikipedia.org/wiki/Document_Object_Model
38
39
extern crate markup5ever;
40
extern crate tendril;
41
42
use std::borrow::Cow;
43
use std::cell::{Cell, RefCell};
44
use std::collections::{HashSet, VecDeque};
45
use std::default::Default;
46
use std::fmt;
47
use std::io;
48
use std::mem;
49
use std::rc::{Rc, Weak};
50
51
use tendril::StrTendril;
52
53
use markup5ever::interface::tree_builder;
54
use markup5ever::interface::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
55
use markup5ever::serialize::TraversalScope;
56
use markup5ever::serialize::TraversalScope::{ChildrenOnly, IncludeNode};
57
use markup5ever::serialize::{Serialize, Serializer};
58
use markup5ever::Attribute;
59
use markup5ever::ExpandedName;
60
use markup5ever::QualName;
61
62
/// The different kinds of nodes in the DOM.
63
#[derive(Debug)]
64
pub enum NodeData {
65
    /// The `Document` itself - the root node of a HTML document.
66
    Document,
67
68
    /// A `DOCTYPE` with name, public id, and system id. See
69
    /// [document type declaration on wikipedia][dtd wiki].
70
    ///
71
    /// [dtd wiki]: https://en.wikipedia.org/wiki/Document_type_declaration
72
    Doctype {
73
        name: StrTendril,
74
        public_id: StrTendril,
75
        system_id: StrTendril,
76
    },
77
78
    /// A text node.
79
    Text { contents: RefCell<StrTendril> },
80
81
    /// A comment.
82
    Comment { contents: StrTendril },
83
84
    /// An element with attributes.
85
    Element {
86
        name: QualName,
87
        attrs: RefCell<Vec<Attribute>>,
88
89
        /// For HTML \<template\> elements, the [template contents].
90
        ///
91
        /// [template contents]: https://html.spec.whatwg.org/multipage/#template-contents
92
        template_contents: RefCell<Option<Handle>>,
93
94
        /// Whether the node is a [HTML integration point].
95
        ///
96
        /// [HTML integration point]: https://html.spec.whatwg.org/multipage/#html-integration-point
97
        mathml_annotation_xml_integration_point: bool,
98
    },
99
100
    /// A Processing instruction.
101
    ProcessingInstruction {
102
        target: StrTendril,
103
        contents: StrTendril,
104
    },
105
}
106
107
/// A DOM node.
108
pub struct Node {
109
    /// Parent node.
110
    pub parent: Cell<Option<WeakHandle>>,
111
    /// Child nodes of this node.
112
    pub children: RefCell<Vec<Handle>>,
113
    /// Represents this node's data.
114
    pub data: NodeData,
115
}
116
117
impl Node {
118
    /// Create a new node from its contents
119
6.72M
    pub fn new(data: NodeData) -> Rc<Self> {
120
6.72M
        Rc::new(Node {
121
6.72M
            data,
122
6.72M
            parent: Cell::new(None),
123
6.72M
            children: RefCell::new(Vec::new()),
124
6.72M
        })
125
6.72M
    }
126
}
127
128
impl Drop for Node {
129
6.72M
    fn drop(&mut self) {
130
6.72M
        let mut nodes = mem::take(&mut *self.children.borrow_mut());
131
13.4M
        while let Some(node) = nodes.pop() {
132
6.71M
            let children = mem::take(&mut *node.children.borrow_mut());
133
6.71M
            nodes.extend(children.into_iter());
134
            if let NodeData::Element {
135
1.88M
                ref template_contents,
136
                ..
137
6.71M
            } = node.data
138
            {
139
1.88M
                if let Some(template_contents) = template_contents.borrow_mut().take() {
140
714
                    nodes.push(template_contents);
141
1.88M
                }
142
4.82M
            }
143
        }
144
6.72M
    }
145
}
146
147
impl fmt::Debug for Node {
148
0
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
149
0
        fmt.debug_struct("Node")
150
0
            .field("data", &self.data)
151
0
            .field("children", &self.children)
152
0
            .finish()
153
0
    }
154
}
155
156
/// Reference to a DOM node.
157
pub type Handle = Rc<Node>;
158
159
/// Weak reference to a DOM node, used for parent pointers.
160
pub type WeakHandle = Weak<Node>;
161
162
/// Append a parentless node to another nodes' children
163
6.71M
fn append(new_parent: &Handle, child: Handle) {
164
6.71M
    let previous_parent = child.parent.replace(Some(Rc::downgrade(new_parent)));
165
6.71M
    // Invariant: child cannot have existing parent
166
6.71M
    assert!(previous_parent.is_none());
167
6.71M
    new_parent.children.borrow_mut().push(child);
168
6.71M
}
169
170
/// If the node has a parent, get it and this node's position in its children
171
0
fn get_parent_and_index(target: &Handle) -> Option<(Handle, usize)> {
172
0
    if let Some(weak) = target.parent.take() {
173
0
        let parent = weak.upgrade().expect("dangling weak pointer");
174
0
        target.parent.set(Some(weak));
175
0
        let i = match parent
176
0
            .children
177
0
            .borrow()
178
0
            .iter()
179
0
            .enumerate()
180
0
            .find(|&(_, child)| Rc::ptr_eq(child, target))
181
        {
182
0
            Some((i, _)) => i,
183
0
            None => panic!("have parent but couldn't find in parent's children!"),
184
        };
185
0
        Some((parent, i))
186
    } else {
187
0
        None
188
    }
189
0
}
190
191
5.53M
fn append_to_existing_text(prev: &Handle, text: &str) -> bool {
192
5.53M
    match prev.data {
193
3.46M
        NodeData::Text { ref contents } => {
194
3.46M
            contents.borrow_mut().push_slice(text);
195
3.46M
            true
196
        },
197
2.07M
        _ => false,
198
    }
199
5.53M
}
200
201
0
fn remove_from_parent(target: &Handle) {
202
0
    if let Some((parent, i)) = get_parent_and_index(target) {
203
0
        parent.children.borrow_mut().remove(i);
204
0
        target.parent.set(None);
205
0
    }
206
0
}
207
208
/// The DOM itself; the result of parsing.
209
pub struct RcDom {
210
    /// The `Document` itself.
211
    pub document: Handle,
212
213
    /// Errors that occurred during parsing.
214
    pub errors: RefCell<Vec<Cow<'static, str>>>,
215
216
    /// The document's quirks mode.
217
    pub quirks_mode: Cell<QuirksMode>,
218
}
219
220
impl TreeSink for RcDom {
221
    type Output = Self;
222
12.1k
    fn finish(self) -> Self {
223
12.1k
        self
224
12.1k
    }
225
226
    type Handle = Handle;
227
228
    type ElemName<'a>
229
        = ExpandedName<'a>
230
    where
231
        Self: 'a;
232
233
44.6M
    fn parse_error(&self, msg: Cow<'static, str>) {
234
44.6M
        self.errors.borrow_mut().push(msg);
235
44.6M
    }
236
237
12.1k
    fn get_document(&self) -> Handle {
238
12.1k
        self.document.clone()
239
12.1k
    }
240
241
0
    fn get_template_contents(&self, target: &Handle) -> Handle {
242
        if let NodeData::Element {
243
0
            ref template_contents,
244
            ..
245
0
        } = target.data
246
        {
247
0
            template_contents
248
0
                .borrow()
249
0
                .as_ref()
250
0
                .expect("not a template element!")
251
0
                .clone()
252
        } else {
253
0
            panic!("not a template element!")
254
        }
255
0
    }
256
257
0
    fn set_quirks_mode(&self, mode: QuirksMode) {
258
0
        self.quirks_mode.set(mode);
259
0
    }
260
261
0
    fn same_node(&self, x: &Handle, y: &Handle) -> bool {
262
0
        Rc::ptr_eq(x, y)
263
0
    }
264
265
622M
    fn elem_name<'a>(&self, target: &'a Handle) -> ExpandedName<'a> {
266
622M
        match target.data {
267
622M
            NodeData::Element { ref name, .. } => name.expanded(),
268
0
            _ => panic!("not an element!"),
269
        }
270
622M
    }
271
272
1.88M
    fn create_element(&self, name: QualName, attrs: Vec<Attribute>, flags: ElementFlags) -> Handle {
273
1.88M
        Node::new(NodeData::Element {
274
1.88M
            name,
275
1.88M
            attrs: RefCell::new(attrs),
276
1.88M
            template_contents: RefCell::new(if flags.template {
277
714
                Some(Node::new(NodeData::Document))
278
            } else {
279
1.88M
                None
280
            }),
281
1.88M
            mathml_annotation_xml_integration_point: flags.mathml_annotation_xml_integration_point,
282
1.88M
        })
283
1.88M
    }
284
285
2.52M
    fn create_comment(&self, text: StrTendril) -> Handle {
286
2.52M
        Node::new(NodeData::Comment { contents: text })
287
2.52M
    }
288
289
16.2k
    fn create_pi(&self, target: StrTendril, data: StrTendril) -> Handle {
290
16.2k
        Node::new(NodeData::ProcessingInstruction {
291
16.2k
            target,
292
16.2k
            contents: data,
293
16.2k
        })
294
16.2k
    }
295
296
10.1M
    fn append(&self, parent: &Handle, child: NodeOrText<Handle>) {
297
        // Append to an existing Text node if we have one.
298
10.1M
        if let NodeOrText::AppendText(text) = &child {
299
5.68M
            if let Some(h) = parent.children.borrow().last() {
300
5.53M
                if append_to_existing_text(h, text) {
301
3.46M
                    return;
302
2.07M
                }
303
144k
            }
304
4.42M
        }
305
306
        append(
307
6.64M
            parent,
308
6.64M
            match child {
309
2.21M
                NodeOrText::AppendText(text) => Node::new(NodeData::Text {
310
2.21M
                    contents: RefCell::new(text),
311
2.21M
                }),
312
4.42M
                NodeOrText::AppendNode(node) => node,
313
            },
314
        );
315
10.1M
    }
316
317
0
    fn append_before_sibling(&self, sibling: &Handle, child: NodeOrText<Handle>) {
318
0
        let (parent, i) = get_parent_and_index(sibling)
319
0
            .expect("append_before_sibling called on node without parent");
320
321
0
        let child = match (child, i) {
322
            // No previous node.
323
0
            (NodeOrText::AppendText(text), 0) => Node::new(NodeData::Text {
324
0
                contents: RefCell::new(text),
325
0
            }),
326
327
            // Look for a text node before the insertion point.
328
0
            (NodeOrText::AppendText(text), i) => {
329
0
                let children = parent.children.borrow();
330
0
                let prev = &children[i - 1];
331
0
                if append_to_existing_text(prev, &text) {
332
0
                    return;
333
0
                }
334
0
                Node::new(NodeData::Text {
335
0
                    contents: RefCell::new(text),
336
0
                })
337
            },
338
339
            // The tree builder promises we won't have a text node after
340
            // the insertion point.
341
342
            // Any other kind of node.
343
0
            (NodeOrText::AppendNode(node), _) => node,
344
        };
345
346
0
        remove_from_parent(&child);
347
0
348
0
        child.parent.set(Some(Rc::downgrade(&parent)));
349
0
        parent.children.borrow_mut().insert(i, child);
350
0
    }
351
352
0
    fn append_based_on_parent_node(
353
0
        &self,
354
0
        element: &Self::Handle,
355
0
        prev_element: &Self::Handle,
356
0
        child: NodeOrText<Self::Handle>,
357
0
    ) {
358
0
        let parent = element.parent.take();
359
0
        let has_parent = parent.is_some();
360
0
        element.parent.set(parent);
361
0
362
0
        if has_parent {
363
0
            self.append_before_sibling(element, child);
364
0
        } else {
365
0
            self.append(prev_element, child);
366
0
        }
367
0
    }
368
369
65.5k
    fn append_doctype_to_document(
370
65.5k
        &self,
371
65.5k
        name: StrTendril,
372
65.5k
        public_id: StrTendril,
373
65.5k
        system_id: StrTendril,
374
65.5k
    ) {
375
65.5k
        append(
376
65.5k
            &self.document,
377
65.5k
            Node::new(NodeData::Doctype {
378
65.5k
                name,
379
65.5k
                public_id,
380
65.5k
                system_id,
381
65.5k
            }),
382
65.5k
        );
383
65.5k
    }
384
385
0
    fn add_attrs_if_missing(&self, target: &Handle, attrs: Vec<Attribute>) {
386
0
        let mut existing = if let NodeData::Element { ref attrs, .. } = target.data {
387
0
            attrs.borrow_mut()
388
        } else {
389
0
            panic!("not an element")
390
        };
391
392
0
        let existing_names = existing
393
0
            .iter()
394
0
            .map(|e| e.name.clone())
395
0
            .collect::<HashSet<_>>();
396
0
        existing.extend(
397
0
            attrs
398
0
                .into_iter()
399
0
                .filter(|attr| !existing_names.contains(&attr.name)),
400
0
        );
401
0
    }
402
403
0
    fn remove_from_parent(&self, target: &Handle) {
404
0
        remove_from_parent(target);
405
0
    }
406
407
0
    fn reparent_children(&self, node: &Handle, new_parent: &Handle) {
408
0
        let mut children = node.children.borrow_mut();
409
0
        let mut new_children = new_parent.children.borrow_mut();
410
0
        for child in children.iter() {
411
0
            let previous_parent = child.parent.replace(Some(Rc::downgrade(new_parent)));
412
0
            assert!(Rc::ptr_eq(
413
0
                node,
414
0
                &previous_parent.unwrap().upgrade().expect("dangling weak")
415
0
            ))
416
        }
417
0
        new_children.extend(mem::take(&mut *children));
418
0
    }
419
420
0
    fn is_mathml_annotation_xml_integration_point(&self, target: &Handle) -> bool {
421
        if let NodeData::Element {
422
0
            mathml_annotation_xml_integration_point,
423
            ..
424
0
        } = target.data
425
        {
426
0
            mathml_annotation_xml_integration_point
427
        } else {
428
0
            panic!("not an element!")
429
        }
430
0
    }
431
}
432
433
impl Default for RcDom {
434
12.1k
    fn default() -> RcDom {
435
12.1k
        RcDom {
436
12.1k
            document: Node::new(NodeData::Document),
437
12.1k
            errors: Default::default(),
438
12.1k
            quirks_mode: Cell::new(tree_builder::NoQuirks),
439
12.1k
        }
440
12.1k
    }
441
}
442
443
enum SerializeOp {
444
    Open(Handle),
445
    Close(QualName),
446
}
447
448
pub struct SerializableHandle(Handle);
449
450
impl From<Handle> for SerializableHandle {
451
12.1k
    fn from(h: Handle) -> SerializableHandle {
452
12.1k
        SerializableHandle(h)
453
12.1k
    }
454
}
455
456
impl Serialize for SerializableHandle {
457
12.1k
    fn serialize<S>(&self, serializer: &mut S, traversal_scope: TraversalScope) -> io::Result<()>
458
12.1k
    where
459
12.1k
        S: Serializer,
460
12.1k
    {
461
12.1k
        let mut ops = VecDeque::new();
462
12.1k
        match traversal_scope {
463
0
            IncludeNode => ops.push_back(SerializeOp::Open(self.0.clone())),
464
12.1k
            ChildrenOnly(_) => ops.extend(
465
12.1k
                self.0
466
12.1k
                    .children
467
12.1k
                    .borrow()
468
12.1k
                    .iter()
469
523k
                    .map(|h| SerializeOp::Open(h.clone())),
<markup5ever_rcdom::SerializableHandle as markup5ever::serialize::Serialize>::serialize::<xml5ever::serialize::XmlSerializer<&mut std::io::util::Sink>>::{closure#0}
Line
Count
Source
469
523k
                    .map(|h| SerializeOp::Open(h.clone())),
Unexecuted instantiation: <markup5ever_rcdom::SerializableHandle as markup5ever::serialize::Serialize>::serialize::<_>::{closure#0}
470
12.1k
            ),
471
        }
472
473
8.60M
        while let Some(op) = ops.pop_front() {
474
8.59M
            match op {
475
6.71M
                SerializeOp::Open(handle) => match handle.data {
476
                    NodeData::Element {
477
1.88M
                        ref name,
478
1.88M
                        ref attrs,
479
1.88M
                        ..
480
1.88M
                    } => {
481
1.88M
                        serializer.start_elem(
482
1.88M
                            name.clone(),
483
1.88M
                            attrs.borrow().iter().map(|at| (&at.name, &at.value[..])),
<markup5ever_rcdom::SerializableHandle as markup5ever::serialize::Serialize>::serialize::<xml5ever::serialize::XmlSerializer<&mut std::io::util::Sink>>::{closure#1}
Line
Count
Source
483
1.31M
                            attrs.borrow().iter().map(|at| (&at.name, &at.value[..])),
Unexecuted instantiation: <markup5ever_rcdom::SerializableHandle as markup5ever::serialize::Serialize>::serialize::<_>::{closure#1}
484
1.88M
                        )?;
485
486
1.88M
                        ops.reserve(1 + handle.children.borrow().len());
487
1.88M
                        ops.push_front(SerializeOp::Close(name.clone()));
488
489
6.18M
                        for child in handle.children.borrow().iter().rev() {
490
6.18M
                            ops.push_front(SerializeOp::Open(child.clone()));
491
6.18M
                        }
492
                    },
493
494
65.5k
                    NodeData::Doctype { ref name, .. } => serializer.write_doctype(name)?,
495
496
2.21M
                    NodeData::Text { ref contents } => serializer.write_text(&contents.borrow())?,
497
498
2.52M
                    NodeData::Comment { ref contents } => serializer.write_comment(contents)?,
499
500
                    NodeData::ProcessingInstruction {
501
16.2k
                        ref target,
502
16.2k
                        ref contents,
503
16.2k
                    } => serializer.write_processing_instruction(target, contents)?,
504
505
0
                    NodeData::Document => panic!("Can't serialize Document node itself"),
506
                },
507
508
1.88M
                SerializeOp::Close(name) => {
509
1.88M
                    serializer.end_elem(name)?;
510
                },
511
            }
512
        }
513
514
12.1k
        Ok(())
515
12.1k
    }
<markup5ever_rcdom::SerializableHandle as markup5ever::serialize::Serialize>::serialize::<xml5ever::serialize::XmlSerializer<&mut std::io::util::Sink>>
Line
Count
Source
457
12.1k
    fn serialize<S>(&self, serializer: &mut S, traversal_scope: TraversalScope) -> io::Result<()>
458
12.1k
    where
459
12.1k
        S: Serializer,
460
12.1k
    {
461
12.1k
        let mut ops = VecDeque::new();
462
12.1k
        match traversal_scope {
463
0
            IncludeNode => ops.push_back(SerializeOp::Open(self.0.clone())),
464
12.1k
            ChildrenOnly(_) => ops.extend(
465
12.1k
                self.0
466
12.1k
                    .children
467
12.1k
                    .borrow()
468
12.1k
                    .iter()
469
12.1k
                    .map(|h| SerializeOp::Open(h.clone())),
470
12.1k
            ),
471
        }
472
473
8.60M
        while let Some(op) = ops.pop_front() {
474
8.59M
            match op {
475
6.71M
                SerializeOp::Open(handle) => match handle.data {
476
                    NodeData::Element {
477
1.88M
                        ref name,
478
1.88M
                        ref attrs,
479
1.88M
                        ..
480
1.88M
                    } => {
481
1.88M
                        serializer.start_elem(
482
1.88M
                            name.clone(),
483
1.88M
                            attrs.borrow().iter().map(|at| (&at.name, &at.value[..])),
484
1.88M
                        )?;
485
486
1.88M
                        ops.reserve(1 + handle.children.borrow().len());
487
1.88M
                        ops.push_front(SerializeOp::Close(name.clone()));
488
489
6.18M
                        for child in handle.children.borrow().iter().rev() {
490
6.18M
                            ops.push_front(SerializeOp::Open(child.clone()));
491
6.18M
                        }
492
                    },
493
494
65.5k
                    NodeData::Doctype { ref name, .. } => serializer.write_doctype(name)?,
495
496
2.21M
                    NodeData::Text { ref contents } => serializer.write_text(&contents.borrow())?,
497
498
2.52M
                    NodeData::Comment { ref contents } => serializer.write_comment(contents)?,
499
500
                    NodeData::ProcessingInstruction {
501
16.2k
                        ref target,
502
16.2k
                        ref contents,
503
16.2k
                    } => serializer.write_processing_instruction(target, contents)?,
504
505
0
                    NodeData::Document => panic!("Can't serialize Document node itself"),
506
                },
507
508
1.88M
                SerializeOp::Close(name) => {
509
1.88M
                    serializer.end_elem(name)?;
510
                },
511
            }
512
        }
513
514
12.1k
        Ok(())
515
12.1k
    }
Unexecuted instantiation: <markup5ever_rcdom::SerializableHandle as markup5ever::serialize::Serialize>::serialize::<_>
516
}