Coverage Report

Created: 2025-12-31 07:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/html5ever/rcdom/lib.rs
Line
Count
Source
1
// Copyright 2014-2017 The html5ever Project Developers. See the
2
// COPYRIGHT file at the top-level directory of this distribution.
3
//
4
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7
// option. This file may not be copied, modified, or distributed
8
// except according to those terms.
9
10
//! A simple reference-counted DOM.
11
//!
12
//! This is sufficient as a static parse tree, but don't build a
13
//! web browser using it. :)
14
//!
15
//! A DOM is a [tree structure] with ordered children that can be represented in an XML-like
16
//! format. For example, the following graph
17
//!
18
//! ```text
19
//! div
20
//!  +- "text node"
21
//!  +- span
22
//! ```
23
//! in HTML would be serialized as
24
//!
25
//! ```html
26
//! <div>text node<span></span></div>
27
//! ```
28
//!
29
//! See the [document object model article on wikipedia][dom wiki] for more information.
30
//!
31
//! This implementation stores the information associated with each node once, and then hands out
32
//! refs to children. The nodes themselves are reference-counted to avoid copying - you can create
33
//! a new ref and then a node will outlive the document. Nodes own their children, but only have
34
//! weak references to their parents.
35
//!
36
//! [tree structure]: https://en.wikipedia.org/wiki/Tree_(data_structure)
37
//! [dom wiki]: https://en.wikipedia.org/wiki/Document_Object_Model
38
39
extern crate markup5ever;
40
extern crate tendril;
41
42
use std::borrow::Cow;
43
use std::cell::{Cell, RefCell};
44
use std::collections::{HashSet, VecDeque};
45
use std::default::Default;
46
use std::fmt;
47
use std::io;
48
use std::mem;
49
use std::rc::{Rc, Weak};
50
51
use tendril::StrTendril;
52
53
use markup5ever::interface::tree_builder;
54
use markup5ever::interface::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
55
use markup5ever::serialize::TraversalScope;
56
use markup5ever::serialize::TraversalScope::{ChildrenOnly, IncludeNode};
57
use markup5ever::serialize::{Serialize, Serializer};
58
use markup5ever::Attribute;
59
use markup5ever::ExpandedName;
60
use markup5ever::QualName;
61
62
/// The different kinds of nodes in the DOM.
63
#[derive(Debug, Clone)]
64
pub enum NodeData {
65
    /// The `Document` itself - the root node of a HTML document.
66
    Document,
67
68
    /// A `DOCTYPE` with name, public id, and system id. See
69
    /// [document type declaration on wikipedia][dtd wiki].
70
    ///
71
    /// [dtd wiki]: https://en.wikipedia.org/wiki/Document_type_declaration
72
    Doctype {
73
        name: StrTendril,
74
        public_id: StrTendril,
75
        system_id: StrTendril,
76
    },
77
78
    /// A text node.
79
    Text { contents: RefCell<StrTendril> },
80
81
    /// A comment.
82
    Comment { contents: StrTendril },
83
84
    /// An element with attributes.
85
    Element {
86
        name: QualName,
87
        attrs: RefCell<Vec<Attribute>>,
88
89
        /// For HTML \<template\> elements, the [template contents].
90
        ///
91
        /// [template contents]: https://html.spec.whatwg.org/multipage/#template-contents
92
        template_contents: RefCell<Option<Handle>>,
93
94
        /// Whether the node is a [HTML integration point].
95
        ///
96
        /// [HTML integration point]: https://html.spec.whatwg.org/multipage/#html-integration-point
97
        mathml_annotation_xml_integration_point: bool,
98
    },
99
100
    /// A Processing instruction.
101
    ProcessingInstruction {
102
        target: StrTendril,
103
        contents: StrTendril,
104
    },
105
}
106
107
/// A DOM node.
108
pub struct Node {
109
    /// Parent node.
110
    pub parent: Cell<Option<WeakHandle>>,
111
    /// Child nodes of this node.
112
    pub children: RefCell<Vec<Handle>>,
113
    /// Represents this node's data.
114
    pub data: NodeData,
115
}
116
117
impl Node {
118
    /// Create a new node from its contents
119
12.2M
    pub fn new(data: NodeData) -> Rc<Self> {
120
12.2M
        Rc::new(Node {
121
12.2M
            data,
122
12.2M
            parent: Cell::new(None),
123
12.2M
            children: RefCell::new(Vec::new()),
124
12.2M
        })
125
12.2M
    }
126
}
127
128
impl Drop for Node {
129
12.2M
    fn drop(&mut self) {
130
12.2M
        let mut nodes = mem::take(&mut *self.children.borrow_mut());
131
24.4M
        while let Some(node) = nodes.pop() {
132
12.2M
            let children = mem::take(&mut *node.children.borrow_mut());
133
12.2M
            nodes.extend(children.into_iter());
134
            if let NodeData::Element {
135
2.72M
                ref template_contents,
136
                ..
137
12.2M
            } = node.data
138
            {
139
2.72M
                if let Some(template_contents) = template_contents.borrow_mut().take() {
140
1.38k
                    nodes.push(template_contents);
141
2.72M
                }
142
9.51M
            }
143
        }
144
12.2M
    }
145
}
146
147
impl fmt::Debug for Node {
148
0
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
149
0
        fmt.debug_struct("Node")
150
0
            .field("data", &self.data)
151
0
            .field("children", &self.children)
152
0
            .finish()
153
0
    }
154
}
155
156
/// Reference to a DOM node.
157
pub type Handle = Rc<Node>;
158
159
/// Weak reference to a DOM node, used for parent pointers.
160
pub type WeakHandle = Weak<Node>;
161
162
/// Append a parentless node to another nodes' children
163
12.2M
fn append(new_parent: &Handle, child: Handle) {
164
12.2M
    let previous_parent = child.parent.replace(Some(Rc::downgrade(new_parent)));
165
    // Invariant: child cannot have existing parent
166
12.2M
    assert!(previous_parent.is_none());
167
12.2M
    new_parent.children.borrow_mut().push(child);
168
12.2M
}
169
170
/// If the node has a parent, get it and this node's position in its children
171
0
fn get_parent_and_index(target: &Handle) -> Option<(Handle, usize)> {
172
0
    if let Some(weak) = target.parent.take() {
173
0
        let parent = weak.upgrade().expect("dangling weak pointer");
174
0
        target.parent.set(Some(weak));
175
0
        let i = match parent
176
0
            .children
177
0
            .borrow()
178
0
            .iter()
179
0
            .enumerate()
180
0
            .find(|&(_, child)| Rc::ptr_eq(child, target))
181
        {
182
0
            Some((i, _)) => i,
183
0
            None => panic!("have parent but couldn't find in parent's children!"),
184
        };
185
0
        Some((parent, i))
186
    } else {
187
0
        None
188
    }
189
0
}
190
191
7.32M
fn append_to_existing_text(prev: &Handle, text: &str) -> bool {
192
7.32M
    match prev.data {
193
3.10M
        NodeData::Text { ref contents } => {
194
3.10M
            contents.borrow_mut().push_slice(text);
195
3.10M
            true
196
        },
197
4.22M
        _ => false,
198
    }
199
7.32M
}
200
201
0
fn remove_from_parent(target: &Handle) {
202
0
    if let Some((parent, i)) = get_parent_and_index(target) {
203
0
        parent.children.borrow_mut().remove(i);
204
0
        target.parent.set(None);
205
0
    }
206
0
}
207
208
/// The DOM itself; the result of parsing.
209
pub struct RcDom {
210
    /// The `Document` itself.
211
    pub document: Handle,
212
213
    /// Errors that occurred during parsing.
214
    pub errors: RefCell<Vec<Cow<'static, str>>>,
215
216
    /// The document's quirks mode.
217
    pub quirks_mode: Cell<QuirksMode>,
218
}
219
220
impl TreeSink for RcDom {
221
    type Output = Self;
222
12.8k
    fn finish(self) -> Self {
223
12.8k
        self
224
12.8k
    }
225
226
    type Handle = Handle;
227
228
    type ElemName<'a>
229
        = ExpandedName<'a>
230
    where
231
        Self: 'a;
232
233
42.3M
    fn parse_error(&self, msg: Cow<'static, str>) {
234
42.3M
        self.errors.borrow_mut().push(msg);
235
42.3M
    }
236
237
12.8k
    fn get_document(&self) -> Handle {
238
12.8k
        self.document.clone()
239
12.8k
    }
240
241
0
    fn get_template_contents(&self, target: &Handle) -> Handle {
242
        if let NodeData::Element {
243
0
            ref template_contents,
244
            ..
245
0
        } = target.data
246
        {
247
0
            template_contents
248
0
                .borrow()
249
0
                .as_ref()
250
0
                .expect("not a template element!")
251
0
                .clone()
252
        } else {
253
0
            panic!("not a template element!")
254
        }
255
0
    }
256
257
0
    fn set_quirks_mode(&self, mode: QuirksMode) {
258
0
        self.quirks_mode.set(mode);
259
0
    }
260
261
0
    fn same_node(&self, x: &Handle, y: &Handle) -> bool {
262
0
        Rc::ptr_eq(x, y)
263
0
    }
264
265
246M
    fn elem_name<'a>(&self, target: &'a Handle) -> ExpandedName<'a> {
266
246M
        match target.data {
267
246M
            NodeData::Element { ref name, .. } => name.expanded(),
268
0
            _ => panic!("not an element!"),
269
        }
270
246M
    }
271
272
2.72M
    fn create_element(&self, name: QualName, attrs: Vec<Attribute>, flags: ElementFlags) -> Handle {
273
2.72M
        Node::new(NodeData::Element {
274
2.72M
            name,
275
2.72M
            attrs: RefCell::new(attrs),
276
2.72M
            template_contents: RefCell::new(if flags.template {
277
1.38k
                Some(Node::new(NodeData::Document))
278
            } else {
279
2.72M
                None
280
            }),
281
2.72M
            mathml_annotation_xml_integration_point: flags.mathml_annotation_xml_integration_point,
282
        })
283
2.72M
    }
284
285
5.01M
    fn create_comment(&self, text: StrTendril) -> Handle {
286
5.01M
        Node::new(NodeData::Comment { contents: text })
287
5.01M
    }
288
289
9.45k
    fn create_pi(&self, target: StrTendril, data: StrTendril) -> Handle {
290
9.45k
        Node::new(NodeData::ProcessingInstruction {
291
9.45k
            target,
292
9.45k
            contents: data,
293
9.45k
        })
294
9.45k
    }
295
296
15.2M
    fn append(&self, parent: &Handle, child: NodeOrText<Handle>) {
297
        // Append to an existing Text node if we have one.
298
15.2M
        if let NodeOrText::AppendText(text) = &child {
299
7.48M
            if let Some(h) = parent.children.borrow().last() {
300
7.32M
                if append_to_existing_text(h, text) {
301
3.10M
                    return;
302
4.22M
                }
303
153k
            }
304
7.75M
        }
305
306
12.1M
        append(
307
12.1M
            parent,
308
12.1M
            match child {
309
4.38M
                NodeOrText::AppendText(text) => Node::new(NodeData::Text {
310
4.38M
                    contents: RefCell::new(text),
311
4.38M
                }),
312
7.75M
                NodeOrText::AppendNode(node) => node,
313
            },
314
        );
315
15.2M
    }
316
317
0
    fn append_before_sibling(&self, sibling: &Handle, child: NodeOrText<Handle>) {
318
0
        let (parent, i) = get_parent_and_index(sibling)
319
0
            .expect("append_before_sibling called on node without parent");
320
321
0
        let child = match (child, i) {
322
            // No previous node.
323
0
            (NodeOrText::AppendText(text), 0) => Node::new(NodeData::Text {
324
0
                contents: RefCell::new(text),
325
0
            }),
326
327
            // Look for a text node before the insertion point.
328
0
            (NodeOrText::AppendText(text), i) => {
329
0
                let children = parent.children.borrow();
330
0
                let prev = &children[i - 1];
331
0
                if append_to_existing_text(prev, &text) {
332
0
                    return;
333
0
                }
334
0
                Node::new(NodeData::Text {
335
0
                    contents: RefCell::new(text),
336
0
                })
337
            },
338
339
            // The tree builder promises we won't have a text node after
340
            // the insertion point.
341
342
            // Any other kind of node.
343
0
            (NodeOrText::AppendNode(node), _) => node,
344
        };
345
346
0
        remove_from_parent(&child);
347
348
0
        child.parent.set(Some(Rc::downgrade(&parent)));
349
0
        parent.children.borrow_mut().insert(i, child);
350
0
    }
351
352
0
    fn append_based_on_parent_node(
353
0
        &self,
354
0
        element: &Self::Handle,
355
0
        prev_element: &Self::Handle,
356
0
        child: NodeOrText<Self::Handle>,
357
0
    ) {
358
0
        let parent = element.parent.take();
359
0
        let has_parent = parent.is_some();
360
0
        element.parent.set(parent);
361
362
0
        if has_parent {
363
0
            self.append_before_sibling(element, child);
364
0
        } else {
365
0
            self.append(prev_element, child);
366
0
        }
367
0
    }
368
369
103k
    fn append_doctype_to_document(
370
103k
        &self,
371
103k
        name: StrTendril,
372
103k
        public_id: StrTendril,
373
103k
        system_id: StrTendril,
374
103k
    ) {
375
103k
        append(
376
103k
            &self.document,
377
103k
            Node::new(NodeData::Doctype {
378
103k
                name,
379
103k
                public_id,
380
103k
                system_id,
381
103k
            }),
382
        );
383
103k
    }
384
385
0
    fn add_attrs_if_missing(&self, target: &Handle, attrs: Vec<Attribute>) {
386
0
        let mut existing = if let NodeData::Element { ref attrs, .. } = target.data {
387
0
            attrs.borrow_mut()
388
        } else {
389
0
            panic!("not an element")
390
        };
391
392
0
        let existing_names = existing
393
0
            .iter()
394
0
            .map(|e| e.name.clone())
395
0
            .collect::<HashSet<_>>();
396
0
        existing.extend(
397
0
            attrs
398
0
                .into_iter()
399
0
                .filter(|attr| !existing_names.contains(&attr.name)),
400
        );
401
0
    }
402
403
0
    fn remove_from_parent(&self, target: &Handle) {
404
0
        remove_from_parent(target);
405
0
    }
406
407
0
    fn reparent_children(&self, node: &Handle, new_parent: &Handle) {
408
0
        let mut children = node.children.borrow_mut();
409
0
        let mut new_children = new_parent.children.borrow_mut();
410
0
        for child in children.iter() {
411
0
            let previous_parent = child.parent.replace(Some(Rc::downgrade(new_parent)));
412
0
            assert!(Rc::ptr_eq(
413
0
                node,
414
0
                &previous_parent.unwrap().upgrade().expect("dangling weak")
415
0
            ))
416
        }
417
0
        new_children.extend(mem::take(&mut *children));
418
0
    }
419
420
0
    fn clone_subtree(&self, node: &Handle) -> Handle {
421
0
        let cloned_node = Node::new(node.data.clone());
422
423
        // Clone all children recursively
424
0
        for child in node.children.borrow().iter() {
425
0
            let cloned_child = self.clone_subtree(child);
426
0
            append(&cloned_node, cloned_child);
427
0
        }
428
429
0
        cloned_node
430
0
    }
431
432
0
    fn is_mathml_annotation_xml_integration_point(&self, target: &Handle) -> bool {
433
        if let NodeData::Element {
434
0
            mathml_annotation_xml_integration_point,
435
            ..
436
0
        } = target.data
437
        {
438
0
            mathml_annotation_xml_integration_point
439
        } else {
440
0
            panic!("not an element!")
441
        }
442
0
    }
443
}
444
445
impl Default for RcDom {
446
12.8k
    fn default() -> RcDom {
447
12.8k
        RcDom {
448
12.8k
            document: Node::new(NodeData::Document),
449
12.8k
            errors: Default::default(),
450
12.8k
            quirks_mode: Cell::new(tree_builder::NoQuirks),
451
12.8k
        }
452
12.8k
    }
453
}
454
455
enum SerializeOp {
456
    Open(Handle),
457
    Close(QualName),
458
}
459
460
pub struct SerializableHandle(Handle);
461
462
impl From<Handle> for SerializableHandle {
463
12.8k
    fn from(h: Handle) -> SerializableHandle {
464
12.8k
        SerializableHandle(h)
465
12.8k
    }
466
}
467
468
impl Serialize for SerializableHandle {
469
12.8k
    fn serialize<S>(&self, serializer: &mut S, traversal_scope: TraversalScope) -> io::Result<()>
470
12.8k
    where
471
12.8k
        S: Serializer,
472
    {
473
12.8k
        let mut ops = VecDeque::new();
474
12.8k
        match traversal_scope {
475
0
            IncludeNode => ops.push_back(SerializeOp::Open(self.0.clone())),
476
12.8k
            ChildrenOnly(_) => ops.extend(
477
12.8k
                self.0
478
12.8k
                    .children
479
12.8k
                    .borrow()
480
12.8k
                    .iter()
481
842k
                    .map(|h| SerializeOp::Open(h.clone())),
482
            ),
483
        }
484
485
14.9M
        while let Some(op) = ops.pop_front() {
486
14.9M
            match op {
487
12.2M
                SerializeOp::Open(handle) => match handle.data {
488
                    NodeData::Element {
489
2.72M
                        ref name,
490
2.72M
                        ref attrs,
491
                        ..
492
                    } => {
493
2.72M
                        serializer.start_elem(
494
2.72M
                            name.clone(),
495
2.72M
                            attrs.borrow().iter().map(|at| (&at.name, &at.value[..])),
496
0
                        )?;
497
498
2.72M
                        ops.reserve(1 + handle.children.borrow().len());
499
2.72M
                        ops.push_front(SerializeOp::Close(name.clone()));
500
501
11.3M
                        for child in handle.children.borrow().iter().rev() {
502
11.3M
                            ops.push_front(SerializeOp::Open(child.clone()));
503
11.3M
                        }
504
                    },
505
506
103k
                    NodeData::Doctype { ref name, .. } => serializer.write_doctype(name)?,
507
508
4.38M
                    NodeData::Text { ref contents } => serializer.write_text(&contents.borrow())?,
509
510
5.01M
                    NodeData::Comment { ref contents } => serializer.write_comment(contents)?,
511
512
                    NodeData::ProcessingInstruction {
513
9.45k
                        ref target,
514
9.45k
                        ref contents,
515
9.45k
                    } => serializer.write_processing_instruction(target, contents)?,
516
517
0
                    NodeData::Document => panic!("Can't serialize Document node itself"),
518
                },
519
520
2.72M
                SerializeOp::Close(name) => {
521
2.72M
                    serializer.end_elem(name)?;
522
                },
523
            }
524
        }
525
526
12.8k
        Ok(())
527
12.8k
    }
528
}