Coverage Report

Created: 2025-05-08 06:13

/src/html5ever/xml5ever/src/driver.rs
Line
Count
Source
1
// Copyright 2014-2017 The html5ever Project Developers. See the
2
// COPYRIGHT file at the top-level directory of this distribution.
3
//
4
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7
// option. This file may not be copied, modified, or distributed
8
// except according to those terms.
9
10
use crate::tokenizer::{XmlTokenizer, XmlTokenizerOpts};
11
use crate::tree_builder::{TreeSink, XmlTreeBuilder, XmlTreeBuilderOpts};
12
13
use std::borrow::Cow;
14
15
use crate::tendril;
16
use crate::tendril::stream::{TendrilSink, Utf8LossyDecoder};
17
use crate::tendril::StrTendril;
18
use markup5ever::buffer_queue::BufferQueue;
19
20
/// All-encompassing parser settings structure.
21
#[derive(Clone, Default)]
22
pub struct XmlParseOpts {
23
    /// Xml tokenizer options.
24
    pub tokenizer: XmlTokenizerOpts,
25
    /// Xml tree builder options.
26
    pub tree_builder: XmlTreeBuilderOpts,
27
}
28
29
/// Parse and send results to a `TreeSink`.
30
///
31
/// ## Example
32
///
33
/// ```ignore
34
/// let mut sink = MySink;
35
/// parse_document(sink, Default::default()).one(my_str);
36
/// ```
37
12.1k
pub fn parse_document<Sink>(sink: Sink, opts: XmlParseOpts) -> XmlParser<Sink>
38
12.1k
where
39
12.1k
    Sink: TreeSink,
40
12.1k
{
41
12.1k
    let tb = XmlTreeBuilder::new(sink, opts.tree_builder);
42
12.1k
    let tok = XmlTokenizer::new(tb, opts.tokenizer);
43
12.1k
    XmlParser {
44
12.1k
        tokenizer: tok,
45
12.1k
        input_buffer: BufferQueue::default(),
46
12.1k
    }
47
12.1k
}
xml5ever::driver::parse_document::<markup5ever_rcdom::RcDom>
Line
Count
Source
37
12.1k
pub fn parse_document<Sink>(sink: Sink, opts: XmlParseOpts) -> XmlParser<Sink>
38
12.1k
where
39
12.1k
    Sink: TreeSink,
40
12.1k
{
41
12.1k
    let tb = XmlTreeBuilder::new(sink, opts.tree_builder);
42
12.1k
    let tok = XmlTokenizer::new(tb, opts.tokenizer);
43
12.1k
    XmlParser {
44
12.1k
        tokenizer: tok,
45
12.1k
        input_buffer: BufferQueue::default(),
46
12.1k
    }
47
12.1k
}
Unexecuted instantiation: xml5ever::driver::parse_document::<_>
48
49
/// An XML parser,
50
/// ready to receive Unicode input through the `tendril::TendrilSink` trait’s methods.
51
pub struct XmlParser<Sink>
52
where
53
    Sink: TreeSink,
54
{
55
    /// Tokenizer used by XmlParser.
56
    pub tokenizer: XmlTokenizer<XmlTreeBuilder<Sink::Handle, Sink>>,
57
    /// Input used by XmlParser.
58
    pub input_buffer: BufferQueue,
59
}
60
61
impl<Sink: TreeSink> TendrilSink<tendril::fmt::UTF8> for XmlParser<Sink> {
62
    type Output = Sink::Output;
63
64
39.5M
    fn process(&mut self, t: StrTendril) {
65
39.5M
        self.input_buffer.push_back(t);
66
39.5M
        // FIXME: Properly support </script> somehow.
67
39.5M
        let _ = self.tokenizer.feed(&self.input_buffer);
68
39.5M
    }
<xml5ever::driver::XmlParser<markup5ever_rcdom::RcDom> as tendril::stream::TendrilSink<tendril::fmt::UTF8>>::process
Line
Count
Source
64
39.5M
    fn process(&mut self, t: StrTendril) {
65
39.5M
        self.input_buffer.push_back(t);
66
39.5M
        // FIXME: Properly support </script> somehow.
67
39.5M
        let _ = self.tokenizer.feed(&self.input_buffer);
68
39.5M
    }
Unexecuted instantiation: <xml5ever::driver::XmlParser<_> as tendril::stream::TendrilSink<tendril::fmt::UTF8>>::process
69
70
    // FIXME: Is it too noisy to report every character decoding error?
71
36.2M
    fn error(&mut self, desc: Cow<'static, str>) {
72
36.2M
        self.tokenizer.sink.sink.parse_error(desc)
73
36.2M
    }
<xml5ever::driver::XmlParser<markup5ever_rcdom::RcDom> as tendril::stream::TendrilSink<tendril::fmt::UTF8>>::error
Line
Count
Source
71
36.2M
    fn error(&mut self, desc: Cow<'static, str>) {
72
36.2M
        self.tokenizer.sink.sink.parse_error(desc)
73
36.2M
    }
Unexecuted instantiation: <xml5ever::driver::XmlParser<_> as tendril::stream::TendrilSink<tendril::fmt::UTF8>>::error
74
75
12.1k
    fn finish(self) -> Self::Output {
76
12.1k
        self.tokenizer.end();
77
12.1k
        self.tokenizer.sink.sink.finish()
78
12.1k
    }
<xml5ever::driver::XmlParser<markup5ever_rcdom::RcDom> as tendril::stream::TendrilSink<tendril::fmt::UTF8>>::finish
Line
Count
Source
75
12.1k
    fn finish(self) -> Self::Output {
76
12.1k
        self.tokenizer.end();
77
12.1k
        self.tokenizer.sink.sink.finish()
78
12.1k
    }
Unexecuted instantiation: <xml5ever::driver::XmlParser<_> as tendril::stream::TendrilSink<tendril::fmt::UTF8>>::finish
79
}
80
81
impl<Sink: TreeSink> XmlParser<Sink> {
82
    /// Wrap this parser into a `TendrilSink` that accepts UTF-8 bytes.
83
    ///
84
    /// Use this when your input is bytes that are known to be in the UTF-8 encoding.
85
    /// Decoding is lossy, like `String::from_utf8_lossy`.
86
    #[allow(clippy::wrong_self_convention)]
87
12.1k
    pub fn from_utf8(self) -> Utf8LossyDecoder<Self> {
88
12.1k
        Utf8LossyDecoder::new(self)
89
12.1k
    }
<xml5ever::driver::XmlParser<markup5ever_rcdom::RcDom>>::from_utf8
Line
Count
Source
87
12.1k
    pub fn from_utf8(self) -> Utf8LossyDecoder<Self> {
88
12.1k
        Utf8LossyDecoder::new(self)
89
12.1k
    }
Unexecuted instantiation: <xml5ever::driver::XmlParser<_>>::from_utf8
90
}