/src/html5ever/xml5ever/src/driver.rs
Line | Count | Source |
1 | | // Copyright 2014-2017 The html5ever Project Developers. See the |
2 | | // COPYRIGHT file at the top-level directory of this distribution. |
3 | | // |
4 | | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
5 | | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
6 | | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
7 | | // option. This file may not be copied, modified, or distributed |
8 | | // except according to those terms. |
9 | | |
10 | | use crate::tokenizer::{XmlTokenizer, XmlTokenizerOpts}; |
11 | | use crate::tree_builder::{TreeSink, XmlTreeBuilder, XmlTreeBuilderOpts}; |
12 | | |
13 | | use std::borrow::Cow; |
14 | | |
15 | | use crate::tendril; |
16 | | use crate::tendril::stream::{TendrilSink, Utf8LossyDecoder}; |
17 | | use crate::tendril::StrTendril; |
18 | | use markup5ever::buffer_queue::BufferQueue; |
19 | | use markup5ever::TokenizerResult; |
20 | | |
21 | | /// All-encompasing parser setting structure. |
22 | | #[derive(Clone, Default)] |
23 | | pub struct XmlParseOpts { |
24 | | /// Xml tokenizer options. |
25 | | pub tokenizer: XmlTokenizerOpts, |
26 | | /// Xml tree builder . |
27 | | pub tree_builder: XmlTreeBuilderOpts, |
28 | | } |
29 | | |
30 | | /// Parse and send results to a `TreeSink`. |
31 | | /// |
32 | | /// ## Example |
33 | | /// |
34 | | /// ```ignore |
35 | | /// let mut sink = MySink; |
36 | | /// parse_document(&mut sink, iter::once(my_str), Default::default()); |
37 | | /// ``` |
38 | 12.7k | pub fn parse_document<Sink>(sink: Sink, opts: XmlParseOpts) -> XmlParser<Sink> |
39 | 12.7k | where |
40 | 12.7k | Sink: TreeSink, |
41 | | { |
42 | 12.7k | let tb = XmlTreeBuilder::new(sink, opts.tree_builder); |
43 | 12.7k | let tok = XmlTokenizer::new(tb, opts.tokenizer); |
44 | 12.7k | XmlParser { |
45 | 12.7k | tokenizer: tok, |
46 | 12.7k | input_buffer: BufferQueue::default(), |
47 | 12.7k | } |
48 | 12.7k | } |
49 | | |
50 | | /// An XML parser, |
51 | | /// ready to receive Unicode input through the `tendril::TendrilSink` trait’s methods. |
52 | | pub struct XmlParser<Sink> |
53 | | where |
54 | | Sink: TreeSink, |
55 | | { |
56 | | /// Tokenizer used by XmlParser. |
57 | | pub tokenizer: XmlTokenizer<XmlTreeBuilder<Sink::Handle, Sink>>, |
58 | | /// Input used by XmlParser. |
59 | | pub input_buffer: BufferQueue, |
60 | | } |
61 | | |
62 | | impl<Sink: TreeSink> TendrilSink<tendril::fmt::UTF8> for XmlParser<Sink> { |
63 | | type Output = Sink::Output; |
64 | | |
65 | 33.7M | fn process(&mut self, t: StrTendril) { |
66 | 33.7M | self.input_buffer.push_back(t); |
67 | | // FIXME: Properly support </script> somehow. |
68 | 33.8M | while let TokenizerResult::Script(_) = self.tokenizer.feed(&self.input_buffer) {} |
69 | 33.7M | } |
70 | | |
71 | | // FIXME: Is it too noisy to report every character decoding error? |
72 | 29.9M | fn error(&mut self, desc: Cow<'static, str>) { |
73 | 29.9M | self.tokenizer.sink.sink.parse_error(desc) |
74 | 29.9M | } |
75 | | |
76 | 12.7k | fn finish(self) -> Self::Output { |
77 | 12.7k | self.tokenizer.end(); |
78 | 12.7k | self.tokenizer.sink.sink.finish() |
79 | 12.7k | } |
80 | | } |
81 | | |
82 | | impl<Sink: TreeSink> XmlParser<Sink> { |
83 | | /// Wrap this parser into a `TendrilSink` that accepts UTF-8 bytes. |
84 | | /// |
85 | | /// Use this when your input is bytes that are known to be in the UTF-8 encoding. |
86 | | /// Decoding is lossy, like `String::from_utf8_lossy`. |
87 | | #[allow(clippy::wrong_self_convention)] |
88 | 12.7k | pub fn from_utf8(self) -> Utf8LossyDecoder<Self> { |
89 | 12.7k | Utf8LossyDecoder::new(self) |
90 | 12.7k | } |
91 | | } |