/src/html5ever/xml5ever/src/tokenizer/mod.rs
Line | Count | Source |
1 | | // Copyright 2014-2017 The html5ever Project Developers. See the |
2 | | // COPYRIGHT file at the top-level directory of this distribution. |
3 | | // |
4 | | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
5 | | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
6 | | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
7 | | // option. This file may not be copied, modified, or distributed |
8 | | // except according to those terms. |
9 | | |
10 | | mod char_ref; |
11 | | mod interface; |
12 | | mod qname; |
13 | | pub mod states; |
14 | | |
15 | | pub use self::interface::{ |
16 | | Doctype, EmptyTag, EndTag, Pi, ShortTag, StartTag, Tag, TagKind, Token, TokenSink, |
17 | | }; |
18 | | pub use crate::{LocalName, Namespace, Prefix}; |
19 | | |
20 | | use crate::macros::time; |
21 | | use crate::tendril::StrTendril; |
22 | | use crate::{buffer_queue, Attribute, QualName, SmallCharSet}; |
23 | | use log::debug; |
24 | | use markup5ever::{local_name, namespace_prefix, ns, small_char_set, TokenizerResult}; |
25 | | use std::borrow::Cow::{self, Borrowed}; |
26 | | use std::cell::{Cell, RefCell, RefMut}; |
27 | | use std::collections::BTreeMap; |
28 | | use std::mem::replace; |
29 | | |
30 | | use buffer_queue::{BufferQueue, FromSet, NotFromSet, SetResult}; |
31 | | use char_ref::{CharRef, CharRefTokenizer}; |
32 | | use qname::QualNameTokenizer; |
33 | | use states::{AttrValueKind::*, DoctypeKind, DoctypeKind::*, XmlState}; |
34 | | |
35 | | /// Copy of Tokenizer options, with an impl for `Default`. |
36 | | #[derive(Copy, Clone)] |
37 | | pub struct XmlTokenizerOpts { |
38 | | /// Report all parse errors described in the spec, at some |
39 | | /// performance penalty? Default: false |
40 | | pub exact_errors: bool, |
41 | | |
42 | | /// Discard a `U+FEFF BYTE ORDER MARK` if we see one at the beginning |
43 | | /// of the stream? Default: true |
44 | | pub discard_bom: bool, |
45 | | |
46 | | /// Keep a record of how long we spent in each state? Printed |
47 | | /// when `end()` is called. Default: false |
48 | | pub profile: bool, |
49 | | |
50 | | /// Initial state override. Only the test runner should use |
51 | | /// a non-`None` value! |
52 | | pub initial_state: Option<XmlState>, |
53 | | } |
54 | | |
55 | 4.17M | fn process_qname(tag_name: StrTendril) -> QualName { |
56 | | // If tag name can't possibly contain full namespace, skip qualified name |
57 | | // parsing altogether. For a tag to have namespace it must look like: |
58 | | // a:b |
59 | | // Since StrTendril are UTF-8, we know that minimal size in bytes must be |
60 | | // three bytes minimum. |
61 | 4.17M | let split = if (*tag_name).len() < 3 { |
62 | 874k | None |
63 | | } else { |
64 | 3.29M | QualNameTokenizer::new((*tag_name).as_bytes()).run() |
65 | | }; |
66 | | |
67 | 4.17M | match split { |
68 | 1.85M | None => QualName::new(None, ns!(), LocalName::from(&*tag_name)), |
69 | 2.31M | Some(col) => { |
70 | 2.31M | let len = (*tag_name).len() as u32; |
71 | 2.31M | let prefix = tag_name.subtendril(0, col); |
72 | 2.31M | let local = tag_name.subtendril(col + 1, len - col - 1); |
73 | 2.31M | let ns = ns!(); // Actual namespace URL set in XmlTreeBuilder::bind_qname |
74 | 2.31M | QualName::new(Some(Prefix::from(&*prefix)), ns, LocalName::from(&*local)) |
75 | | }, |
76 | | } |
77 | 4.17M | } |
78 | | |
79 | 3.54M | fn option_push(opt_str: &mut Option<StrTendril>, c: char) { |
80 | 3.54M | match *opt_str { |
81 | 3.34M | Some(ref mut s) => s.push_char(c), |
82 | 205k | None => *opt_str = Some(StrTendril::from_char(c)), |
83 | | } |
84 | 3.54M | } |
85 | | |
86 | | impl Default for XmlTokenizerOpts { |
87 | 12.9k | fn default() -> XmlTokenizerOpts { |
88 | 12.9k | XmlTokenizerOpts { |
89 | 12.9k | exact_errors: false, |
90 | 12.9k | discard_bom: true, |
91 | 12.9k | profile: false, |
92 | 12.9k | initial_state: None, |
93 | 12.9k | } |
94 | 12.9k | } |
95 | | } |
96 | | /// The Xml tokenizer. |
97 | | pub struct XmlTokenizer<Sink> { |
98 | | /// Options controlling the behavior of the tokenizer. |
99 | | opts: XmlTokenizerOpts, |
100 | | |
101 | | /// Destination for tokens we emit. |
102 | | pub sink: Sink, |
103 | | |
104 | | /// The abstract machine state as described in the spec. |
105 | | state: Cell<XmlState>, |
106 | | |
107 | | /// Are we at the end of the file, once buffers have been processed |
108 | | /// completely? This affects whether we will wait for lookahead or not. |
109 | | at_eof: Cell<bool>, |
110 | | |
111 | | /// Tokenizer for character references, if we're tokenizing |
112 | | /// one at the moment. |
113 | | char_ref_tokenizer: RefCell<Option<Box<CharRefTokenizer>>>, |
114 | | |
115 | | /// Current input character. Just consumed, may reconsume. |
116 | | current_char: Cell<char>, |
117 | | |
118 | | /// Should we reconsume the current input character? |
119 | | reconsume: Cell<bool>, |
120 | | |
121 | | /// Did we just consume \r, translating it to \n? In that case we need |
122 | | /// to ignore the next character if it's \n. |
123 | | ignore_lf: Cell<bool>, |
124 | | |
125 | | /// Discard a U+FEFF BYTE ORDER MARK if we see one? Only done at the |
126 | | /// beginning of the stream. |
127 | | discard_bom: Cell<bool>, |
128 | | |
129 | | /// Temporary buffer |
130 | | temp_buf: RefCell<StrTendril>, |
131 | | |
132 | | /// Current tag kind. |
133 | | current_tag_kind: Cell<TagKind>, |
134 | | |
135 | | /// Current tag name. |
136 | | current_tag_name: RefCell<StrTendril>, |
137 | | |
138 | | /// Current tag attributes. |
139 | | current_tag_attrs: RefCell<Vec<Attribute>>, |
140 | | |
141 | | /// Current attribute name. |
142 | | current_attr_name: RefCell<StrTendril>, |
143 | | |
144 | | /// Current attribute value. |
145 | | current_attr_value: RefCell<StrTendril>, |
146 | | |
147 | | current_doctype: RefCell<Doctype>, |
148 | | |
149 | | /// Current comment. |
150 | | current_comment: RefCell<StrTendril>, |
151 | | |
152 | | /// Current processing instruction target. |
153 | | current_pi_target: RefCell<StrTendril>, |
154 | | |
155 | | /// Current processing instruction value. |
156 | | current_pi_data: RefCell<StrTendril>, |
157 | | |
158 | | /// Record of how many ns we spent in each state, if profiling is enabled. |
159 | | state_profile: RefCell<BTreeMap<XmlState, u64>>, |
160 | | |
161 | | /// Record of how many ns we spent in the token sink. |
162 | | time_in_sink: Cell<u64>, |
163 | | } |
164 | | |
165 | | impl<Sink: TokenSink> XmlTokenizer<Sink> { |
166 | | /// Create a new tokenizer which feeds tokens to a particular `TokenSink`. |
167 | 12.9k | pub fn new(sink: Sink, opts: XmlTokenizerOpts) -> XmlTokenizer<Sink> { |
168 | 12.9k | if opts.profile && cfg!(for_c) { |
169 | 0 | panic!("Can't profile tokenizer when built as a C library"); |
170 | 12.9k | } |
171 | | |
172 | 12.9k | let state = *opts.initial_state.as_ref().unwrap_or(&XmlState::Data); |
173 | 12.9k | let discard_bom = opts.discard_bom; |
174 | 12.9k | XmlTokenizer { |
175 | 12.9k | opts, |
176 | 12.9k | sink, |
177 | 12.9k | state: Cell::new(state), |
178 | 12.9k | char_ref_tokenizer: RefCell::new(None), |
179 | 12.9k | at_eof: Cell::new(false), |
180 | 12.9k | current_char: Cell::new('\0'), |
181 | 12.9k | reconsume: Cell::new(false), |
182 | 12.9k | ignore_lf: Cell::new(false), |
183 | 12.9k | temp_buf: RefCell::new(StrTendril::new()), |
184 | 12.9k | discard_bom: Cell::new(discard_bom), |
185 | 12.9k | current_tag_kind: Cell::new(StartTag), |
186 | 12.9k | current_tag_name: RefCell::new(StrTendril::new()), |
187 | 12.9k | current_tag_attrs: RefCell::new(vec![]), |
188 | 12.9k | current_attr_name: RefCell::new(StrTendril::new()), |
189 | 12.9k | current_attr_value: RefCell::new(StrTendril::new()), |
190 | 12.9k | current_comment: RefCell::new(StrTendril::new()), |
191 | 12.9k | current_pi_data: RefCell::new(StrTendril::new()), |
192 | 12.9k | current_pi_target: RefCell::new(StrTendril::new()), |
193 | 12.9k | current_doctype: RefCell::new(Doctype::default()), |
194 | 12.9k | state_profile: RefCell::new(BTreeMap::new()), |
195 | 12.9k | time_in_sink: Cell::new(0), |
196 | 12.9k | } |
197 | 12.9k | } <xml5ever::tokenizer::XmlTokenizer<xml5ever::tree_builder::XmlTreeBuilder<alloc::rc::Rc<markup5ever_rcdom::Node>, markup5ever_rcdom::RcDom>>>::new Line | Count | Source | 167 | 12.9k | pub fn new(sink: Sink, opts: XmlTokenizerOpts) -> XmlTokenizer<Sink> { | 168 | 12.9k | if opts.profile && cfg!(for_c) { | 169 | 0 | panic!("Can't profile tokenizer when built as a C library"); | 170 | 12.9k | } | 171 | | | 172 | 12.9k | let state = *opts.initial_state.as_ref().unwrap_or(&XmlState::Data); | 173 | 12.9k | let discard_bom = opts.discard_bom; | 174 | 12.9k | XmlTokenizer { | 175 | 12.9k | opts, | 176 | 12.9k | sink, | 177 | 12.9k | state: Cell::new(state), | 178 | 12.9k | char_ref_tokenizer: RefCell::new(None), | 179 | 12.9k | at_eof: Cell::new(false), | 180 | 12.9k | current_char: Cell::new('\0'), | 181 | 12.9k | reconsume: Cell::new(false), | 182 | 12.9k | ignore_lf: Cell::new(false), | 183 | 12.9k | temp_buf: RefCell::new(StrTendril::new()), | 184 | 12.9k | discard_bom: Cell::new(discard_bom), | 185 | 12.9k | current_tag_kind: Cell::new(StartTag), | 186 | 12.9k | current_tag_name: RefCell::new(StrTendril::new()), | 187 | 12.9k | current_tag_attrs: RefCell::new(vec![]), | 188 | 12.9k | current_attr_name: RefCell::new(StrTendril::new()), | 189 | 12.9k | current_attr_value: RefCell::new(StrTendril::new()), | 190 | 12.9k | current_comment: RefCell::new(StrTendril::new()), | 191 | 12.9k | current_pi_data: RefCell::new(StrTendril::new()), | 192 | 12.9k | current_pi_target: RefCell::new(StrTendril::new()), | 193 | 12.9k | current_doctype: RefCell::new(Doctype::default()), | 194 | 12.9k | state_profile: RefCell::new(BTreeMap::new()), | 195 | 12.9k | time_in_sink: Cell::new(0), | 196 | 12.9k | } | 197 | 12.9k | } |
Unexecuted instantiation: <xml5ever::tokenizer::XmlTokenizer<_>>::new |
198 | | |
199 | | /// Feed an input string into the tokenizer. |
200 | 35.8M | pub fn feed(&self, input: &BufferQueue) -> TokenizerResult<Sink::Handle> { |
201 | 35.8M | if input.is_empty() { |
202 | 868 | return TokenizerResult::Done; |
203 | 35.8M | } |
204 | | |
205 | 35.8M | if self.discard_bom.get() { |
206 | 35.8M | if let Some(c) = input.peek() { |
207 | 35.8M | if c == '\u{feff}' { |
208 | 221k | input.next(); |
209 | 35.6M | } |
210 | | } else { |
211 | 0 | return TokenizerResult::Done; |
212 | | } |
213 | 0 | }; |
214 | | |
215 | 35.8M | self.run(input) |
216 | 35.8M | } <xml5ever::tokenizer::XmlTokenizer<xml5ever::tree_builder::XmlTreeBuilder<alloc::rc::Rc<markup5ever_rcdom::Node>, markup5ever_rcdom::RcDom>>>::feed Line | Count | Source | 200 | 35.8M | pub fn feed(&self, input: &BufferQueue) -> TokenizerResult<Sink::Handle> { | 201 | 35.8M | if input.is_empty() { | 202 | 868 | return TokenizerResult::Done; | 203 | 35.8M | } | 204 | | | 205 | 35.8M | if self.discard_bom.get() { | 206 | 35.8M | if let Some(c) = input.peek() { | 207 | 35.8M | if c == '\u{feff}' { | 208 | 221k | input.next(); | 209 | 35.6M | } | 210 | | } else { | 211 | 0 | return TokenizerResult::Done; | 212 | | } | 213 | 0 | }; | 214 | | | 215 | 35.8M | self.run(input) | 216 | 35.8M | } |
Unexecuted instantiation: <xml5ever::tokenizer::XmlTokenizer<_>>::feed |
217 | | |
218 | 23.7M | fn process_token(&self, token: Token) -> ProcessResult<Sink::Handle> { |
219 | 23.7M | if self.opts.profile { |
220 | 0 | let (result, dt) = time!(self.sink.process_token(token)); |
221 | 0 | self.time_in_sink.set(self.time_in_sink.get() + dt); |
222 | 0 | result |
223 | | } else { |
224 | 23.7M | self.sink.process_token(token) |
225 | | } |
226 | 23.7M | } <xml5ever::tokenizer::XmlTokenizer<xml5ever::tree_builder::XmlTreeBuilder<alloc::rc::Rc<markup5ever_rcdom::Node>, markup5ever_rcdom::RcDom>>>::process_token Line | Count | Source | 218 | 23.7M | fn process_token(&self, token: Token) -> ProcessResult<Sink::Handle> { | 219 | 23.7M | if self.opts.profile { | 220 | 0 | let (result, dt) = time!(self.sink.process_token(token)); | 221 | 0 | self.time_in_sink.set(self.time_in_sink.get() + dt); | 222 | 0 | result | 223 | | } else { | 224 | 23.7M | self.sink.process_token(token) | 225 | | } | 226 | 23.7M | } |
Unexecuted instantiation: <xml5ever::tokenizer::XmlTokenizer<_>>::process_token |
227 | | |
228 | | // Get the next input character, which might be the character |
229 | | // 'c' that we already consumed from the buffers. |
230 | 81.9M | fn get_preprocessed_char(&self, mut c: char, input: &BufferQueue) -> Option<char> { |
231 | 81.9M | if self.ignore_lf.get() { |
232 | 285k | self.ignore_lf.set(false); |
233 | 285k | if c == '\n' { |
234 | 19.9k | c = input.next()?; |
235 | 265k | } |
236 | 81.6M | } |
237 | | |
238 | 81.9M | if c == '\r' { |
239 | 285k | self.ignore_lf.set(true); |
240 | 285k | c = '\n'; |
241 | 81.6M | } |
242 | | |
243 | | // Normalize \x00 into \uFFFD |
244 | 81.9M | if c == '\x00' { |
245 | 6.12M | c = '\u{FFFD}' |
246 | 75.8M | } |
247 | | |
248 | | // Exclude forbidden Unicode characters |
249 | 81.9M | if self.opts.exact_errors |
250 | 0 | && match c as u32 { |
251 | 0 | 0x01..=0x08 | 0x0B | 0x0E..=0x1F | 0x7F..=0x9F | 0xFDD0..=0xFDEF => true, |
252 | 0 | n if (n & 0xFFFE) == 0xFFFE => true, |
253 | 0 | _ => false, |
254 | | } |
255 | 0 | { |
256 | 0 | let msg = format!("Bad character {c}"); |
257 | 0 | self.emit_error(Cow::Owned(msg)); |
258 | 81.9M | } |
259 | | |
260 | 81.9M | debug!("got character {c}"); |
261 | 81.9M | self.current_char.set(c); |
262 | 81.9M | Some(c) |
263 | 81.9M | } <xml5ever::tokenizer::XmlTokenizer<xml5ever::tree_builder::XmlTreeBuilder<alloc::rc::Rc<markup5ever_rcdom::Node>, markup5ever_rcdom::RcDom>>>::get_preprocessed_char Line | Count | Source | 230 | 81.9M | fn get_preprocessed_char(&self, mut c: char, input: &BufferQueue) -> Option<char> { | 231 | 81.9M | if self.ignore_lf.get() { | 232 | 285k | self.ignore_lf.set(false); | 233 | 285k | if c == '\n' { | 234 | 19.9k | c = input.next()?; | 235 | 265k | } | 236 | 81.6M | } | 237 | | | 238 | 81.9M | if c == '\r' { | 239 | 285k | self.ignore_lf.set(true); | 240 | 285k | c = '\n'; | 241 | 81.6M | } | 242 | | | 243 | | // Normalize \x00 into \uFFFD | 244 | 81.9M | if c == '\x00' { | 245 | 6.12M | c = '\u{FFFD}' | 246 | 75.8M | } | 247 | | | 248 | | // Exclude forbidden Unicode characters | 249 | 81.9M | if self.opts.exact_errors | 250 | 0 | && match c as u32 { | 251 | 0 | 0x01..=0x08 | 0x0B | 0x0E..=0x1F | 0x7F..=0x9F | 0xFDD0..=0xFDEF => true, | 252 | 0 | n if (n & 0xFFFE) == 0xFFFE => true, | 253 | 0 | _ => false, | 254 | | } | 255 | 0 | { | 256 | 0 | let msg = format!("Bad character {c}"); | 257 | 0 | self.emit_error(Cow::Owned(msg)); | 258 | 81.9M | } | 259 | | | 260 | 81.9M | debug!("got character {c}"); | 261 | 81.9M | self.current_char.set(c); | 262 | 81.9M | Some(c) | 263 | 81.9M | } |
Unexecuted instantiation: <xml5ever::tokenizer::XmlTokenizer<_>>::get_preprocessed_char |
264 | | |
265 | 9.71k | fn bad_eof_error(&self) { |
266 | 9.71k | let msg = if self.opts.exact_errors { |
267 | 0 | Cow::from(format!("Saw EOF in state {:?}", self.state)) |
268 | | } else { |
269 | 9.71k | Cow::from("Unexpected EOF") |
270 | | }; |
271 | 9.71k | self.emit_error(msg); |
272 | 9.71k | } <xml5ever::tokenizer::XmlTokenizer<xml5ever::tree_builder::XmlTreeBuilder<alloc::rc::Rc<markup5ever_rcdom::Node>, markup5ever_rcdom::RcDom>>>::bad_eof_error Line | Count | Source | 265 | 9.71k | fn bad_eof_error(&self) { | 266 | 9.71k | let msg = if self.opts.exact_errors { | 267 | 0 | Cow::from(format!("Saw EOF in state {:?}", self.state)) | 268 | | } else { | 269 | 9.71k | Cow::from("Unexpected EOF") | 270 | | }; | 271 | 9.71k | self.emit_error(msg); | 272 | 9.71k | } |
Unexecuted instantiation: <xml5ever::tokenizer::XmlTokenizer<_>>::bad_eof_error |
273 | | |
274 | 40.2M | fn pop_except_from(&self, input: &BufferQueue, set: SmallCharSet) -> Option<SetResult> { |
275 | | // Bail to the slow path for various corner cases. |
276 | | // This means that `FromSet` can contain characters not in the set! |
277 | | // It shouldn't matter because the fallback `FromSet` case should |
278 | | // always do the same thing as the `NotFromSet` case. |
279 | 40.2M | if self.opts.exact_errors || self.reconsume.get() || self.ignore_lf.get() { |
280 | 171k | return self.get_char(input).map(FromSet); |
281 | 40.0M | } |
282 | | |
283 | 40.0M | let d = input.pop_except_from(set); |
284 | 40.0M | debug!("got characters {d:?}"); |
285 | 26.6M | match d { |
286 | 8.13M | Some(FromSet(c)) => self.get_preprocessed_char(c, input).map(FromSet), |
287 | | |
288 | | // NB: We don't set self.current_char for a run of characters not |
289 | | // in the set. It shouldn't matter for the codepaths that use |
290 | | // this. |
291 | 31.9M | _ => d, |
292 | | } |
293 | 40.2M | } <xml5ever::tokenizer::XmlTokenizer<xml5ever::tree_builder::XmlTreeBuilder<alloc::rc::Rc<markup5ever_rcdom::Node>, markup5ever_rcdom::RcDom>>>::pop_except_from Line | Count | Source | 274 | 40.2M | fn pop_except_from(&self, input: &BufferQueue, set: SmallCharSet) -> Option<SetResult> { | 275 | | // Bail to the slow path for various corner cases. | 276 | | // This means that `FromSet` can contain characters not in the set! | 277 | | // It shouldn't matter because the fallback `FromSet` case should | 278 | | // always do the same thing as the `NotFromSet` case. | 279 | 40.2M | if self.opts.exact_errors || self.reconsume.get() || self.ignore_lf.get() { | 280 | 171k | return self.get_char(input).map(FromSet); | 281 | 40.0M | } | 282 | | | 283 | 40.0M | let d = input.pop_except_from(set); | 284 | 40.0M | debug!("got characters {d:?}"); | 285 | 26.6M | match d { | 286 | 8.13M | Some(FromSet(c)) => self.get_preprocessed_char(c, input).map(FromSet), | 287 | | | 288 | | // NB: We don't set self.current_char for a run of characters not | 289 | | // in the set. It shouldn't matter for the codepaths that use | 290 | | // this. | 291 | 31.9M | _ => d, | 292 | | } | 293 | 40.2M | } |
Unexecuted instantiation: <xml5ever::tokenizer::XmlTokenizer<_>>::pop_except_from |
294 | | |
295 | | // Check if the next characters are an ASCII case-insensitive match. See |
296 | | // BufferQueue::eat. |
297 | | // |
298 | | // NB: this doesn't do input stream preprocessing or set the current input |
299 | | // character. |
300 | 15.1M | fn eat(&self, input: &BufferQueue, pat: &str) -> Option<bool> { |
301 | 15.1M | input.push_front(replace(&mut *self.temp_buf.borrow_mut(), StrTendril::new())); |
302 | 15.1M | match input.eat(pat, u8::eq_ignore_ascii_case) { |
303 | 16.2k | None if self.at_eof.get() => Some(false), |
304 | | None => { |
305 | 15.5k | let mut temp_buf = self.temp_buf.borrow_mut(); |
306 | 32.2k | while let Some(data) = input.next() { |
307 | 16.7k | temp_buf.push_char(data); |
308 | 16.7k | } |
309 | 15.5k | None |
310 | | }, |
311 | 15.1M | Some(matched) => Some(matched), |
312 | | } |
313 | 15.1M | } <xml5ever::tokenizer::XmlTokenizer<xml5ever::tree_builder::XmlTreeBuilder<alloc::rc::Rc<markup5ever_rcdom::Node>, markup5ever_rcdom::RcDom>>>::eat Line | Count | Source | 300 | 15.1M | fn eat(&self, input: &BufferQueue, pat: &str) -> Option<bool> { | 301 | 15.1M | input.push_front(replace(&mut *self.temp_buf.borrow_mut(), StrTendril::new())); | 302 | 15.1M | match input.eat(pat, u8::eq_ignore_ascii_case) { | 303 | 16.2k | None if self.at_eof.get() => Some(false), | 304 | | None => { | 305 | 15.5k | let mut temp_buf = self.temp_buf.borrow_mut(); | 306 | 32.2k | while let Some(data) = input.next() { | 307 | 16.7k | temp_buf.push_char(data); | 308 | 16.7k | } | 309 | 15.5k | None | 310 | | }, | 311 | 15.1M | Some(matched) => Some(matched), | 312 | | } | 313 | 15.1M | } |
Unexecuted instantiation: <xml5ever::tokenizer::XmlTokenizer<_>>::eat |
314 | | |
315 | | /// Run the state machine for as long as we can. |
316 | 35.8M | pub fn run(&self, input: &BufferQueue) -> TokenizerResult<Sink::Handle> { |
317 | 35.8M | if self.opts.profile { |
318 | | loop { |
319 | 0 | let state = self.state.get(); |
320 | 0 | let old_sink = self.time_in_sink.get(); |
321 | 0 | let (run, mut dt) = time!(self.step(input)); |
322 | 0 | dt -= self.time_in_sink.get() - old_sink; |
323 | 0 | let new = match self.state_profile.borrow_mut().get_mut(&state) { |
324 | 0 | Some(x) => { |
325 | 0 | *x += dt; |
326 | 0 | false |
327 | | }, |
328 | 0 | None => true, |
329 | | }; |
330 | 0 | if new { |
331 | 0 | // do this here because of borrow shenanigans |
332 | 0 | self.state_profile.borrow_mut().insert(state, dt); |
333 | 0 | } |
334 | 0 | match run { |
335 | 0 | ProcessResult::Continue => continue, |
336 | 0 | ProcessResult::Done => return TokenizerResult::Done, |
337 | 0 | ProcessResult::Script(handle) => return TokenizerResult::Script(handle), |
338 | | } |
339 | | } |
340 | | } else { |
341 | | loop { |
342 | 80.9M | match self.step(input) { |
343 | 45.1M | ProcessResult::Continue => continue, |
344 | 35.7M | ProcessResult::Done => return TokenizerResult::Done, |
345 | 62.1k | ProcessResult::Script(handle) => return TokenizerResult::Script(handle), |
346 | | } |
347 | | } |
348 | | } |
349 | 35.8M | } <xml5ever::tokenizer::XmlTokenizer<xml5ever::tree_builder::XmlTreeBuilder<alloc::rc::Rc<markup5ever_rcdom::Node>, markup5ever_rcdom::RcDom>>>::run Line | Count | Source | 316 | 35.8M | pub fn run(&self, input: &BufferQueue) -> TokenizerResult<Sink::Handle> { | 317 | 35.8M | if self.opts.profile { | 318 | | loop { | 319 | 0 | let state = self.state.get(); | 320 | 0 | let old_sink = self.time_in_sink.get(); | 321 | 0 | let (run, mut dt) = time!(self.step(input)); | 322 | 0 | dt -= self.time_in_sink.get() - old_sink; | 323 | 0 | let new = match self.state_profile.borrow_mut().get_mut(&state) { | 324 | 0 | Some(x) => { | 325 | 0 | *x += dt; | 326 | 0 | false | 327 | | }, | 328 | 0 | None => true, | 329 | | }; | 330 | 0 | if new { | 331 | 0 | // do this here because of borrow shenanigans | 332 | 0 | self.state_profile.borrow_mut().insert(state, dt); | 333 | 0 | } | 334 | 0 | match run { | 335 | 0 | ProcessResult::Continue => continue, | 336 | 0 | ProcessResult::Done => return TokenizerResult::Done, | 337 | 0 | ProcessResult::Script(handle) => return TokenizerResult::Script(handle), | 338 | | } | 339 | | } | 340 | | } else { | 341 | | loop { | 342 | 80.9M | match self.step(input) { | 343 | 45.1M | ProcessResult::Continue => continue, | 344 | 35.7M | ProcessResult::Done => return TokenizerResult::Done, | 345 | 62.1k | ProcessResult::Script(handle) => return TokenizerResult::Script(handle), | 346 | | } | 347 | | } | 348 | | } | 349 | 35.8M | } |
Unexecuted instantiation: <xml5ever::tokenizer::XmlTokenizer<_>>::run |
350 | | |
351 | | //§ tokenization |
352 | | // Get the next input character, if one is available. |
353 | 96.7M | fn get_char(&self, input: &BufferQueue) -> Option<char> { |
354 | 96.7M | if self.reconsume.get() { |
355 | 510k | self.reconsume.set(false); |
356 | 510k | Some(self.current_char.get()) |
357 | | } else { |
358 | 96.2M | input |
359 | 96.2M | .next() |
360 | 96.2M | .and_then(|c| self.get_preprocessed_char(c, input)) <xml5ever::tokenizer::XmlTokenizer<xml5ever::tree_builder::XmlTreeBuilder<alloc::rc::Rc<markup5ever_rcdom::Node>, markup5ever_rcdom::RcDom>>>::get_char::{closure#0}Line | Count | Source | 360 | 73.8M | .and_then(|c| self.get_preprocessed_char(c, input)) |
Unexecuted instantiation: <xml5ever::tokenizer::XmlTokenizer<_>>::get_char::{closure#0} |
361 | | } |
362 | 96.7M | } <xml5ever::tokenizer::XmlTokenizer<xml5ever::tree_builder::XmlTreeBuilder<alloc::rc::Rc<markup5ever_rcdom::Node>, markup5ever_rcdom::RcDom>>>::get_char Line | Count | Source | 353 | 96.7M | fn get_char(&self, input: &BufferQueue) -> Option<char> { | 354 | 96.7M | if self.reconsume.get() { | 355 | 510k | self.reconsume.set(false); | 356 | 510k | Some(self.current_char.get()) | 357 | | } else { | 358 | 96.2M | input | 359 | 96.2M | .next() | 360 | 96.2M | .and_then(|c| self.get_preprocessed_char(c, input)) | 361 | | } | 362 | 96.7M | } |
Unexecuted instantiation: <xml5ever::tokenizer::XmlTokenizer<_>>::get_char |
363 | | |
364 | 5.20M | fn bad_char_error(&self) { |
365 | 5.20M | let msg = if self.opts.exact_errors { |
366 | 0 | let c = self.current_char.get(); |
367 | 0 | let state = self.state.get(); |
368 | 0 | Cow::from(format!("Saw {c} in state {state:?}")) |
369 | | } else { |
370 | 5.20M | Cow::from("Bad character") |
371 | | }; |
372 | 5.20M | self.emit_error(msg); |
373 | 5.20M | } <xml5ever::tokenizer::XmlTokenizer<xml5ever::tree_builder::XmlTreeBuilder<alloc::rc::Rc<markup5ever_rcdom::Node>, markup5ever_rcdom::RcDom>>>::bad_char_error Line | Count | Source | 364 | 5.20M | fn bad_char_error(&self) { | 365 | 5.20M | let msg = if self.opts.exact_errors { | 366 | 0 | let c = self.current_char.get(); | 367 | 0 | let state = self.state.get(); | 368 | 0 | Cow::from(format!("Saw {c} in state {state:?}")) | 369 | | } else { | 370 | 5.20M | Cow::from("Bad character") | 371 | | }; | 372 | 5.20M | self.emit_error(msg); | 373 | 5.20M | } |
Unexecuted instantiation: <xml5ever::tokenizer::XmlTokenizer<_>>::bad_char_error |
374 | | |
375 | 1.39M | fn discard_tag(&self) { |
376 | 1.39M | *self.current_tag_name.borrow_mut() = StrTendril::new(); |
377 | 1.39M | *self.current_tag_attrs.borrow_mut() = Vec::new(); |
378 | 1.39M | } <xml5ever::tokenizer::XmlTokenizer<xml5ever::tree_builder::XmlTreeBuilder<alloc::rc::Rc<markup5ever_rcdom::Node>, markup5ever_rcdom::RcDom>>>::discard_tag Line | Count | Source | 375 | 1.39M | fn discard_tag(&self) { | 376 | 1.39M | *self.current_tag_name.borrow_mut() = StrTendril::new(); | 377 | 1.39M | *self.current_tag_attrs.borrow_mut() = Vec::new(); | 378 | 1.39M | } |
Unexecuted instantiation: <xml5ever::tokenizer::XmlTokenizer<_>>::discard_tag |
379 | | |
380 | 1.39M | fn create_tag(&self, kind: TagKind, c: char) { |
381 | 1.39M | self.discard_tag(); |
382 | 1.39M | self.current_tag_name.borrow_mut().push_char(c); |
383 | 1.39M | self.current_tag_kind.set(kind); |
384 | 1.39M | } <xml5ever::tokenizer::XmlTokenizer<xml5ever::tree_builder::XmlTreeBuilder<alloc::rc::Rc<markup5ever_rcdom::Node>, markup5ever_rcdom::RcDom>>>::create_tag Line | Count | Source | 380 | 1.39M | fn create_tag(&self, kind: TagKind, c: char) { | 381 | 1.39M | self.discard_tag(); | 382 | 1.39M | self.current_tag_name.borrow_mut().push_char(c); | 383 | 1.39M | self.current_tag_kind.set(kind); | 384 | 1.39M | } |
Unexecuted instantiation: <xml5ever::tokenizer::XmlTokenizer<_>>::create_tag |
385 | | |
386 | | // This method creates a PI token and |
387 | | // sets its target to given char |
388 | 21.4k | fn create_pi(&self, c: char) { |
389 | 21.4k | *self.current_pi_target.borrow_mut() = StrTendril::new(); |
390 | 21.4k | *self.current_pi_data.borrow_mut() = StrTendril::new(); |
391 | 21.4k | self.current_pi_target.borrow_mut().push_char(c); |
392 | 21.4k | } <xml5ever::tokenizer::XmlTokenizer<xml5ever::tree_builder::XmlTreeBuilder<alloc::rc::Rc<markup5ever_rcdom::Node>, markup5ever_rcdom::RcDom>>>::create_pi Line | Count | Source | 388 | 21.4k | fn create_pi(&self, c: char) { | 389 | 21.4k | *self.current_pi_target.borrow_mut() = StrTendril::new(); | 390 | 21.4k | *self.current_pi_data.borrow_mut() = StrTendril::new(); | 391 | 21.4k | self.current_pi_target.borrow_mut().push_char(c); | 392 | 21.4k | } |
Unexecuted instantiation: <xml5ever::tokenizer::XmlTokenizer<_>>::create_pi |
393 | | |
394 | 2.51M | fn emit_char(&self, c: char) { |
395 | 2.51M | self.process_token(Token::Characters(StrTendril::from_char(match c { |
396 | 0 | '\0' => '\u{FFFD}', |
397 | 2.51M | c => c, |
398 | | }))); |
399 | 2.51M | } <xml5ever::tokenizer::XmlTokenizer<xml5ever::tree_builder::XmlTreeBuilder<alloc::rc::Rc<markup5ever_rcdom::Node>, markup5ever_rcdom::RcDom>>>::emit_char Line | Count | Source | 394 | 2.51M | fn emit_char(&self, c: char) { | 395 | 2.51M | self.process_token(Token::Characters(StrTendril::from_char(match c { | 396 | 0 | '\0' => '\u{FFFD}', | 397 | 2.51M | c => c, | 398 | | }))); | 399 | 2.51M | } |
Unexecuted instantiation: <xml5ever::tokenizer::XmlTokenizer<_>>::emit_char |
400 | | |
401 | 1.00k | fn emit_short_tag(&self) -> ProcessResult<Sink::Handle> { |
402 | 1.00k | self.current_tag_kind.set(ShortTag); |
403 | 1.00k | *self.current_tag_name.borrow_mut() = StrTendril::new(); |
404 | 1.00k | self.emit_current_tag() |
405 | 1.00k | } <xml5ever::tokenizer::XmlTokenizer<xml5ever::tree_builder::XmlTreeBuilder<alloc::rc::Rc<markup5ever_rcdom::Node>, markup5ever_rcdom::RcDom>>>::emit_short_tag Line | Count | Source | 401 | 1.00k | fn emit_short_tag(&self) -> ProcessResult<Sink::Handle> { | 402 | 1.00k | self.current_tag_kind.set(ShortTag); | 403 | 1.00k | *self.current_tag_name.borrow_mut() = StrTendril::new(); | 404 | 1.00k | self.emit_current_tag() | 405 | 1.00k | } |
Unexecuted instantiation: <xml5ever::tokenizer::XmlTokenizer<_>>::emit_short_tag |
406 | | |
407 | 51.8k | fn emit_empty_tag(&self) -> ProcessResult<Sink::Handle> { |
408 | 51.8k | self.current_tag_kind.set(EmptyTag); |
409 | 51.8k | self.emit_current_tag() |
410 | 51.8k | } <xml5ever::tokenizer::XmlTokenizer<xml5ever::tree_builder::XmlTreeBuilder<alloc::rc::Rc<markup5ever_rcdom::Node>, markup5ever_rcdom::RcDom>>>::emit_empty_tag Line | Count | Source | 407 | 51.8k | fn emit_empty_tag(&self) -> ProcessResult<Sink::Handle> { | 408 | 51.8k | self.current_tag_kind.set(EmptyTag); | 409 | 51.8k | self.emit_current_tag() | 410 | 51.8k | } |
Unexecuted instantiation: <xml5ever::tokenizer::XmlTokenizer<_>>::emit_empty_tag |
411 | | |
412 | 185k | fn set_empty_tag(&self) { |
413 | 185k | self.current_tag_kind.set(EmptyTag); |
414 | 185k | } <xml5ever::tokenizer::XmlTokenizer<xml5ever::tree_builder::XmlTreeBuilder<alloc::rc::Rc<markup5ever_rcdom::Node>, markup5ever_rcdom::RcDom>>>::set_empty_tag Line | Count | Source | 412 | 185k | fn set_empty_tag(&self) { | 413 | 185k | self.current_tag_kind.set(EmptyTag); | 414 | 185k | } |
Unexecuted instantiation: <xml5ever::tokenizer::XmlTokenizer<_>>::set_empty_tag |
415 | | |
416 | 2.15k | fn emit_start_tag(&self) -> ProcessResult<Sink::Handle> { |
417 | 2.15k | self.current_tag_kind.set(StartTag); |
418 | 2.15k | self.emit_current_tag() |
419 | 2.15k | } <xml5ever::tokenizer::XmlTokenizer<xml5ever::tree_builder::XmlTreeBuilder<alloc::rc::Rc<markup5ever_rcdom::Node>, markup5ever_rcdom::RcDom>>>::emit_start_tag Line | Count | Source | 416 | 2.15k | fn emit_start_tag(&self) -> ProcessResult<Sink::Handle> { | 417 | 2.15k | self.current_tag_kind.set(StartTag); | 418 | 2.15k | self.emit_current_tag() | 419 | 2.15k | } |
Unexecuted instantiation: <xml5ever::tokenizer::XmlTokenizer<_>>::emit_start_tag |
420 | | |
421 | 1.39M | fn emit_current_tag(&self) -> ProcessResult<Sink::Handle> { |
422 | 1.39M | self.finish_attribute(); |
423 | | |
424 | 1.39M | let qname = process_qname(replace( |
425 | 1.39M | &mut *self.current_tag_name.borrow_mut(), |
426 | 1.39M | StrTendril::new(), |
427 | | )); |
428 | | |
429 | 1.39M | match self.current_tag_kind.get() { |
430 | 1.32M | StartTag | EmptyTag => {}, |
431 | | EndTag => { |
432 | 77.7k | if !self.current_tag_attrs.borrow().is_empty() { |
433 | 0 | self.emit_error(Borrowed("Attributes on an end tag")); |
434 | 77.7k | } |
435 | | }, |
436 | | ShortTag => { |
437 | 1.00k | if !self.current_tag_attrs.borrow().is_empty() { |
438 | 0 | self.emit_error(Borrowed("Attributes on a short tag")); |
439 | 1.00k | } |
440 | | }, |
441 | | } |
442 | | |
443 | 1.39M | let token = Token::Tag(Tag { |
444 | 1.39M | kind: self.current_tag_kind.get(), |
445 | 1.39M | name: qname, |
446 | 1.39M | attrs: self.current_tag_attrs.take(), |
447 | 1.39M | }); |
448 | | |
449 | 1.39M | self.process_token(token) |
450 | 1.39M | } <xml5ever::tokenizer::XmlTokenizer<xml5ever::tree_builder::XmlTreeBuilder<alloc::rc::Rc<markup5ever_rcdom::Node>, markup5ever_rcdom::RcDom>>>::emit_current_tag Line | Count | Source | 421 | 1.39M | fn emit_current_tag(&self) -> ProcessResult<Sink::Handle> { | 422 | 1.39M | self.finish_attribute(); | 423 | | | 424 | 1.39M | let qname = process_qname(replace( | 425 | 1.39M | &mut *self.current_tag_name.borrow_mut(), | 426 | 1.39M | StrTendril::new(), | 427 | | )); | 428 | | | 429 | 1.39M | match self.current_tag_kind.get() { | 430 | 1.32M | StartTag | EmptyTag => {}, | 431 | | EndTag => { | 432 | 77.7k | if !self.current_tag_attrs.borrow().is_empty() { | 433 | 0 | self.emit_error(Borrowed("Attributes on an end tag")); | 434 | 77.7k | } | 435 | | }, | 436 | | ShortTag => { | 437 | 1.00k | if !self.current_tag_attrs.borrow().is_empty() { | 438 | 0 | self.emit_error(Borrowed("Attributes on a short tag")); | 439 | 1.00k | } | 440 | | }, | 441 | | } | 442 | | | 443 | 1.39M | let token = Token::Tag(Tag { | 444 | 1.39M | kind: self.current_tag_kind.get(), | 445 | 1.39M | name: qname, | 446 | 1.39M | attrs: self.current_tag_attrs.take(), | 447 | 1.39M | }); | 448 | | | 449 | 1.39M | self.process_token(token) | 450 | 1.39M | } |
Unexecuted instantiation: <xml5ever::tokenizer::XmlTokenizer<_>>::emit_current_tag |
451 | | |
452 | | // The string must not contain '\0'! |
453 | 8.31M | fn emit_chars(&self, b: StrTendril) { |
454 | 8.31M | self.process_token(Token::Characters(b)); |
455 | 8.31M | } <xml5ever::tokenizer::XmlTokenizer<xml5ever::tree_builder::XmlTreeBuilder<alloc::rc::Rc<markup5ever_rcdom::Node>, markup5ever_rcdom::RcDom>>>::emit_chars Line | Count | Source | 453 | 8.31M | fn emit_chars(&self, b: StrTendril) { | 454 | 8.31M | self.process_token(Token::Characters(b)); | 455 | 8.31M | } |
Unexecuted instantiation: <xml5ever::tokenizer::XmlTokenizer<_>>::emit_chars |
456 | | |
457 | | // Emits the current Processing Instruction |
458 | 21.4k | fn emit_pi(&self) -> ProcessResult<<Sink as TokenSink>::Handle> { |
459 | 21.4k | let token = Token::ProcessingInstruction(Pi { |
460 | 21.4k | target: replace(&mut *self.current_pi_target.borrow_mut(), StrTendril::new()), |
461 | 21.4k | data: replace(&mut *self.current_pi_data.borrow_mut(), StrTendril::new()), |
462 | 21.4k | }); |
463 | 21.4k | self.process_token(token) |
464 | 21.4k | } <xml5ever::tokenizer::XmlTokenizer<xml5ever::tree_builder::XmlTreeBuilder<alloc::rc::Rc<markup5ever_rcdom::Node>, markup5ever_rcdom::RcDom>>>::emit_pi Line | Count | Source | 458 | 21.4k | fn emit_pi(&self) -> ProcessResult<<Sink as TokenSink>::Handle> { | 459 | 21.4k | let token = Token::ProcessingInstruction(Pi { | 460 | 21.4k | target: replace(&mut *self.current_pi_target.borrow_mut(), StrTendril::new()), | 461 | 21.4k | data: replace(&mut *self.current_pi_data.borrow_mut(), StrTendril::new()), | 462 | 21.4k | }); | 463 | 21.4k | self.process_token(token) | 464 | 21.4k | } |
Unexecuted instantiation: <xml5ever::tokenizer::XmlTokenizer<_>>::emit_pi |
465 | | |
466 | 891k | fn consume_char_ref(&self, addnl_allowed: Option<char>) { |
467 | | // NB: The char ref tokenizer assumes we have an additional allowed |
468 | | // character iff we're tokenizing in an attribute value. |
469 | 891k | *self.char_ref_tokenizer.borrow_mut() = |
470 | 891k | Some(Box::new(CharRefTokenizer::new(addnl_allowed))); |
471 | 891k | } <xml5ever::tokenizer::XmlTokenizer<xml5ever::tree_builder::XmlTreeBuilder<alloc::rc::Rc<markup5ever_rcdom::Node>, markup5ever_rcdom::RcDom>>>::consume_char_ref Line | Count | Source | 466 | 891k | fn consume_char_ref(&self, addnl_allowed: Option<char>) { | 467 | | // NB: The char ref tokenizer assumes we have an additional allowed | 468 | | // character iff we're tokenizing in an attribute value. | 469 | 891k | *self.char_ref_tokenizer.borrow_mut() = | 470 | 891k | Some(Box::new(CharRefTokenizer::new(addnl_allowed))); | 471 | 891k | } |
Unexecuted instantiation: <xml5ever::tokenizer::XmlTokenizer<_>>::consume_char_ref |
472 | | |
473 | 12.8k | fn emit_eof(&self) { |
474 | 12.8k | self.process_token(Token::EndOfFile); |
475 | 12.8k | } <xml5ever::tokenizer::XmlTokenizer<xml5ever::tree_builder::XmlTreeBuilder<alloc::rc::Rc<markup5ever_rcdom::Node>, markup5ever_rcdom::RcDom>>>::emit_eof Line | Count | Source | 473 | 12.8k | fn emit_eof(&self) { | 474 | 12.8k | self.process_token(Token::EndOfFile); | 475 | 12.8k | } |
Unexecuted instantiation: <xml5ever::tokenizer::XmlTokenizer<_>>::emit_eof |
476 | | |
477 | 6.44M | fn emit_error(&self, error: Cow<'static, str>) { |
478 | 6.44M | self.process_token(Token::ParseError(error)); |
479 | 6.44M | } <xml5ever::tokenizer::XmlTokenizer<xml5ever::tree_builder::XmlTreeBuilder<alloc::rc::Rc<markup5ever_rcdom::Node>, markup5ever_rcdom::RcDom>>>::emit_error Line | Count | Source | 477 | 6.44M | fn emit_error(&self, error: Cow<'static, str>) { | 478 | 6.44M | self.process_token(Token::ParseError(error)); | 479 | 6.44M | } |
Unexecuted instantiation: <xml5ever::tokenizer::XmlTokenizer<_>>::emit_error |
480 | | |
481 | 4.73M | fn emit_current_comment(&self) { |
482 | 4.73M | let comment = self.current_comment.take(); |
483 | 4.73M | self.process_token(Token::Comment(comment)); |
484 | 4.73M | } <xml5ever::tokenizer::XmlTokenizer<xml5ever::tree_builder::XmlTreeBuilder<alloc::rc::Rc<markup5ever_rcdom::Node>, markup5ever_rcdom::RcDom>>>::emit_current_comment Line | Count | Source | 481 | 4.73M | fn emit_current_comment(&self) { | 482 | 4.73M | let comment = self.current_comment.take(); | 483 | 4.73M | self.process_token(Token::Comment(comment)); | 484 | 4.73M | } |
Unexecuted instantiation: <xml5ever::tokenizer::XmlTokenizer<_>>::emit_current_comment |
485 | | |
486 | 334k | fn emit_current_doctype(&self) { |
487 | 334k | let doctype = self.current_doctype.take(); |
488 | 334k | self.process_token(Token::Doctype(doctype)); |
489 | 334k | } <xml5ever::tokenizer::XmlTokenizer<xml5ever::tree_builder::XmlTreeBuilder<alloc::rc::Rc<markup5ever_rcdom::Node>, markup5ever_rcdom::RcDom>>>::emit_current_doctype Line | Count | Source | 486 | 334k | fn emit_current_doctype(&self) { | 487 | 334k | let doctype = self.current_doctype.take(); | 488 | 334k | self.process_token(Token::Doctype(doctype)); | 489 | 334k | } |
Unexecuted instantiation: <xml5ever::tokenizer::XmlTokenizer<_>>::emit_current_doctype |
490 | | |
491 | 1.57M | fn doctype_id(&self, kind: DoctypeKind) -> RefMut<'_, Option<StrTendril>> { |
492 | 1.57M | let current_doctype = self.current_doctype.borrow_mut(); |
493 | 1.57M | match kind { |
494 | 1.15M | DoctypeKind::Public => RefMut::map(current_doctype, |d| &mut d.public_id), |
495 | 426k | DoctypeKind::System => RefMut::map(current_doctype, |d| &mut d.system_id), |
496 | | } |
497 | 1.57M | } <xml5ever::tokenizer::XmlTokenizer<xml5ever::tree_builder::XmlTreeBuilder<alloc::rc::Rc<markup5ever_rcdom::Node>, markup5ever_rcdom::RcDom>>>::doctype_id Line | Count | Source | 491 | 1.57M | fn doctype_id(&self, kind: DoctypeKind) -> RefMut<'_, Option<StrTendril>> { | 492 | 1.57M | let current_doctype = self.current_doctype.borrow_mut(); | 493 | 1.57M | match kind { | 494 | 1.15M | DoctypeKind::Public => RefMut::map(current_doctype, |d| &mut d.public_id), | 495 | 426k | DoctypeKind::System => RefMut::map(current_doctype, |d| &mut d.system_id), | 496 | | } | 497 | 1.57M | } |
Unexecuted instantiation: <xml5ever::tokenizer::XmlTokenizer<_>>::doctype_id |
498 | | |
499 | 27.8k | fn clear_doctype_id(&self, kind: DoctypeKind) { |
500 | 27.8k | let mut id = self.doctype_id(kind); |
501 | 27.8k | match *id { |
502 | 0 | Some(ref mut s) => s.clear(), |
503 | 27.8k | None => *id = Some(StrTendril::new()), |
504 | | } |
505 | 27.8k | } <xml5ever::tokenizer::XmlTokenizer<xml5ever::tree_builder::XmlTreeBuilder<alloc::rc::Rc<markup5ever_rcdom::Node>, markup5ever_rcdom::RcDom>>>::clear_doctype_id Line | Count | Source | 499 | 27.8k | fn clear_doctype_id(&self, kind: DoctypeKind) { | 500 | 27.8k | let mut id = self.doctype_id(kind); | 501 | 27.8k | match *id { | 502 | 0 | Some(ref mut s) => s.clear(), | 503 | 27.8k | None => *id = Some(StrTendril::new()), | 504 | | } | 505 | 27.8k | } |
Unexecuted instantiation: <xml5ever::tokenizer::XmlTokenizer<_>>::clear_doctype_id |
506 | | |
507 | 1.47M | fn peek(&self, input: &BufferQueue) -> Option<char> { |
508 | 1.47M | if self.reconsume.get() { |
509 | 0 | Some(self.current_char.get()) |
510 | | } else { |
511 | 1.47M | input.peek() |
512 | | } |
513 | 1.47M | } <xml5ever::tokenizer::XmlTokenizer<xml5ever::tree_builder::XmlTreeBuilder<alloc::rc::Rc<markup5ever_rcdom::Node>, markup5ever_rcdom::RcDom>>>::peek Line | Count | Source | 507 | 1.47M | fn peek(&self, input: &BufferQueue) -> Option<char> { | 508 | 1.47M | if self.reconsume.get() { | 509 | 0 | Some(self.current_char.get()) | 510 | | } else { | 511 | 1.47M | input.peek() | 512 | | } | 513 | 1.47M | } |
Unexecuted instantiation: <xml5ever::tokenizer::XmlTokenizer<_>>::peek |
514 | | |
515 | 377k | fn discard_char(&self, input: &BufferQueue) { |
516 | 377k | let c = self.get_char(input); |
517 | 377k | assert!(c.is_some()); |
518 | 377k | } <xml5ever::tokenizer::XmlTokenizer<xml5ever::tree_builder::XmlTreeBuilder<alloc::rc::Rc<markup5ever_rcdom::Node>, markup5ever_rcdom::RcDom>>>::discard_char Line | Count | Source | 515 | 377k | fn discard_char(&self, input: &BufferQueue) { | 516 | 377k | let c = self.get_char(input); | 517 | 377k | assert!(c.is_some()); | 518 | 377k | } |
Unexecuted instantiation: <xml5ever::tokenizer::XmlTokenizer<_>>::discard_char |
519 | | |
520 | 755k | fn unconsume(&self, input: &BufferQueue, buf: StrTendril) { |
521 | 755k | input.push_front(buf); |
522 | 755k | } <xml5ever::tokenizer::XmlTokenizer<xml5ever::tree_builder::XmlTreeBuilder<alloc::rc::Rc<markup5ever_rcdom::Node>, markup5ever_rcdom::RcDom>>>::unconsume Line | Count | Source | 520 | 755k | fn unconsume(&self, input: &BufferQueue, buf: StrTendril) { | 521 | 755k | input.push_front(buf); | 522 | 755k | } |
Unexecuted instantiation: <xml5ever::tokenizer::XmlTokenizer<_>>::unconsume |
523 | | } |
524 | | |
// Shorthand for common state machine behaviors.
//
// Each arm maps one short command (as written inside `go!`) onto the
// tokenizer method call or buffer mutation that implements it. `$me` is the
// tokenizer; the remaining tokens are the command name and its arguments.
macro_rules! shorthand (
    ( $me:ident : emit $c:expr ) => ( $me.emit_char($c) );
    ( $me:ident : create_tag $kind:ident $c:expr ) => ( $me.create_tag($kind, $c) );
    ( $me:ident : push_tag $c:expr ) => ( $me.current_tag_name.borrow_mut().push_char($c) );
    ( $me:ident : discard_tag $input:expr ) => ( $me.discard_tag($input) );
    // `discard_char` reads from the input queue, so the queue must be
    // forwarded; an argument-less expansion cannot type-check.
    ( $me:ident : discard_char $input:expr ) => ( $me.discard_char($input) );
    ( $me:ident : push_temp $c:expr ) => ( $me.temp_buf.borrow_mut().push_char($c) );
    ( $me:ident : emit_temp ) => ( $me.emit_temp_buf() );
    ( $me:ident : clear_temp ) => ( $me.clear_temp_buf() );
    ( $me:ident : create_attr $c:expr ) => ( $me.create_attribute($c) );
    ( $me:ident : push_name $c:expr ) => ( $me.current_attr_name.borrow_mut().push_char($c) );
    ( $me:ident : push_value $c:expr ) => ( $me.current_attr_value.borrow_mut().push_char($c) );
    ( $me:ident : append_value $c:expr ) => ( $me.current_attr_value.borrow_mut().push_tendril($c));
    ( $me:ident : push_comment $c:expr ) => ( $me.current_comment.borrow_mut().push_char($c) );
    ( $me:ident : append_comment $c:expr ) => ( $me.current_comment.borrow_mut().push_slice($c) );
    ( $me:ident : emit_comment ) => ( $me.emit_current_comment() );
    ( $me:ident : clear_comment ) => ( $me.current_comment.borrow_mut().clear() );
    ( $me:ident : create_doctype ) => ( *$me.current_doctype.borrow_mut() = Doctype::default() );
    ( $me:ident : push_doctype_name $c:expr ) => ( option_push(&mut $me.current_doctype.borrow_mut().name, $c) );
    ( $me:ident : push_doctype_id $k:ident $c:expr ) => ( option_push(&mut $me.doctype_id($k), $c) );
    ( $me:ident : clear_doctype_id $k:ident ) => ( $me.clear_doctype_id($k) );
    ( $me:ident : emit_doctype ) => ( $me.emit_current_doctype() );
    ( $me:ident : error ) => ( $me.bad_char_error() );
    ( $me:ident : error_eof ) => ( $me.bad_eof_error() );
    ( $me:ident : create_pi $c:expr ) => ( $me.create_pi($c) );
    ( $me:ident : push_pi_target $c:expr ) => ( $me.current_pi_target.borrow_mut().push_char($c) );
    ( $me:ident : push_pi_data $c:expr ) => ( $me.current_pi_data.borrow_mut().push_char($c) );
    ( $me:ident : set_empty_tag ) => ( $me.set_empty_tag() );
);
555 | | |
// Tracing of tokenizer actions. This adds significant bloat and compile time,
// so it's behind a cfg flag.
//
// With the `trace_tokenizer` feature on, every shorthand command is logged
// through `debug!` before it runs; with it off, `sh_trace!` is a plain
// pass-through to `shorthand!`.
#[cfg(feature = "trace_tokenizer")]
macro_rules! sh_trace {
    ( $me:ident : $($cmds:tt)* ) => {{
        debug!(" {:?}", stringify!($($cmds)*));
        shorthand!($me : $($cmds)*);
    }};
}

#[cfg(not(feature = "trace_tokenizer"))]
macro_rules! sh_trace {
    ( $me:ident : $($cmds:tt)* ) => {
        shorthand!($me: $($cmds)*)
    };
}
566 | | |
// A little DSL for sequencing shorthand actions.
//
// `go!(me: a; b; c)` runs the `;`-separated commands in order. Ordinary
// commands are forwarded to `sh_trace!`; the terminal commands below
// (`to`, `reconsume`, `consume_char_ref`, `emit_*`, `eof`) return from the
// enclosing function.
macro_rules! go {
    // A pattern like $($cmd:tt)* ; $($rest:tt)* causes parse ambiguity.
    // We have to tell the parser how much lookahead we need.

    ( $me:ident : $a:tt ; $($rest:tt)* ) => {{
        sh_trace!($me: $a);
        go!($me: $($rest)*);
    }};
    ( $me:ident : $a:tt $b:tt ; $($rest:tt)* ) => {{
        sh_trace!($me: $a $b);
        go!($me: $($rest)*);
    }};
    ( $me:ident : $a:tt $b:tt $c:tt ; $($rest:tt)* ) => {{
        sh_trace!($me: $a $b $c);
        go!($me: $($rest)*);
    }};
    ( $me:ident : $a:tt $b:tt $c:tt $d:tt ; $($rest:tt)* ) => {{
        sh_trace!($me: $a $b $c $d);
        go!($me: $($rest)*);
    }};

    // These can only come at the end.

    // Switch state and ask to be stepped again.
    ( $me:ident : to $s:ident ) => {{
        $me.state.set(XmlState::$s);
        return ProcessResult::Continue;
    }};
    ( $me:ident : to $s:ident $k1:expr ) => {{
        $me.state.set(XmlState::$s($k1));
        return ProcessResult::Continue;
    }};
    ( $me:ident : to $s:ident $k1:ident $k2:expr ) => {{
        $me.state.set(XmlState::$s($k1($k2)));
        return ProcessResult::Continue;
    }};

    // Same as `to`, but first raise the `reconsume` flag.
    ( $me:ident : reconsume $s:ident ) => {{
        $me.reconsume.set(true);
        go!($me: to $s);
    }};
    ( $me:ident : reconsume $s:ident $k1:expr ) => {{
        $me.reconsume.set(true);
        go!($me: to $s $k1);
    }};
    ( $me:ident : reconsume $s:ident $k1:ident $k2:expr ) => {{
        $me.reconsume.set(true);
        go!($me: to $s $k1 $k2);
    }};

    ( $me:ident : consume_char_ref ) => {{
        $me.consume_char_ref(None);
        return ProcessResult::Continue;
    }};
    ( $me:ident : consume_char_ref $addnl:expr ) => {{
        $me.consume_char_ref(Some($addnl));
        return ProcessResult::Continue;
    }};

    // We have a default next state after emitting a tag, but the sink can override.
    ( $me:ident : emit_tag $s:ident ) => {{
        $me.state.set(XmlState::$s);
        return $me.emit_current_tag();
    }};

    // Short and empty tags have dedicated emit methods in XML.
    ( $me:ident : emit_short_tag $s:ident ) => {{
        $me.state.set(XmlState::$s);
        return $me.emit_short_tag();
    }};

    ( $me:ident : emit_empty_tag $s:ident ) => {{
        $me.state.set(XmlState::$s);
        return $me.emit_empty_tag();
    }};

    ( $me:ident : emit_start_tag $s:ident ) => {{
        $me.state.set(XmlState::$s);
        return $me.emit_start_tag();
    }};

    ( $me:ident : emit_pi $s:ident ) => {{
        $me.state.set(XmlState::$s);
        return $me.emit_pi();
    }};

    ( $me:ident : eof ) => {{
        $me.emit_eof();
        return ProcessResult::Done;
    }};

    // If nothing else matched, it's a single command
    ( $me:ident : $($cmd:tt)+ ) => {
        sh_trace!($me: $($cmd)+)
    };

    // or nothing.
    ( $me:ident : ) => {
        ()
    };
}
625 | | |
// This is a macro because it can cause early return
// from the function where it is used.
//
// Evaluates to the character produced by `$me.get_char($input)`; when that
// call yields `None`, the enclosing function returns `ProcessResult::Done`.
macro_rules! get_char {
    ($me:expr, $input:expr) => {
        match $me.get_char($input) {
            Some(character) => character,
            None => return ProcessResult::Done,
        }
    };
}
634 | | |
// Evaluates to the value produced by `$me.pop_except_from($input, $set)`;
// when that call yields `None`, the enclosing function returns
// `ProcessResult::Done`.
macro_rules! pop_except_from {
    ($me:expr, $input:expr, $set:expr) => {
        match $me.pop_except_from($input, $set) {
            Some(popped_element) => popped_element,
            None => return ProcessResult::Done,
        }
    };
}
641 | | |
// Evaluates to the value produced by `$me.eat($input, $pat)`; when that call
// yields `None`, the enclosing function returns `ProcessResult::Done`.
macro_rules! eat {
    ($me:expr, $input:expr, $pat:expr) => {
        match $me.eat($input, $pat) {
            Some(value) => value,
            None => return ProcessResult::Done,
        }
    };
}
648 | | |
/// The result of a single tokenization operation
///
/// `Debug` is derived so results can be logged and compared in assertions;
/// the derive only applies when `Handle` is itself `Debug`.
#[derive(Debug)]
pub enum ProcessResult<Handle> {
    /// The tokenizer needs more input before it can continue.
    ///
    /// Also returned once the end-of-file token has been emitted.
    Done,
    /// The tokenizer can be invoked again immediately
    Continue,
    /// The tokenizer encountered a script element that must be executed
    /// before tokenization can continue
    Script(Handle),
}
659 | | |
660 | | impl<Sink: TokenSink> XmlTokenizer<Sink> { |
661 | | // Run the state machine for a while. |
662 | | #[allow(clippy::never_loop)] |
663 | 80.9M | fn step(&self, input: &BufferQueue) -> ProcessResult<Sink::Handle> { |
664 | 80.9M | if self.char_ref_tokenizer.borrow().is_some() { |
665 | 9.70M | return self.step_char_ref_tokenizer(input); |
666 | 71.2M | } |
667 | | |
668 | 71.2M | debug!("processing in state {:?}", self.state); |
669 | 71.2M | match self.state.get() { |
670 | | //§ data-state |
671 | | XmlState::Data => loop { |
672 | 18.8M | match pop_except_from!(self, input, small_char_set!('\r' '&' '<')) { |
673 | 382k | FromSet('&') => go!(self: consume_char_ref), |
674 | 6.58M | FromSet('<') => go!(self: to TagState), |
675 | 84.3k | FromSet(c) => go!(self: emit c), |
676 | 8.31M | NotFromSet(b) => self.emit_chars(b), |
677 | | } |
678 | | }, |
679 | | //§ tag-state |
680 | | XmlState::TagState => loop { |
681 | 6.88M | match get_char!(self, input) { |
682 | 5.01M | '!' => go!(self: to MarkupDecl), |
683 | 80.5k | '/' => go!(self: to EndTagState), |
684 | 75.8k | '?' => go!(self: to Pi), |
685 | | '\t' | '\n' | ' ' | ':' | '<' | '>' => { |
686 | 86.7k | go!(self: error; emit '<'; reconsume Data) |
687 | | }, |
688 | 1.32M | cl => go!(self: create_tag StartTag cl; to TagName), |
689 | | } |
690 | | }, |
691 | | //§ end-tag-state |
692 | | XmlState::EndTagState => loop { |
693 | 83.0k | match get_char!(self, input) { |
694 | 1.00k | '>' => go!(self: emit_short_tag Data), |
695 | | '\t' | '\n' | ' ' | '<' | ':' => { |
696 | 1.75k | go!(self: error; emit '<'; emit '/'; reconsume Data) |
697 | | }, |
698 | 77.7k | cl => go!(self: create_tag EndTag cl; to EndTagName), |
699 | | } |
700 | | }, |
701 | | //§ end-tag-name-state |
702 | | XmlState::EndTagName => loop { |
703 | 2.30M | match get_char!(self, input) { |
704 | 2.74k | '\t' | '\n' | ' ' => go!(self: to EndTagNameAfter), |
705 | 1.80k | '/' => go!(self: error; to EndTagNameAfter), |
706 | 72.6k | '>' => go!(self: emit_tag Data), |
707 | 1.25M | cl => go!(self: push_tag cl), |
708 | | } |
709 | | }, |
710 | | //§ end-tag-name-after-state |
711 | | XmlState::EndTagNameAfter => loop { |
712 | 733k | match get_char!(self, input) { |
713 | 4.43k | '>' => go!(self: emit_tag Data), |
714 | 8.32k | '\t' | '\n' | ' ' => (), |
715 | 457k | _ => self.emit_error(Borrowed("Unexpected element in tag name")), |
716 | | } |
717 | | }, |
718 | | //§ pi-state |
719 | | XmlState::Pi => loop { |
720 | 83.5k | match get_char!(self, input) { |
721 | 54.3k | '\t' | '\n' | ' ' => go!(self: error; reconsume BogusComment), |
722 | 21.4k | cl => go!(self: create_pi cl; to PiTarget), |
723 | | } |
724 | | }, |
725 | | //§ pi-target-state |
726 | | XmlState::PiTarget => loop { |
727 | 341k | match get_char!(self, input) { |
728 | 12.9k | '\t' | '\n' | ' ' => go!(self: to PiTargetAfter), |
729 | 8.32k | '?' => go!(self: to PiAfter), |
730 | 205k | cl => go!(self: push_pi_target cl), |
731 | | } |
732 | | }, |
733 | | //§ pi-target-after-state |
734 | | XmlState::PiTargetAfter => loop { |
735 | 16.7k | match get_char!(self, input) { |
736 | 2.27k | '\t' | '\n' | ' ' => (), |
737 | 12.8k | _ => go!(self: reconsume PiData), |
738 | | } |
739 | | }, |
740 | | //§ pi-data-state |
741 | | XmlState::PiData => loop { |
742 | 1.10M | match get_char!(self, input) { |
743 | 12.6k | '?' => go!(self: to PiAfter), |
744 | 837k | cl => go!(self: push_pi_data cl), |
745 | | } |
746 | | }, |
747 | | //§ pi-after-state |
748 | | XmlState::PiAfter => loop { |
749 | 672k | match get_char!(self, input) { |
750 | 20.8k | '>' => go!(self: emit_pi Data), |
751 | 5.60k | '?' => go!(self: to PiAfter), |
752 | 506k | cl => go!(self: push_pi_data cl), |
753 | | } |
754 | | }, |
755 | | //§ markup-declaration-state |
756 | | XmlState::MarkupDecl => loop { |
757 | 5.02M | if eat!(self, input, "--") { |
758 | 47.7k | go!(self: clear_comment; to CommentStart); |
759 | 4.97M | } else if eat!(self, input, "[CDATA[") { |
760 | 387 | go!(self: to Cdata); |
761 | 4.97M | } else if eat!(self, input, "DOCTYPE") { |
762 | 334k | go!(self: to Doctype); |
763 | | } else { |
764 | | // FIXME: 'error' gives wrong message |
765 | 4.63M | go!(self: error; to BogusComment); |
766 | | } |
767 | | }, |
768 | | //§ comment-start-state |
769 | | XmlState::CommentStart => loop { |
770 | 48.1k | match get_char!(self, input) { |
771 | 35.7k | '-' => go!(self: to CommentStartDash), |
772 | 8.29k | '>' => go!(self: error; emit_comment; to Data), |
773 | 3.70k | _ => go!(self: reconsume Comment), |
774 | | } |
775 | | }, |
776 | | //§ comment-start-dash-state |
777 | | XmlState::CommentStartDash => loop { |
778 | 35.9k | match get_char!(self, input) { |
779 | 34.7k | '-' => go!(self: to CommentEnd), |
780 | 229 | '>' => go!(self: error; emit_comment; to Data), |
781 | 695 | _ => go!(self: push_comment '-'; reconsume Comment), |
782 | | } |
783 | | }, |
784 | | //§ comment-state |
785 | | XmlState::Comment => loop { |
786 | 1.28M | match get_char!(self, input) { |
787 | 12.3k | '<' => go!(self: push_comment '<'; to CommentLessThan), |
788 | 7.08k | '-' => go!(self: to CommentEndDash), |
789 | 883k | c => go!(self: push_comment c), |
790 | | } |
791 | | }, |
792 | | //§ comment-less-than-sign-state |
793 | | XmlState::CommentLessThan => loop { |
794 | 16.6k | match get_char!(self, input) { |
795 | 5.57k | '!' => go!(self: push_comment '!';to CommentLessThanBang), |
796 | 1.45k | '<' => go!(self: push_comment '<'), |
797 | 6.71k | _ => go!(self: reconsume Comment), |
798 | | } |
799 | | }, |
800 | | //§ comment-less-than-sign-bang-state |
801 | | XmlState::CommentLessThanBang => loop { |
802 | 5.79k | match get_char!(self, input) { |
803 | 1.62k | '-' => go!(self: to CommentLessThanBangDash), |
804 | 3.93k | _ => go!(self: reconsume Comment), |
805 | | } |
806 | | }, |
807 | | //§ comment-less-than-sign-bang-dash-state |
808 | | XmlState::CommentLessThanBangDash => loop { |
809 | 1.84k | match get_char!(self, input) { |
810 | 1.14k | '-' => go!(self: to CommentLessThanBangDashDash), |
811 | 471 | _ => go!(self: reconsume CommentEndDash), |
812 | | } |
813 | | }, |
814 | | //§ comment-less-than-sign-bang-dash-dash-state |
815 | | XmlState::CommentLessThanBangDashDash => loop { |
816 | 1.34k | match get_char!(self, input) { |
817 | 369 | '>' => go!(self: reconsume CommentEnd), |
818 | 762 | _ => go!(self: error; reconsume CommentEnd), |
819 | | } |
820 | | }, |
821 | | //§ comment-end-dash-state |
822 | | XmlState::CommentEndDash => loop { |
823 | 8.43k | match get_char!(self, input) { |
824 | 4.93k | '-' => go!(self: to CommentEnd), |
825 | 2.95k | _ => go!(self: push_comment '-'; reconsume Comment), |
826 | | } |
827 | | }, |
828 | | //§ comment-end-state |
829 | | XmlState::CommentEnd => loop { |
830 | 42.8k | match get_char!(self, input) { |
831 | 4.57k | '>' => go!(self: emit_comment; to Data), |
832 | 34.8k | '!' => go!(self: to CommentEndBang), |
833 | 1.62k | '-' => go!(self: push_comment '-'), |
834 | 1.43k | _ => go!(self: append_comment "--"; reconsume Comment), |
835 | | } |
836 | | }, |
837 | | //§ comment-end-bang-state |
838 | | XmlState::CommentEndBang => loop { |
839 | 35.1k | match get_char!(self, input) { |
840 | 355 | '-' => go!(self: append_comment "--!"; to CommentEndDash), |
841 | 34.0k | '>' => go!(self: error; emit_comment; to Data), |
842 | 415 | _ => go!(self: append_comment "--!"; reconsume Comment), |
843 | | } |
844 | | }, |
845 | | //§ bogus-comment-state |
846 | | XmlState::BogusComment => loop { |
847 | 6.62M | match get_char!(self, input) { |
848 | 4.68M | '>' => go!(self: emit_comment; to Data), |
849 | 1.56M | c => go!(self: push_comment c), |
850 | | } |
851 | | }, |
852 | | //§ cdata-state |
853 | | XmlState::Cdata => loop { |
854 | 2.36M | match get_char!(self, input) { |
855 | 1.39k | ']' => go!(self: to CdataBracket), |
856 | 1.94M | cl => go!(self: emit cl), |
857 | | } |
858 | | }, |
859 | | //§ cdata-bracket-state |
860 | | XmlState::CdataBracket => loop { |
861 | 1.78k | match get_char!(self, input) { |
862 | 778 | ']' => go!(self: to CdataEnd), |
863 | 600 | cl => go!(self: emit ']'; emit cl; to Cdata), |
864 | | } |
865 | | }, |
866 | | //§ cdata-end-state |
867 | | XmlState::CdataEnd => loop { |
868 | 1.66k | match get_char!(self, input) { |
869 | 208 | '>' => go!(self: to Data), |
870 | 378 | ']' => go!(self: emit ']'), |
871 | 547 | cl => go!(self: emit ']'; emit ']'; emit cl; to Cdata), |
872 | | } |
873 | | }, |
874 | | //§ tag-name-state |
875 | | XmlState::TagName => loop { |
876 | 12.2M | match get_char!(self, input) { |
877 | 258k | '\t' | '\n' | ' ' => go!(self: to TagAttrNameBefore), |
878 | 1.04M | '>' => go!(self: emit_tag Data), |
879 | 12.3k | '/' => go!(self: set_empty_tag; to TagEmpty), |
880 | 6.87M | cl => go!(self: push_tag cl), |
881 | | } |
882 | | }, |
883 | | //§ empty-tag-state |
884 | | XmlState::TagEmpty => loop { |
885 | 236k | match get_char!(self, input) { |
886 | 51.8k | '>' => go!(self: emit_empty_tag Data), |
887 | 133k | _ => go!(self: reconsume TagAttrValueBefore), |
888 | | } |
889 | | }, |
890 | | //§ tag-attribute-name-before-state |
891 | | XmlState::TagAttrNameBefore => loop { |
892 | 768k | match get_char!(self, input) { |
893 | 18.4k | '\t' | '\n' | ' ' => (), |
894 | 11.6k | '>' => go!(self: emit_tag Data), |
895 | 7.43k | '/' => go!(self: set_empty_tag; to TagEmpty), |
896 | 525 | ':' => go!(self: error), |
897 | 622k | cl => go!(self: create_attr cl; to TagAttrName), |
898 | | } |
899 | | }, |
900 | | //§ tag-attribute-name-state |
901 | | XmlState::TagAttrName => loop { |
902 | 40.2M | match get_char!(self, input) { |
903 | 313k | '=' => go!(self: to TagAttrValueBefore), |
904 | 124k | '>' => go!(self: emit_tag Data), |
905 | 2.53M | '\t' | '\n' | ' ' => go!(self: to TagAttrNameAfter), |
906 | 120k | '/' => go!(self: set_empty_tag; to TagEmpty), |
907 | 25.8M | cl => go!(self: push_name cl), |
908 | | } |
909 | | }, |
910 | | //§ tag-attribute-name-after-state |
911 | | XmlState::TagAttrNameAfter => loop { |
912 | 3.60M | match get_char!(self, input) { |
913 | 91.0k | '\t' | '\n' | ' ' => (), |
914 | 2.14k | '=' => go!(self: to TagAttrValueBefore), |
915 | 14.3k | '>' => go!(self: emit_tag Data), |
916 | 44.9k | '/' => go!(self: set_empty_tag; to TagEmpty), |
917 | 2.47M | cl => go!(self: create_attr cl; to TagAttrName), |
918 | | } |
919 | | }, |
920 | | //§ tag-attribute-value-before-state |
921 | | XmlState::TagAttrValueBefore => loop { |
922 | 467k | match get_char!(self, input) { |
923 | 8.24k | '\t' | '\n' | ' ' => (), |
924 | 248k | '"' => go!(self: to TagAttrValue DoubleQuoted), |
925 | 5.34k | '\'' => go!(self: to TagAttrValue SingleQuoted), |
926 | 4.27k | '&' => go!(self: reconsume TagAttrValue(Unquoted)), |
927 | 1.99k | '>' => go!(self: emit_tag Data), |
928 | 188k | cl => go!(self: push_value cl; to TagAttrValue(Unquoted)), |
929 | | } |
930 | | }, |
931 | | //§ tag-attribute-value-double-quoted-state |
932 | | XmlState::TagAttrValue(DoubleQuoted) => loop { |
933 | 6.17M | match pop_except_from!(self, input, small_char_set!('\n' '"' '&')) { |
934 | 248k | FromSet('"') => go!(self: to TagAttrNameBefore), |
935 | 175k | FromSet('&') => go!(self: consume_char_ref '"' ), |
936 | 111k | FromSet(c) => go!(self: push_value c), |
937 | 2.90M | NotFromSet(ref b) => go!(self: append_value b), |
938 | | } |
939 | | }, |
940 | | //§ tag-attribute-value-single-quoted-state |
941 | | XmlState::TagAttrValue(SingleQuoted) => loop { |
942 | 1.19M | match pop_except_from!(self, input, small_char_set!('\n' '\'' '&')) { |
943 | 5.07k | FromSet('\'') => go!(self: to TagAttrNameBefore), |
944 | 110k | FromSet('&') => go!(self: consume_char_ref '\''), |
945 | 185k | FromSet(c) => go!(self: push_value c), |
946 | 513k | NotFromSet(ref b) => go!(self: append_value b), |
947 | | } |
948 | | }, |
949 | | //§ tag-attribute-value-double-quoted-state |
950 | | XmlState::TagAttrValue(Unquoted) => loop { |
951 | 13.9M | match pop_except_from!(self, input, small_char_set!('\n' '\t' ' ' '&' '>')) { |
952 | 129k | FromSet('\t') | FromSet('\n') | FromSet(' ') => go!(self: to TagAttrNameBefore), |
953 | 222k | FromSet('&') => go!(self: consume_char_ref), |
954 | 61.5k | FromSet('>') => go!(self: emit_tag Data), |
955 | 861 | FromSet(c) => go!(self: push_value c), |
956 | 6.80M | NotFromSet(ref b) => go!(self: append_value b), |
957 | | } |
958 | | }, |
959 | | |
960 | | //§ doctype-state |
961 | | XmlState::Doctype => loop { |
962 | 461k | match get_char!(self, input) { |
963 | 139k | '\t' | '\n' | '\x0C' | ' ' => go!(self: to BeforeDoctypeName), |
964 | 195k | _ => go!(self: error; reconsume BeforeDoctypeName), |
965 | | } |
966 | | }, |
967 | | //§ before-doctype-name-state |
968 | | XmlState::BeforeDoctypeName => loop { |
969 | 339k | match get_char!(self, input) { |
970 | 4.31k | '\t' | '\n' | '\x0C' | ' ' => (), |
971 | 129k | '>' => go!(self: error; emit_doctype; to Data), |
972 | 204k | c => go!(self: create_doctype; push_doctype_name (c.to_ascii_lowercase()); |
973 | | to DoctypeName), |
974 | | } |
975 | | }, |
976 | | //§ doctype-name-state |
977 | | XmlState::DoctypeName => loop { |
978 | 3.43M | match get_char!(self, input) { |
979 | 83.1k | '\t' | '\n' | '\x0C' | ' ' => go!(self: to AfterDoctypeName), |
980 | 121k | '>' => go!(self: emit_doctype; to Data), |
981 | 1.79M | c => go!(self: push_doctype_name (c.to_ascii_lowercase()); |
982 | | to DoctypeName), |
983 | | } |
984 | | }, |
985 | | //§ after-doctype-name-state |
986 | | XmlState::AfterDoctypeName => loop { |
987 | 91.7k | if eat!(self, input, "public") { |
988 | 15.1k | go!(self: to AfterDoctypeKeyword Public); |
989 | 72.2k | } else if eat!(self, input, "system") { |
990 | 10.4k | go!(self: to AfterDoctypeKeyword System); |
991 | | } else { |
992 | 61.0k | match get_char!(self, input) { |
993 | 3.32k | '\t' | '\n' | '\x0C' | ' ' => (), |
994 | 48.2k | '>' => go!(self: emit_doctype; to Data), |
995 | 9.13k | _ => go!(self: error; to BogusDoctype), |
996 | | } |
997 | | } |
998 | | }, |
999 | | //§ after-doctype-public-keyword-state |
1000 | | XmlState::AfterDoctypeKeyword(Public) => loop { |
1001 | 15.4k | match get_char!(self, input) { |
1002 | 1.65k | '\t' | '\n' | '\x0C' | ' ' => go!(self: to BeforeDoctypeIdentifier Public), |
1003 | | '"' => { |
1004 | 7.43k | go!(self: error; clear_doctype_id Public; to DoctypeIdentifierDoubleQuoted Public) |
1005 | | }, |
1006 | | '\'' => { |
1007 | 5.57k | go!(self: error; clear_doctype_id Public; to DoctypeIdentifierSingleQuoted Public) |
1008 | | }, |
1009 | 229 | '>' => go!(self: error; emit_doctype; to Data), |
1010 | 299 | _ => go!(self: error; to BogusDoctype), |
1011 | | } |
1012 | | }, |
1013 | | //§ after-doctype-system-keyword-state |
1014 | | XmlState::AfterDoctypeKeyword(System) => loop { |
1015 | 10.6k | match get_char!(self, input) { |
1016 | 1.33k | '\t' | '\n' | '\x0C' | ' ' => go!(self: to BeforeDoctypeIdentifier System), |
1017 | | '"' => { |
1018 | 907 | go!(self: error; clear_doctype_id System; to DoctypeIdentifierDoubleQuoted System) |
1019 | | }, |
1020 | | '\'' => { |
1021 | 7.22k | go!(self: error; clear_doctype_id System; to DoctypeIdentifierSingleQuoted System) |
1022 | | }, |
1023 | 459 | '>' => go!(self: error; emit_doctype; to Data), |
1024 | 494 | _ => go!(self: error; to BogusDoctype), |
1025 | | } |
1026 | | }, |
1027 | | //§ before_doctype_public_identifier_state before_doctype_system_identifier_state |
1028 | 3.91k | XmlState::BeforeDoctypeIdentifier(kind) => loop { |
1029 | 4.87k | match get_char!(self, input) { |
1030 | 958 | '\t' | '\n' | '\x0C' | ' ' => (), |
1031 | 326 | '"' => go!(self: clear_doctype_id kind; to DoctypeIdentifierDoubleQuoted kind), |
1032 | 194 | '\'' => go!(self: clear_doctype_id kind; to DoctypeIdentifierSingleQuoted kind), |
1033 | 1.39k | '>' => go!(self: error; emit_doctype; to Data), |
1034 | 971 | _ => go!(self: error; to BogusDoctype), |
1035 | | } |
1036 | | }, |
1037 | | //§ doctype_public_identifier_double_quoted_state doctype_system_identifier_double_quoted_state |
1038 | 56.2k | XmlState::DoctypeIdentifierDoubleQuoted(kind) => loop { |
1039 | 1.18M | match get_char!(self, input) { |
1040 | 7.60k | '"' => go!(self: to AfterDoctypeIdentifier kind), |
1041 | 1.84k | '>' => go!(self: error; emit_doctype; to Data), |
1042 | 1.12M | c => go!(self: push_doctype_id kind c), |
1043 | | } |
1044 | | }, |
1045 | | //§ doctype_public_identifier_single_quoted_state doctype_system_identifier_single_quoted_state |
1046 | 387k | XmlState::DoctypeIdentifierSingleQuoted(kind) => loop { |
1047 | 814k | match get_char!(self, input) { |
1048 | 8.74k | '\'' => go!(self: to AfterDoctypeIdentifier kind), |
1049 | 9.92k | '>' => go!(self: error; emit_doctype; to Data), |
1050 | 427k | c => go!(self: push_doctype_id kind c), |
1051 | | } |
1052 | | }, |
1053 | | //§ after_doctype_public_identifier_state |
1054 | | XmlState::AfterDoctypeIdentifier(Public) => loop { |
1055 | 11.1k | match get_char!(self, input) { |
1056 | | '\t' | '\n' | '\x0C' | ' ' => { |
1057 | 3.59k | go!(self: to BetweenDoctypePublicAndSystemIdentifiers) |
1058 | | }, |
1059 | | '\'' => { |
1060 | 5.46k | go!(self: error; clear_doctype_id System; to DoctypeIdentifierSingleQuoted(System)) |
1061 | | }, |
1062 | | '"' => { |
1063 | 708 | go!(self: error; clear_doctype_id System; to DoctypeIdentifierDoubleQuoted(System)) |
1064 | | }, |
1065 | 273 | '>' => go!(self: emit_doctype; to Data), |
1066 | 875 | _ => go!(self: error; to BogusDoctype), |
1067 | | } |
1068 | | }, |
1069 | | //§ after_doctype_system_identifier_state |
1070 | | XmlState::AfterDoctypeIdentifier(System) => loop { |
1071 | 15.2k | match get_char!(self, input) { |
1072 | 8.84k | '\t' | '\n' | '\x0C' | ' ' => (), |
1073 | 4.34k | '>' => go!(self: emit_doctype; to Data), |
1074 | 1.02k | _ => go!(self: error; to BogusDoctype), |
1075 | | } |
1076 | | }, |
1077 | | //§ between_doctype_public_and_system_identifier_state |
1078 | | XmlState::BetweenDoctypePublicAndSystemIdentifiers => loop { |
1079 | 6.74k | match get_char!(self, input) { |
1080 | 963 | '\t' | '\n' | '\x0C' | ' ' => (), |
1081 | 775 | '>' => go!(self: emit_doctype; to Data), |
1082 | 338 | '\'' => go!(self: to DoctypeIdentifierSingleQuoted System), |
1083 | 204 | '"' => go!(self: to DoctypeIdentifierDoubleQuoted System), |
1084 | 2.20k | _ => go!(self: error; to BogusDoctype), |
1085 | | } |
1086 | | }, |
1087 | | //§ bogus_doctype_state |
1088 | | XmlState::BogusDoctype => loop { |
1089 | 1.35M | if get_char!(self, input) == '>' { |
1090 | 14.6k | go!(self: emit_doctype; to Data); |
1091 | 991k | } |
1092 | | }, |
1093 | | } |
1094 | 80.9M | } <xml5ever::tokenizer::XmlTokenizer<xml5ever::tree_builder::XmlTreeBuilder<alloc::rc::Rc<markup5ever_rcdom::Node>, markup5ever_rcdom::RcDom>>>::step Line | Count | Source | 663 | 80.9M | fn step(&self, input: &BufferQueue) -> ProcessResult<Sink::Handle> { | 664 | 80.9M | if self.char_ref_tokenizer.borrow().is_some() { | 665 | 9.70M | return self.step_char_ref_tokenizer(input); | 666 | 71.2M | } | 667 | | | 668 | 71.2M | debug!("processing in state {:?}", self.state); | 669 | 71.2M | match self.state.get() { | 670 | | //§ data-state | 671 | | XmlState::Data => loop { | 672 | 18.8M | match pop_except_from!(self, input, small_char_set!('\r' '&' '<')) { | 673 | 382k | FromSet('&') => go!(self: consume_char_ref), | 674 | 6.58M | FromSet('<') => go!(self: to TagState), | 675 | 84.3k | FromSet(c) => go!(self: emit c), | 676 | 8.31M | NotFromSet(b) => self.emit_chars(b), | 677 | | } | 678 | | }, | 679 | | //§ tag-state | 680 | | XmlState::TagState => loop { | 681 | 6.88M | match get_char!(self, input) { | 682 | 5.01M | '!' => go!(self: to MarkupDecl), | 683 | 80.5k | '/' => go!(self: to EndTagState), | 684 | 75.8k | '?' 
=> go!(self: to Pi), | 685 | | '\t' | '\n' | ' ' | ':' | '<' | '>' => { | 686 | 86.7k | go!(self: error; emit '<'; reconsume Data) | 687 | | }, | 688 | 1.32M | cl => go!(self: create_tag StartTag cl; to TagName), | 689 | | } | 690 | | }, | 691 | | //§ end-tag-state | 692 | | XmlState::EndTagState => loop { | 693 | 83.0k | match get_char!(self, input) { | 694 | 1.00k | '>' => go!(self: emit_short_tag Data), | 695 | | '\t' | '\n' | ' ' | '<' | ':' => { | 696 | 1.75k | go!(self: error; emit '<'; emit '/'; reconsume Data) | 697 | | }, | 698 | 77.7k | cl => go!(self: create_tag EndTag cl; to EndTagName), | 699 | | } | 700 | | }, | 701 | | //§ end-tag-name-state | 702 | | XmlState::EndTagName => loop { | 703 | 2.30M | match get_char!(self, input) { | 704 | 2.74k | '\t' | '\n' | ' ' => go!(self: to EndTagNameAfter), | 705 | 1.80k | '/' => go!(self: error; to EndTagNameAfter), | 706 | 72.6k | '>' => go!(self: emit_tag Data), | 707 | 1.25M | cl => go!(self: push_tag cl), | 708 | | } | 709 | | }, | 710 | | //§ end-tag-name-after-state | 711 | | XmlState::EndTagNameAfter => loop { | 712 | 733k | match get_char!(self, input) { | 713 | 4.43k | '>' => go!(self: emit_tag Data), | 714 | 8.32k | '\t' | '\n' | ' ' => (), | 715 | 457k | _ => self.emit_error(Borrowed("Unexpected element in tag name")), | 716 | | } | 717 | | }, | 718 | | //§ pi-state | 719 | | XmlState::Pi => loop { | 720 | 83.5k | match get_char!(self, input) { | 721 | 54.3k | '\t' | '\n' | ' ' => go!(self: error; reconsume BogusComment), | 722 | 21.4k | cl => go!(self: create_pi cl; to PiTarget), | 723 | | } | 724 | | }, | 725 | | //§ pi-target-state | 726 | | XmlState::PiTarget => loop { | 727 | 341k | match get_char!(self, input) { | 728 | 12.9k | '\t' | '\n' | ' ' => go!(self: to PiTargetAfter), | 729 | 8.32k | '?' 
=> go!(self: to PiAfter), | 730 | 205k | cl => go!(self: push_pi_target cl), | 731 | | } | 732 | | }, | 733 | | //§ pi-target-after-state | 734 | | XmlState::PiTargetAfter => loop { | 735 | 16.7k | match get_char!(self, input) { | 736 | 2.27k | '\t' | '\n' | ' ' => (), | 737 | 12.8k | _ => go!(self: reconsume PiData), | 738 | | } | 739 | | }, | 740 | | //§ pi-data-state | 741 | | XmlState::PiData => loop { | 742 | 1.10M | match get_char!(self, input) { | 743 | 12.6k | '?' => go!(self: to PiAfter), | 744 | 837k | cl => go!(self: push_pi_data cl), | 745 | | } | 746 | | }, | 747 | | //§ pi-after-state | 748 | | XmlState::PiAfter => loop { | 749 | 672k | match get_char!(self, input) { | 750 | 20.8k | '>' => go!(self: emit_pi Data), | 751 | 5.60k | '?' => go!(self: to PiAfter), | 752 | 506k | cl => go!(self: push_pi_data cl), | 753 | | } | 754 | | }, | 755 | | //§ markup-declaration-state | 756 | | XmlState::MarkupDecl => loop { | 757 | 5.02M | if eat!(self, input, "--") { | 758 | 47.7k | go!(self: clear_comment; to CommentStart); | 759 | 4.97M | } else if eat!(self, input, "[CDATA[") { | 760 | 387 | go!(self: to Cdata); | 761 | 4.97M | } else if eat!(self, input, "DOCTYPE") { | 762 | 334k | go!(self: to Doctype); | 763 | | } else { | 764 | | // FIXME: 'error' gives wrong message | 765 | 4.63M | go!(self: error; to BogusComment); | 766 | | } | 767 | | }, | 768 | | //§ comment-start-state | 769 | | XmlState::CommentStart => loop { | 770 | 48.1k | match get_char!(self, input) { | 771 | 35.7k | '-' => go!(self: to CommentStartDash), | 772 | 8.29k | '>' => go!(self: error; emit_comment; to Data), | 773 | 3.70k | _ => go!(self: reconsume Comment), | 774 | | } | 775 | | }, | 776 | | //§ comment-start-dash-state | 777 | | XmlState::CommentStartDash => loop { | 778 | 35.9k | match get_char!(self, input) { | 779 | 34.7k | '-' => go!(self: to CommentEnd), | 780 | 229 | '>' => go!(self: error; emit_comment; to Data), | 781 | 695 | _ => go!(self: push_comment '-'; reconsume 
Comment), | 782 | | } | 783 | | }, | 784 | | //§ comment-state | 785 | | XmlState::Comment => loop { | 786 | 1.28M | match get_char!(self, input) { | 787 | 12.3k | '<' => go!(self: push_comment '<'; to CommentLessThan), | 788 | 7.08k | '-' => go!(self: to CommentEndDash), | 789 | 883k | c => go!(self: push_comment c), | 790 | | } | 791 | | }, | 792 | | //§ comment-less-than-sign-state | 793 | | XmlState::CommentLessThan => loop { | 794 | 16.6k | match get_char!(self, input) { | 795 | 5.57k | '!' => go!(self: push_comment '!';to CommentLessThanBang), | 796 | 1.45k | '<' => go!(self: push_comment '<'), | 797 | 6.71k | _ => go!(self: reconsume Comment), | 798 | | } | 799 | | }, | 800 | | //§ comment-less-than-sign-bang-state | 801 | | XmlState::CommentLessThanBang => loop { | 802 | 5.79k | match get_char!(self, input) { | 803 | 1.62k | '-' => go!(self: to CommentLessThanBangDash), | 804 | 3.93k | _ => go!(self: reconsume Comment), | 805 | | } | 806 | | }, | 807 | | //§ comment-less-than-sign-bang-dash-state | 808 | | XmlState::CommentLessThanBangDash => loop { | 809 | 1.84k | match get_char!(self, input) { | 810 | 1.14k | '-' => go!(self: to CommentLessThanBangDashDash), | 811 | 471 | _ => go!(self: reconsume CommentEndDash), | 812 | | } | 813 | | }, | 814 | | //§ comment-less-than-sign-bang-dash-dash-state | 815 | | XmlState::CommentLessThanBangDashDash => loop { | 816 | 1.34k | match get_char!(self, input) { | 817 | 369 | '>' => go!(self: reconsume CommentEnd), | 818 | 762 | _ => go!(self: error; reconsume CommentEnd), | 819 | | } | 820 | | }, | 821 | | //§ comment-end-dash-state | 822 | | XmlState::CommentEndDash => loop { | 823 | 8.43k | match get_char!(self, input) { | 824 | 4.93k | '-' => go!(self: to CommentEnd), | 825 | 2.95k | _ => go!(self: push_comment '-'; reconsume Comment), | 826 | | } | 827 | | }, | 828 | | //§ comment-end-state | 829 | | XmlState::CommentEnd => loop { | 830 | 42.8k | match get_char!(self, input) { | 831 | 4.57k | '>' => go!(self: 
emit_comment; to Data), | 832 | 34.8k | '!' => go!(self: to CommentEndBang), | 833 | 1.62k | '-' => go!(self: push_comment '-'), | 834 | 1.43k | _ => go!(self: append_comment "--"; reconsume Comment), | 835 | | } | 836 | | }, | 837 | | //§ comment-end-bang-state | 838 | | XmlState::CommentEndBang => loop { | 839 | 35.1k | match get_char!(self, input) { | 840 | 355 | '-' => go!(self: append_comment "--!"; to CommentEndDash), | 841 | 34.0k | '>' => go!(self: error; emit_comment; to Data), | 842 | 415 | _ => go!(self: append_comment "--!"; reconsume Comment), | 843 | | } | 844 | | }, | 845 | | //§ bogus-comment-state | 846 | | XmlState::BogusComment => loop { | 847 | 6.62M | match get_char!(self, input) { | 848 | 4.68M | '>' => go!(self: emit_comment; to Data), | 849 | 1.56M | c => go!(self: push_comment c), | 850 | | } | 851 | | }, | 852 | | //§ cdata-state | 853 | | XmlState::Cdata => loop { | 854 | 2.36M | match get_char!(self, input) { | 855 | 1.39k | ']' => go!(self: to CdataBracket), | 856 | 1.94M | cl => go!(self: emit cl), | 857 | | } | 858 | | }, | 859 | | //§ cdata-bracket-state | 860 | | XmlState::CdataBracket => loop { | 861 | 1.78k | match get_char!(self, input) { | 862 | 778 | ']' => go!(self: to CdataEnd), | 863 | 600 | cl => go!(self: emit ']'; emit cl; to Cdata), | 864 | | } | 865 | | }, | 866 | | //§ cdata-end-state | 867 | | XmlState::CdataEnd => loop { | 868 | 1.66k | match get_char!(self, input) { | 869 | 208 | '>' => go!(self: to Data), | 870 | 378 | ']' => go!(self: emit ']'), | 871 | 547 | cl => go!(self: emit ']'; emit ']'; emit cl; to Cdata), | 872 | | } | 873 | | }, | 874 | | //§ tag-name-state | 875 | | XmlState::TagName => loop { | 876 | 12.2M | match get_char!(self, input) { | 877 | 258k | '\t' | '\n' | ' ' => go!(self: to TagAttrNameBefore), | 878 | 1.04M | '>' => go!(self: emit_tag Data), | 879 | 12.3k | '/' => go!(self: set_empty_tag; to TagEmpty), | 880 | 6.87M | cl => go!(self: push_tag cl), | 881 | | } | 882 | | }, | 883 | | //§ 
empty-tag-state | 884 | | XmlState::TagEmpty => loop { | 885 | 236k | match get_char!(self, input) { | 886 | 51.8k | '>' => go!(self: emit_empty_tag Data), | 887 | 133k | _ => go!(self: reconsume TagAttrValueBefore), | 888 | | } | 889 | | }, | 890 | | //§ tag-attribute-name-before-state | 891 | | XmlState::TagAttrNameBefore => loop { | 892 | 768k | match get_char!(self, input) { | 893 | 18.4k | '\t' | '\n' | ' ' => (), | 894 | 11.6k | '>' => go!(self: emit_tag Data), | 895 | 7.43k | '/' => go!(self: set_empty_tag; to TagEmpty), | 896 | 525 | ':' => go!(self: error), | 897 | 622k | cl => go!(self: create_attr cl; to TagAttrName), | 898 | | } | 899 | | }, | 900 | | //§ tag-attribute-name-state | 901 | | XmlState::TagAttrName => loop { | 902 | 40.2M | match get_char!(self, input) { | 903 | 313k | '=' => go!(self: to TagAttrValueBefore), | 904 | 124k | '>' => go!(self: emit_tag Data), | 905 | 2.53M | '\t' | '\n' | ' ' => go!(self: to TagAttrNameAfter), | 906 | 120k | '/' => go!(self: set_empty_tag; to TagEmpty), | 907 | 25.8M | cl => go!(self: push_name cl), | 908 | | } | 909 | | }, | 910 | | //§ tag-attribute-name-after-state | 911 | | XmlState::TagAttrNameAfter => loop { | 912 | 3.60M | match get_char!(self, input) { | 913 | 91.0k | '\t' | '\n' | ' ' => (), | 914 | 2.14k | '=' => go!(self: to TagAttrValueBefore), | 915 | 14.3k | '>' => go!(self: emit_tag Data), | 916 | 44.9k | '/' => go!(self: set_empty_tag; to TagEmpty), | 917 | 2.47M | cl => go!(self: create_attr cl; to TagAttrName), | 918 | | } | 919 | | }, | 920 | | //§ tag-attribute-value-before-state | 921 | | XmlState::TagAttrValueBefore => loop { | 922 | 467k | match get_char!(self, input) { | 923 | 8.24k | '\t' | '\n' | ' ' => (), | 924 | 248k | '"' => go!(self: to TagAttrValue DoubleQuoted), | 925 | 5.34k | '\'' => go!(self: to TagAttrValue SingleQuoted), | 926 | 4.27k | '&' => go!(self: reconsume TagAttrValue(Unquoted)), | 927 | 1.99k | '>' => go!(self: emit_tag Data), | 928 | 188k | cl => go!(self: 
push_value cl; to TagAttrValue(Unquoted)), | 929 | | } | 930 | | }, | 931 | | //§ tag-attribute-value-double-quoted-state | 932 | | XmlState::TagAttrValue(DoubleQuoted) => loop { | 933 | 6.17M | match pop_except_from!(self, input, small_char_set!('\n' '"' '&')) { | 934 | 248k | FromSet('"') => go!(self: to TagAttrNameBefore), | 935 | 175k | FromSet('&') => go!(self: consume_char_ref '"' ), | 936 | 111k | FromSet(c) => go!(self: push_value c), | 937 | 2.90M | NotFromSet(ref b) => go!(self: append_value b), | 938 | | } | 939 | | }, | 940 | | //§ tag-attribute-value-single-quoted-state | 941 | | XmlState::TagAttrValue(SingleQuoted) => loop { | 942 | 1.19M | match pop_except_from!(self, input, small_char_set!('\n' '\'' '&')) { | 943 | 5.07k | FromSet('\'') => go!(self: to TagAttrNameBefore), | 944 | 110k | FromSet('&') => go!(self: consume_char_ref '\''), | 945 | 185k | FromSet(c) => go!(self: push_value c), | 946 | 513k | NotFromSet(ref b) => go!(self: append_value b), | 947 | | } | 948 | | }, | 949 | | //§ tag-attribute-value-double-quoted-state | 950 | | XmlState::TagAttrValue(Unquoted) => loop { | 951 | 13.9M | match pop_except_from!(self, input, small_char_set!('\n' '\t' ' ' '&' '>')) { | 952 | 129k | FromSet('\t') | FromSet('\n') | FromSet(' ') => go!(self: to TagAttrNameBefore), | 953 | 222k | FromSet('&') => go!(self: consume_char_ref), | 954 | 61.5k | FromSet('>') => go!(self: emit_tag Data), | 955 | 861 | FromSet(c) => go!(self: push_value c), | 956 | 6.80M | NotFromSet(ref b) => go!(self: append_value b), | 957 | | } | 958 | | }, | 959 | | | 960 | | //§ doctype-state | 961 | | XmlState::Doctype => loop { | 962 | 461k | match get_char!(self, input) { | 963 | 139k | '\t' | '\n' | '\x0C' | ' ' => go!(self: to BeforeDoctypeName), | 964 | 195k | _ => go!(self: error; reconsume BeforeDoctypeName), | 965 | | } | 966 | | }, | 967 | | //§ before-doctype-name-state | 968 | | XmlState::BeforeDoctypeName => loop { | 969 | 339k | match get_char!(self, input) { | 970 | 
4.31k | '\t' | '\n' | '\x0C' | ' ' => (), | 971 | 129k | '>' => go!(self: error; emit_doctype; to Data), | 972 | 204k | c => go!(self: create_doctype; push_doctype_name (c.to_ascii_lowercase()); | 973 | | to DoctypeName), | 974 | | } | 975 | | }, | 976 | | //§ doctype-name-state | 977 | | XmlState::DoctypeName => loop { | 978 | 3.43M | match get_char!(self, input) { | 979 | 83.1k | '\t' | '\n' | '\x0C' | ' ' => go!(self: to AfterDoctypeName), | 980 | 121k | '>' => go!(self: emit_doctype; to Data), | 981 | 1.79M | c => go!(self: push_doctype_name (c.to_ascii_lowercase()); | 982 | | to DoctypeName), | 983 | | } | 984 | | }, | 985 | | //§ after-doctype-name-state | 986 | | XmlState::AfterDoctypeName => loop { | 987 | 91.7k | if eat!(self, input, "public") { | 988 | 15.1k | go!(self: to AfterDoctypeKeyword Public); | 989 | 72.2k | } else if eat!(self, input, "system") { | 990 | 10.4k | go!(self: to AfterDoctypeKeyword System); | 991 | | } else { | 992 | 61.0k | match get_char!(self, input) { | 993 | 3.32k | '\t' | '\n' | '\x0C' | ' ' => (), | 994 | 48.2k | '>' => go!(self: emit_doctype; to Data), | 995 | 9.13k | _ => go!(self: error; to BogusDoctype), | 996 | | } | 997 | | } | 998 | | }, | 999 | | //§ after-doctype-public-keyword-state | 1000 | | XmlState::AfterDoctypeKeyword(Public) => loop { | 1001 | 15.4k | match get_char!(self, input) { | 1002 | 1.65k | '\t' | '\n' | '\x0C' | ' ' => go!(self: to BeforeDoctypeIdentifier Public), | 1003 | | '"' => { | 1004 | 7.43k | go!(self: error; clear_doctype_id Public; to DoctypeIdentifierDoubleQuoted Public) | 1005 | | }, | 1006 | | '\'' => { | 1007 | 5.57k | go!(self: error; clear_doctype_id Public; to DoctypeIdentifierSingleQuoted Public) | 1008 | | }, | 1009 | 229 | '>' => go!(self: error; emit_doctype; to Data), | 1010 | 299 | _ => go!(self: error; to BogusDoctype), | 1011 | | } | 1012 | | }, | 1013 | | //§ after-doctype-system-keyword-state | 1014 | | XmlState::AfterDoctypeKeyword(System) => loop { | 1015 | 10.6k | match 
get_char!(self, input) { | 1016 | 1.33k | '\t' | '\n' | '\x0C' | ' ' => go!(self: to BeforeDoctypeIdentifier System), | 1017 | | '"' => { | 1018 | 907 | go!(self: error; clear_doctype_id System; to DoctypeIdentifierDoubleQuoted System) | 1019 | | }, | 1020 | | '\'' => { | 1021 | 7.22k | go!(self: error; clear_doctype_id System; to DoctypeIdentifierSingleQuoted System) | 1022 | | }, | 1023 | 459 | '>' => go!(self: error; emit_doctype; to Data), | 1024 | 494 | _ => go!(self: error; to BogusDoctype), | 1025 | | } | 1026 | | }, | 1027 | | //§ before_doctype_public_identifier_state before_doctype_system_identifier_state | 1028 | 3.91k | XmlState::BeforeDoctypeIdentifier(kind) => loop { | 1029 | 4.87k | match get_char!(self, input) { | 1030 | 958 | '\t' | '\n' | '\x0C' | ' ' => (), | 1031 | 326 | '"' => go!(self: clear_doctype_id kind; to DoctypeIdentifierDoubleQuoted kind), | 1032 | 194 | '\'' => go!(self: clear_doctype_id kind; to DoctypeIdentifierSingleQuoted kind), | 1033 | 1.39k | '>' => go!(self: error; emit_doctype; to Data), | 1034 | 971 | _ => go!(self: error; to BogusDoctype), | 1035 | | } | 1036 | | }, | 1037 | | //§ doctype_public_identifier_double_quoted_state doctype_system_identifier_double_quoted_state | 1038 | 56.2k | XmlState::DoctypeIdentifierDoubleQuoted(kind) => loop { | 1039 | 1.18M | match get_char!(self, input) { | 1040 | 7.60k | '"' => go!(self: to AfterDoctypeIdentifier kind), | 1041 | 1.84k | '>' => go!(self: error; emit_doctype; to Data), | 1042 | 1.12M | c => go!(self: push_doctype_id kind c), | 1043 | | } | 1044 | | }, | 1045 | | //§ doctype_public_identifier_single_quoted_state doctype_system_identifier_single_quoted_state | 1046 | 387k | XmlState::DoctypeIdentifierSingleQuoted(kind) => loop { | 1047 | 814k | match get_char!(self, input) { | 1048 | 8.74k | '\'' => go!(self: to AfterDoctypeIdentifier kind), | 1049 | 9.92k | '>' => go!(self: error; emit_doctype; to Data), | 1050 | 427k | c => go!(self: push_doctype_id kind c), | 1051 | | } | 
1052 | | }, | 1053 | | //§ doctype_public_identifier_single_quoted_state | 1054 | | XmlState::AfterDoctypeIdentifier(Public) => loop { | 1055 | 11.1k | match get_char!(self, input) { | 1056 | | '\t' | '\n' | '\x0C' | ' ' => { | 1057 | 3.59k | go!(self: to BetweenDoctypePublicAndSystemIdentifiers) | 1058 | | }, | 1059 | | '\'' => { | 1060 | 5.46k | go!(self: error; clear_doctype_id System; to DoctypeIdentifierSingleQuoted(System)) | 1061 | | }, | 1062 | | '"' => { | 1063 | 708 | go!(self: error; clear_doctype_id System; to DoctypeIdentifierDoubleQuoted(System)) | 1064 | | }, | 1065 | 273 | '>' => go!(self: emit_doctype; to Data), | 1066 | 875 | _ => go!(self: error; to BogusDoctype), | 1067 | | } | 1068 | | }, | 1069 | | //§ doctype_system_identifier_single_quoted_state | 1070 | | XmlState::AfterDoctypeIdentifier(System) => loop { | 1071 | 15.2k | match get_char!(self, input) { | 1072 | 8.84k | '\t' | '\n' | '\x0C' | ' ' => (), | 1073 | 4.34k | '>' => go!(self: emit_doctype; to Data), | 1074 | 1.02k | _ => go!(self: error; to BogusDoctype), | 1075 | | } | 1076 | | }, | 1077 | | //§ between_doctype_public_and_system_identifier_state | 1078 | | XmlState::BetweenDoctypePublicAndSystemIdentifiers => loop { | 1079 | 6.74k | match get_char!(self, input) { | 1080 | 963 | '\t' | '\n' | '\x0C' | ' ' => (), | 1081 | 775 | '>' => go!(self: emit_doctype; to Data), | 1082 | 338 | '\'' => go!(self: to DoctypeIdentifierSingleQuoted System), | 1083 | 204 | '"' => go!(self: to DoctypeIdentifierDoubleQuoted System), | 1084 | 2.20k | _ => go!(self: error; to BogusDoctype), | 1085 | | } | 1086 | | }, | 1087 | | //§ bogus_doctype_state | 1088 | | XmlState::BogusDoctype => loop { | 1089 | 1.35M | if get_char!(self, input) == '>' { | 1090 | 14.6k | go!(self: emit_doctype; to Data); | 1091 | 991k | } | 1092 | | }, | 1093 | | } | 1094 | 80.9M | } |
Unexecuted instantiation: <xml5ever::tokenizer::XmlTokenizer<_>>::step |
1095 | | |
1096 | | /// Indicate that we have reached the end of the input. |
1097 | 12.9k | pub fn end(&self) { |
1098 | | // Handle EOF in the char ref sub-tokenizer, if there is one. |
1099 | | // Do this first because it might un-consume stuff. |
1100 | 12.9k | let input = BufferQueue::default(); |
1101 | 12.9k | match self.char_ref_tokenizer.take() { |
1102 | 12.2k | None => (), |
1103 | 722 | Some(mut tok) => { |
1104 | 722 | tok.end_of_file(self, &input); |
1105 | 722 | self.process_char_ref(tok.get_result()); |
1106 | 722 | }, |
1107 | | } |
1108 | | |
1109 | | // Process all remaining buffered input. |
1110 | | // If we're waiting for lookahead, we're not gonna get it. |
1111 | 12.9k | self.at_eof.set(true); |
1112 | 12.9k | let _ = self.run(&input); |
1113 | | |
1114 | | loop { |
1115 | 23.0k | if !matches!(self.eof_step(), ProcessResult::Continue) { |
1116 | 12.9k | break; |
1117 | 10.0k | } |
1118 | | } |
1119 | | |
1120 | 12.9k | self.sink.end(); |
1121 | | |
1122 | 12.9k | if self.opts.profile { |
1123 | 0 | self.dump_profile(); |
1124 | 12.9k | } |
1125 | 12.9k | } <xml5ever::tokenizer::XmlTokenizer<xml5ever::tree_builder::XmlTreeBuilder<alloc::rc::Rc<markup5ever_rcdom::Node>, markup5ever_rcdom::RcDom>>>::end Line | Count | Source | 1097 | 12.9k | pub fn end(&self) { | 1098 | | // Handle EOF in the char ref sub-tokenizer, if there is one. | 1099 | | // Do this first because it might un-consume stuff. | 1100 | 12.9k | let input = BufferQueue::default(); | 1101 | 12.9k | match self.char_ref_tokenizer.take() { | 1102 | 12.2k | None => (), | 1103 | 722 | Some(mut tok) => { | 1104 | 722 | tok.end_of_file(self, &input); | 1105 | 722 | self.process_char_ref(tok.get_result()); | 1106 | 722 | }, | 1107 | | } | 1108 | | | 1109 | | // Process all remaining buffered input. | 1110 | | // If we're waiting for lookahead, we're not gonna get it. | 1111 | 12.9k | self.at_eof.set(true); | 1112 | 12.9k | let _ = self.run(&input); | 1113 | | | 1114 | | loop { | 1115 | 23.0k | if !matches!(self.eof_step(), ProcessResult::Continue) { | 1116 | 12.9k | break; | 1117 | 10.0k | } | 1118 | | } | 1119 | | | 1120 | 12.9k | self.sink.end(); | 1121 | | | 1122 | 12.9k | if self.opts.profile { | 1123 | 0 | self.dump_profile(); | 1124 | 12.9k | } | 1125 | 12.9k | } |
Unexecuted instantiation: <xml5ever::tokenizer::XmlTokenizer<_>>::end |
1126 | | |
/// `for_c` variant of `dump_profile`: it is never expected to be called and
/// panics via `unreachable!` if it is.
#[cfg(for_c)]
fn dump_profile(&self) {
    unreachable!()
}
1131 | | |
1132 | | #[cfg(not(for_c))] |
1133 | 0 | fn dump_profile(&self) { |
1134 | 0 | let mut results: Vec<(XmlState, u64)> = self |
1135 | 0 | .state_profile |
1136 | 0 | .borrow() |
1137 | 0 | .iter() |
1138 | 0 | .map(|(s, t)| (*s, *t)) Unexecuted instantiation: <xml5ever::tokenizer::XmlTokenizer<xml5ever::tree_builder::XmlTreeBuilder<alloc::rc::Rc<markup5ever_rcdom::Node>, markup5ever_rcdom::RcDom>>>::dump_profile::{closure#0}Unexecuted instantiation: <xml5ever::tokenizer::XmlTokenizer<_>>::dump_profile::{closure#0} |
1139 | 0 | .collect(); |
1140 | 0 | results.sort_by(|&(_, x), &(_, y)| y.cmp(&x)); Unexecuted instantiation: <xml5ever::tokenizer::XmlTokenizer<xml5ever::tree_builder::XmlTreeBuilder<alloc::rc::Rc<markup5ever_rcdom::Node>, markup5ever_rcdom::RcDom>>>::dump_profile::{closure#1}Unexecuted instantiation: <xml5ever::tokenizer::XmlTokenizer<_>>::dump_profile::{closure#1} |
1141 | | |
1142 | 0 | let total: u64 = results |
1143 | 0 | .iter() |
1144 | 0 | .map(|&(_, t)| t) |
1145 | 0 | .fold(0, ::std::ops::Add::add); |
1146 | 0 | debug!("\nTokenizer profile, in nanoseconds"); |
1147 | 0 | debug!( |
1148 | 0 | "\n{:12} total in token sink", |
1149 | 0 | self.time_in_sink.get() |
1150 | | ); |
1151 | 0 | debug!("\n{total:12} total in tokenizer"); |
1152 | | |
1153 | 0 | for (k, v) in results.into_iter() { |
1154 | 0 | let pct = 100.0 * (v as f64) / (total as f64); |
1155 | 0 | debug!("{v:12} {pct:4.1}% {k:?}"); |
1156 | | } |
1157 | 0 | } Unexecuted instantiation: <xml5ever::tokenizer::XmlTokenizer<xml5ever::tree_builder::XmlTreeBuilder<alloc::rc::Rc<markup5ever_rcdom::Node>, markup5ever_rcdom::RcDom>>>::dump_profile Unexecuted instantiation: <xml5ever::tokenizer::XmlTokenizer<_>>::dump_profile |
1158 | | |
1159 | 23.0k | fn eof_step(&self) -> ProcessResult<Sink::Handle> { |
1160 | 23.0k | debug!("processing EOF in state {:?}", self.state.get()); |
1161 | 23.0k | match self.state.get() { |
1162 | 12.2k | XmlState::Data => go!(self: eof), |
1163 | | XmlState::CommentStart | XmlState::CommentLessThan | XmlState::CommentLessThanBang => { |
1164 | 60 | go!(self: reconsume Comment) |
1165 | | }, |
1166 | 11 | XmlState::CommentLessThanBangDash => go!(self: reconsume CommentEndDash), |
1167 | 12 | XmlState::CommentLessThanBangDashDash => go!(self: reconsume CommentEnd), |
1168 | | XmlState::CommentStartDash |
1169 | | | XmlState::Comment |
1170 | | | XmlState::CommentEndDash |
1171 | | | XmlState::CommentEnd |
1172 | 604 | | XmlState::CommentEndBang => go!(self: error_eof; emit_comment; eof), |
1173 | 153 | XmlState::TagState => go!(self: error_eof; emit '<'; to Data), |
1174 | 19 | XmlState::EndTagState => go!(self: error_eof; emit '<'; emit '/'; to Data), |
1175 | 333 | XmlState::TagEmpty => go!(self: error_eof; to TagAttrNameBefore), |
1176 | | XmlState::Cdata | XmlState::CdataBracket | XmlState::CdataEnd => { |
1177 | 179 | go!(self: error_eof; to Data) |
1178 | | }, |
1179 | 13 | XmlState::Pi => go!(self: error_eof; to BogusComment), |
1180 | 234 | XmlState::PiTargetAfter | XmlState::PiAfter => go!(self: reconsume PiData), |
1181 | 0 | XmlState::MarkupDecl => go!(self: error_eof; to BogusComment), |
1182 | | XmlState::TagName |
1183 | | | XmlState::TagAttrNameBefore |
1184 | | | XmlState::EndTagName |
1185 | | | XmlState::TagAttrNameAfter |
1186 | | | XmlState::EndTagNameAfter |
1187 | | | XmlState::TagAttrValueBefore |
1188 | 4.64k | | XmlState::TagAttrValue(_) => go!(self: error_eof; emit_tag Data), |
1189 | 625 | XmlState::PiData | XmlState::PiTarget => go!(self: error_eof; emit_pi Data), |
1190 | 2.15k | XmlState::TagAttrName => go!(self: error_eof; emit_start_tag Data), |
1191 | | XmlState::BeforeDoctypeName |
1192 | | | XmlState::Doctype |
1193 | | | XmlState::DoctypeName |
1194 | | | XmlState::AfterDoctypeName |
1195 | | | XmlState::AfterDoctypeKeyword(_) |
1196 | | | XmlState::BeforeDoctypeIdentifier(_) |
1197 | | | XmlState::AfterDoctypeIdentifier(_) |
1198 | | | XmlState::DoctypeIdentifierSingleQuoted(_) |
1199 | | | XmlState::DoctypeIdentifierDoubleQuoted(_) |
1200 | | | XmlState::BetweenDoctypePublicAndSystemIdentifiers => { |
1201 | 985 | go!(self: error_eof; emit_doctype; to Data) |
1202 | | }, |
1203 | 353 | XmlState::BogusDoctype => go!(self: emit_doctype; to Data), |
1204 | 454 | XmlState::BogusComment => go!(self: emit_comment; to Data), |
1205 | | } |
1206 | 23.0k | } <xml5ever::tokenizer::XmlTokenizer<xml5ever::tree_builder::XmlTreeBuilder<alloc::rc::Rc<markup5ever_rcdom::Node>, markup5ever_rcdom::RcDom>>>::eof_step Line | Count | Source | 1159 | 23.0k | fn eof_step(&self) -> ProcessResult<Sink::Handle> { | 1160 | 23.0k | debug!("processing EOF in state {:?}", self.state.get()); | 1161 | 23.0k | match self.state.get() { | 1162 | 12.2k | XmlState::Data => go!(self: eof), | 1163 | | XmlState::CommentStart | XmlState::CommentLessThan | XmlState::CommentLessThanBang => { | 1164 | 60 | go!(self: reconsume Comment) | 1165 | | }, | 1166 | 11 | XmlState::CommentLessThanBangDash => go!(self: reconsume CommentEndDash), | 1167 | 12 | XmlState::CommentLessThanBangDashDash => go!(self: reconsume CommentEnd), | 1168 | | XmlState::CommentStartDash | 1169 | | | XmlState::Comment | 1170 | | | XmlState::CommentEndDash | 1171 | | | XmlState::CommentEnd | 1172 | 604 | | XmlState::CommentEndBang => go!(self: error_eof; emit_comment; eof), | 1173 | 153 | XmlState::TagState => go!(self: error_eof; emit '<'; to Data), | 1174 | 19 | XmlState::EndTagState => go!(self: error_eof; emit '<'; emit '/'; to Data), | 1175 | 333 | XmlState::TagEmpty => go!(self: error_eof; to TagAttrNameBefore), | 1176 | | XmlState::Cdata | XmlState::CdataBracket | XmlState::CdataEnd => { | 1177 | 179 | go!(self: error_eof; to Data) | 1178 | | }, | 1179 | 13 | XmlState::Pi => go!(self: error_eof; to BogusComment), | 1180 | 234 | XmlState::PiTargetAfter | XmlState::PiAfter => go!(self: reconsume PiData), | 1181 | 0 | XmlState::MarkupDecl => go!(self: error_eof; to BogusComment), | 1182 | | XmlState::TagName | 1183 | | | XmlState::TagAttrNameBefore | 1184 | | | XmlState::EndTagName | 1185 | | | XmlState::TagAttrNameAfter | 1186 | | | XmlState::EndTagNameAfter | 1187 | | | XmlState::TagAttrValueBefore | 1188 | 4.64k | | XmlState::TagAttrValue(_) => go!(self: error_eof; emit_tag Data), | 1189 | 625 | XmlState::PiData | XmlState::PiTarget => go!(self: error_eof; 
emit_pi Data), | 1190 | 2.15k | XmlState::TagAttrName => go!(self: error_eof; emit_start_tag Data), | 1191 | | XmlState::BeforeDoctypeName | 1192 | | | XmlState::Doctype | 1193 | | | XmlState::DoctypeName | 1194 | | | XmlState::AfterDoctypeName | 1195 | | | XmlState::AfterDoctypeKeyword(_) | 1196 | | | XmlState::BeforeDoctypeIdentifier(_) | 1197 | | | XmlState::AfterDoctypeIdentifier(_) | 1198 | | | XmlState::DoctypeIdentifierSingleQuoted(_) | 1199 | | | XmlState::DoctypeIdentifierDoubleQuoted(_) | 1200 | | | XmlState::BetweenDoctypePublicAndSystemIdentifiers => { | 1201 | 985 | go!(self: error_eof; emit_doctype; to Data) | 1202 | | }, | 1203 | 353 | XmlState::BogusDoctype => go!(self: emit_doctype; to Data), | 1204 | 454 | XmlState::BogusComment => go!(self: emit_comment; to Data), | 1205 | | } | 1206 | 23.0k | } |
Unexecuted instantiation: <xml5ever::tokenizer::XmlTokenizer<_>>::eof_step |
1207 | | |
1208 | 891k | fn process_char_ref(&self, char_ref: CharRef) { |
1209 | | let CharRef { |
1210 | 891k | mut chars, |
1211 | 891k | mut num_chars, |
1212 | 891k | } = char_ref; |
1213 | | |
1214 | 891k | if num_chars == 0 { |
1215 | 574k | chars[0] = '&'; |
1216 | 574k | num_chars = 1; |
1217 | 574k | } |
1218 | | |
1219 | 907k | for i in 0..num_chars { |
1220 | 907k | let c = chars[i as usize]; |
1221 | 907k | match self.state.get() { |
1222 | 384k | XmlState::Data | XmlState::Cdata => go!(self: emit c), |
1223 | | |
1224 | 523k | XmlState::TagAttrValue(_) => go!(self: push_value c), |
1225 | | |
1226 | 0 | _ => panic!( |
1227 | 0 | "state {:?} should not be reachable in process_char_ref", |
1228 | 0 | self.state.get() |
1229 | | ), |
1230 | | } |
1231 | | } |
1232 | 891k | } <xml5ever::tokenizer::XmlTokenizer<xml5ever::tree_builder::XmlTreeBuilder<alloc::rc::Rc<markup5ever_rcdom::Node>, markup5ever_rcdom::RcDom>>>::process_char_ref Line | Count | Source | 1208 | 891k | fn process_char_ref(&self, char_ref: CharRef) { | 1209 | | let CharRef { | 1210 | 891k | mut chars, | 1211 | 891k | mut num_chars, | 1212 | 891k | } = char_ref; | 1213 | | | 1214 | 891k | if num_chars == 0 { | 1215 | 574k | chars[0] = '&'; | 1216 | 574k | num_chars = 1; | 1217 | 574k | } | 1218 | | | 1219 | 907k | for i in 0..num_chars { | 1220 | 907k | let c = chars[i as usize]; | 1221 | 907k | match self.state.get() { | 1222 | 384k | XmlState::Data | XmlState::Cdata => go!(self: emit c), | 1223 | | | 1224 | 523k | XmlState::TagAttrValue(_) => go!(self: push_value c), | 1225 | | | 1226 | 0 | _ => panic!( | 1227 | 0 | "state {:?} should not be reachable in process_char_ref", | 1228 | 0 | self.state.get() | 1229 | | ), | 1230 | | } | 1231 | | } | 1232 | 891k | } |
Unexecuted instantiation: <xml5ever::tokenizer::XmlTokenizer<_>>::process_char_ref |
1233 | | |
1234 | 9.70M | fn step_char_ref_tokenizer(&self, input: &BufferQueue) -> ProcessResult<Sink::Handle> { |
1235 | 9.70M | let mut tok = self.char_ref_tokenizer.take().unwrap(); |
1236 | 9.70M | let outcome = tok.step(self, input); |
1237 | | |
1238 | 9.70M | let progress = match outcome { |
1239 | | char_ref::Done => { |
1240 | 890k | self.process_char_ref(tok.get_result()); |
1241 | 890k | return ProcessResult::Continue; |
1242 | | }, |
1243 | | |
1244 | 303k | char_ref::Stuck => ProcessResult::Done, |
1245 | 8.50M | char_ref::Progress => ProcessResult::Continue, |
1246 | | }; |
1247 | | |
1248 | 8.80M | *self.char_ref_tokenizer.borrow_mut() = Some(tok); |
1249 | 8.80M | progress |
1250 | 9.70M | } <xml5ever::tokenizer::XmlTokenizer<xml5ever::tree_builder::XmlTreeBuilder<alloc::rc::Rc<markup5ever_rcdom::Node>, markup5ever_rcdom::RcDom>>>::step_char_ref_tokenizer Line | Count | Source | 1234 | 9.70M | fn step_char_ref_tokenizer(&self, input: &BufferQueue) -> ProcessResult<Sink::Handle> { | 1235 | 9.70M | let mut tok = self.char_ref_tokenizer.take().unwrap(); | 1236 | 9.70M | let outcome = tok.step(self, input); | 1237 | | | 1238 | 9.70M | let progress = match outcome { | 1239 | | char_ref::Done => { | 1240 | 890k | self.process_char_ref(tok.get_result()); | 1241 | 890k | return ProcessResult::Continue; | 1242 | | }, | 1243 | | | 1244 | 303k | char_ref::Stuck => ProcessResult::Done, | 1245 | 8.50M | char_ref::Progress => ProcessResult::Continue, | 1246 | | }; | 1247 | | | 1248 | 8.80M | *self.char_ref_tokenizer.borrow_mut() = Some(tok); | 1249 | 8.80M | progress | 1250 | 9.70M | } |
Unexecuted instantiation: <xml5ever::tokenizer::XmlTokenizer<_>>::step_char_ref_tokenizer |
1251 | | |
1252 | 4.49M | fn finish_attribute(&self) { |
1253 | 4.49M | if self.current_attr_name.borrow().is_empty() { |
1254 | 1.39M | return; |
1255 | 3.09M | } |
1256 | | |
1257 | | // Check for a duplicate attribute. |
1258 | | // FIXME: the spec says we should error as soon as the name is finished. |
1259 | | // FIXME: linear time search, do we care? |
1260 | 3.09M | let dup = { |
1261 | 3.09M | let current_attr_name = self.current_attr_name.borrow(); |
1262 | 3.09M | let name = ¤t_attr_name[..]; |
1263 | 3.09M | self.current_tag_attrs |
1264 | 3.09M | .borrow() |
1265 | 3.09M | .iter() |
1266 | 333M | .any(|a| &*a.name.local == name) <xml5ever::tokenizer::XmlTokenizer<xml5ever::tree_builder::XmlTreeBuilder<alloc::rc::Rc<markup5ever_rcdom::Node>, markup5ever_rcdom::RcDom>>>::finish_attribute::{closure#0}Line | Count | Source | 1266 | 333M | .any(|a| &*a.name.local == name) |
Unexecuted instantiation: <xml5ever::tokenizer::XmlTokenizer<_>>::finish_attribute::{closure#0} |
1267 | | }; |
1268 | | |
1269 | 3.09M | if dup { |
1270 | 318k | self.emit_error(Borrowed("Duplicate attribute")); |
1271 | 318k | self.current_attr_name.borrow_mut().clear(); |
1272 | 318k | self.current_attr_value.borrow_mut().clear(); |
1273 | 318k | } else { |
1274 | 2.77M | let qname = process_qname(replace( |
1275 | 2.77M | &mut self.current_attr_name.borrow_mut(), |
1276 | 2.77M | StrTendril::new(), |
1277 | | )); |
1278 | 2.77M | let attr = Attribute { |
1279 | 2.77M | name: qname.clone(), |
1280 | 2.77M | value: replace(&mut self.current_attr_value.borrow_mut(), StrTendril::new()), |
1281 | 2.77M | }; |
1282 | | |
1283 | 2.77M | if qname.local == local_name!("xmlns") |
1284 | 2.75M | || qname.prefix == Some(namespace_prefix!("xmlns")) |
1285 | 258k | { |
1286 | 258k | self.current_tag_attrs.borrow_mut().insert(0, attr); |
1287 | 2.51M | } else { |
1288 | 2.51M | self.current_tag_attrs.borrow_mut().push(attr); |
1289 | 2.51M | } |
1290 | | } |
1291 | 4.49M | } <xml5ever::tokenizer::XmlTokenizer<xml5ever::tree_builder::XmlTreeBuilder<alloc::rc::Rc<markup5ever_rcdom::Node>, markup5ever_rcdom::RcDom>>>::finish_attribute Line | Count | Source | 1252 | 4.49M | fn finish_attribute(&self) { | 1253 | 4.49M | if self.current_attr_name.borrow().is_empty() { | 1254 | 1.39M | return; | 1255 | 3.09M | } | 1256 | | | 1257 | | // Check for a duplicate attribute. | 1258 | | // FIXME: the spec says we should error as soon as the name is finished. | 1259 | | // FIXME: linear time search, do we care? | 1260 | 3.09M | let dup = { | 1261 | 3.09M | let current_attr_name = self.current_attr_name.borrow(); | 1262 | 3.09M | let name = ¤t_attr_name[..]; | 1263 | 3.09M | self.current_tag_attrs | 1264 | 3.09M | .borrow() | 1265 | 3.09M | .iter() | 1266 | 3.09M | .any(|a| &*a.name.local == name) | 1267 | | }; | 1268 | | | 1269 | 3.09M | if dup { | 1270 | 318k | self.emit_error(Borrowed("Duplicate attribute")); | 1271 | 318k | self.current_attr_name.borrow_mut().clear(); | 1272 | 318k | self.current_attr_value.borrow_mut().clear(); | 1273 | 318k | } else { | 1274 | 2.77M | let qname = process_qname(replace( | 1275 | 2.77M | &mut self.current_attr_name.borrow_mut(), | 1276 | 2.77M | StrTendril::new(), | 1277 | | )); | 1278 | 2.77M | let attr = Attribute { | 1279 | 2.77M | name: qname.clone(), | 1280 | 2.77M | value: replace(&mut self.current_attr_value.borrow_mut(), StrTendril::new()), | 1281 | 2.77M | }; | 1282 | | | 1283 | 2.77M | if qname.local == local_name!("xmlns") | 1284 | 2.75M | || qname.prefix == Some(namespace_prefix!("xmlns")) | 1285 | 258k | { | 1286 | 258k | self.current_tag_attrs.borrow_mut().insert(0, attr); | 1287 | 2.51M | } else { | 1288 | 2.51M | self.current_tag_attrs.borrow_mut().push(attr); | 1289 | 2.51M | } | 1290 | | } | 1291 | 4.49M | } |
Unexecuted instantiation: <xml5ever::tokenizer::XmlTokenizer<_>>::finish_attribute |
1292 | | |
1293 | 3.09M | fn create_attribute(&self, c: char) { |
1294 | 3.09M | self.finish_attribute(); |
1295 | | |
1296 | 3.09M | self.current_attr_name.borrow_mut().push_char(c); |
1297 | 3.09M | } <xml5ever::tokenizer::XmlTokenizer<xml5ever::tree_builder::XmlTreeBuilder<alloc::rc::Rc<markup5ever_rcdom::Node>, markup5ever_rcdom::RcDom>>>::create_attribute Line | Count | Source | 1293 | 3.09M | fn create_attribute(&self, c: char) { | 1294 | 3.09M | self.finish_attribute(); | 1295 | | | 1296 | 3.09M | self.current_attr_name.borrow_mut().push_char(c); | 1297 | 3.09M | } |
Unexecuted instantiation: <xml5ever::tokenizer::XmlTokenizer<_>>::create_attribute |
1298 | | } |
1299 | | |
#[cfg(test)]
mod test {

    use super::process_qname;
    use crate::tendril::SliceExt;
    use crate::{LocalName, Prefix};

    /// Asserts that `input` is split into the expected prefix and local name.
    fn assert_split(input: &str, prefix: Option<&str>, local: &str) {
        let qname = process_qname(input.to_tendril());
        assert_eq!(qname.prefix, prefix.map(Prefix::from));
        assert_eq!(qname.local, LocalName::from(local));
    }

    /// Well-formed `prefix:local` names are split at the colon.
    #[test]
    fn simple_namespace() {
        assert_split("prefix:local", Some("prefix"), "local");
        assert_split("a:b", Some("a"), "b");
    }

    /// Names with misplaced or repeated colons get no prefix and keep the
    /// whole input as their local name.
    #[test]
    fn wrong_namespaces() {
        let malformed = [":local", "::local", "a::local", "fake::", ":::", ":a:b:"];

        for name in malformed {
            assert_split(name, None, name);
        }
    }
}