/src/html5ever/xml5ever/src/tree_builder/mod.rs
Line | Count | Source |
1 | | // Copyright 2014-2017 The html5ever Project Developers. See the |
2 | | // COPYRIGHT file at the top-level directory of this distribution. |
3 | | // |
4 | | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
5 | | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
6 | | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
7 | | // option. This file may not be copied, modified, or distributed |
8 | | // except according to those terms. |
9 | | |
10 | | mod types; |
11 | | |
12 | | use log::{debug, warn}; |
13 | | use markup5ever::{local_name, namespace_prefix, ns}; |
14 | | use std::borrow::Cow; |
15 | | use std::borrow::Cow::Borrowed; |
16 | | use std::cell::{Cell, Ref, RefCell}; |
17 | | use std::collections::btree_map::Iter; |
18 | | use std::collections::{BTreeMap, HashSet, VecDeque}; |
19 | | use std::fmt::{Debug, Error, Formatter}; |
20 | | use std::mem; |
21 | | |
22 | | pub use self::interface::{ElemName, NodeOrText, Tracer, TreeSink}; |
23 | | use self::types::*; |
24 | | use crate::interface::{self, create_element, AppendNode, Attribute, QualName}; |
25 | | use crate::interface::{AppendText, ExpandedName}; |
26 | | use crate::tokenizer::{self, EndTag, ProcessResult, StartTag, Tag, TokenSink}; |
27 | | use crate::tokenizer::{Doctype, EmptyTag, Pi, ShortTag}; |
28 | | use crate::{LocalName, Namespace, Prefix}; |
29 | | |
30 | | use crate::tendril::{StrTendril, Tendril}; |
31 | | |
// Namespace URI compared against the value of an `xmlns:xml` declaration in
// `NamespaceMap::insert_ns`.
static XML_URI: &str = "http://www.w3.org/XML/1998/namespace";
// Namespace URI that `NamespaceMap::insert_ns` refuses to accept as a
// declared value.
static XMLNS_URI: &str = "http://www.w3.org/2000/xmlns/";

// Result of trying to record a namespace declaration: `Ok(())` on success,
// otherwise a message describing why the declaration was rejected.
type InsResult = Result<(), Cow<'static, str>>;
36 | | |
37 | | #[derive(Debug)] |
38 | | struct NamespaceMapStack(Vec<NamespaceMap>); |
39 | | |
40 | | impl NamespaceMapStack { |
41 | 12.7k | fn new() -> NamespaceMapStack { |
42 | 12.7k | NamespaceMapStack(vec![NamespaceMap::default()]) |
43 | 12.7k | } |
44 | | |
45 | 930k | fn push(&mut self, map: NamespaceMap) { |
46 | 930k | self.0.push(map); |
47 | 930k | } |
48 | | |
49 | 172k | fn pop(&mut self) { |
50 | 172k | self.0.pop(); |
51 | 172k | } |
52 | | } |
53 | | |
pub(crate) struct NamespaceMap {
    // Maps namespace prefixes to namespace URIs.
    //
    // The key is the namespace prefix; a key of `None` is used for a
    // declaration without a prefix (`xmlns="..."`). The value is the URI
    // the prefix maps to.
    //
    // A value of `None` means the namespace denoted by the key has been
    // undeclared.
    scope: BTreeMap<Option<Prefix>, Option<Namespace>>,
}
64 | | |
65 | | impl Debug for NamespaceMap { |
66 | 0 | fn fmt(&self, f: &mut Formatter) -> Result<(), Error> { |
67 | 0 | write!(f, "\nNamespaceMap[")?; |
68 | 0 | for (key, value) in &self.scope { |
69 | 0 | writeln!(f, " {key:?} : {value:?}")?; |
70 | | } |
71 | 0 | write!(f, "]") |
72 | 0 | } |
73 | | } |
74 | | |
75 | | impl NamespaceMap { |
76 | | // Returns an empty namespace. |
77 | 2.02M | pub(crate) fn empty() -> NamespaceMap { |
78 | 2.02M | NamespaceMap { |
79 | 2.02M | scope: BTreeMap::new(), |
80 | 2.02M | } |
81 | 2.02M | } |
82 | | |
83 | 12.7k | fn default() -> NamespaceMap { |
84 | 12.7k | NamespaceMap { |
85 | 12.7k | scope: { |
86 | 12.7k | let mut map = BTreeMap::new(); |
87 | 12.7k | map.insert(None, None); |
88 | 12.7k | map.insert(Some(namespace_prefix!("xml")), Some(ns!(xml))); |
89 | 12.7k | map.insert(Some(namespace_prefix!("xmlns")), Some(ns!(xmlns))); |
90 | 12.7k | map |
91 | 12.7k | }, |
92 | 12.7k | } |
93 | 12.7k | } |
94 | | |
95 | 2.76G | pub(crate) fn get(&self, prefix: &Option<Prefix>) -> Option<&Option<Namespace>> { |
96 | 2.76G | self.scope.get(prefix) |
97 | 2.76G | } |
98 | | |
99 | 970k | pub(crate) fn get_scope_iter(&self) -> Iter<'_, Option<Prefix>, Option<Namespace>> { |
100 | 970k | self.scope.iter() |
101 | 970k | } |
102 | | |
103 | 342k | pub(crate) fn insert(&mut self, name: &QualName) { |
104 | 342k | let prefix = name.prefix.as_ref().cloned(); |
105 | 342k | let namespace = Some(Namespace::from(&*name.ns)); |
106 | 342k | self.scope.insert(prefix, namespace); |
107 | 342k | } |
108 | | |
109 | 237k | fn insert_ns(&mut self, attr: &Attribute) -> InsResult { |
110 | 237k | if &*attr.value == XMLNS_URI { |
111 | 215 | return Err(Borrowed("Can't declare XMLNS URI")); |
112 | 236k | }; |
113 | | |
114 | 236k | let opt_uri = if attr.value.is_empty() { |
115 | 162k | None |
116 | | } else { |
117 | 74.2k | Some(Namespace::from(&*attr.value)) |
118 | | }; |
119 | | |
120 | 236k | let result = match (&attr.name.prefix, &*attr.name.local) { |
121 | 219k | (&Some(namespace_prefix!("xmlns")), "xml") => { |
122 | 24.0k | if &*attr.value != XML_URI { |
123 | 24.0k | Err(Borrowed("XML namespace can't be redeclared")) |
124 | | } else { |
125 | 1 | Ok(()) |
126 | | } |
127 | | }, |
128 | | |
129 | 195k | (&Some(namespace_prefix!("xmlns")), "xmlns") => { |
130 | 194 | Err(Borrowed("XMLNS namespaces can't be changed")) |
131 | | }, |
132 | | |
133 | 17.2k | (&Some(namespace_prefix!("xmlns")), _) | (&None, "xmlns") => { |
134 | | // We can have two cases of properly defined xmlns |
135 | | // First with default namespace e.g. |
136 | | // |
137 | | // <a xmlns = "www.uri.org" /> |
138 | 211k | let ns_prefix = if &*attr.name.local == "xmlns" { |
139 | 17.2k | None |
140 | | |
141 | | // Second is with named namespace e.g. |
142 | | // |
143 | | // <a xmlns:a = "www.uri.org" /> |
144 | | } else { |
145 | 194k | Some(Prefix::from(&*attr.name.local)) |
146 | | }; |
147 | | |
148 | 211k | if opt_uri.is_some() && self.scope.contains_key(&ns_prefix) { |
149 | 7.58k | Err(Borrowed("Namespace already defined")) |
150 | | } else { |
151 | 204k | self.scope.insert(ns_prefix, opt_uri); |
152 | 204k | Ok(()) |
153 | | } |
154 | | }, |
155 | | |
156 | 903 | (_, _) => Err(Borrowed("Invalid namespace declaration.")), |
157 | | }; |
158 | 236k | result |
159 | 237k | } |
160 | | } |
161 | | |
/// Tree builder options, with an impl for Default.
///
/// The struct currently has no fields.
#[derive(Copy, Clone, Default)]
pub struct XmlTreeBuilderOpts {}
165 | | |
/// The XML tree builder.
///
/// Receives tokens and reports the resulting tree modifications to `sink`.
/// Mutable state is kept in `RefCell`/`Cell` fields so that the builder can
/// be driven through `&self`.
pub struct XmlTreeBuilder<Handle, Sink> {
    /// Configuration options for XmlTreeBuilder
    _opts: XmlTreeBuilderOpts,

    /// Consumer of tree modifications.
    pub sink: Sink,

    /// The document node, which is created by the sink.
    doc_handle: Handle,

    /// Stack of open elements, most recently added at end.
    open_elems: RefCell<Vec<Handle>>,

    /// Current element pointer.
    // NOTE(review): in the code visible here this is initialised to `None`
    // and only read by `trace_handles` — confirm whether it is still needed.
    curr_elem: RefCell<Option<Handle>>,

    /// Stack of namespace identifiers and namespaces.
    namespace_stack: RefCell<NamespaceMapStack>,

    /// Current namespace identifier
    ///
    /// Collects the declarations of the tag being processed;
    /// `process_namespaces` swaps it for an empty map once the tag is bound.
    current_namespace: RefCell<NamespaceMap>,

    /// Current tree builder phase.
    phase: Cell<XmlPhase>,
}
192 | | impl<Handle, Sink> XmlTreeBuilder<Handle, Sink> |
193 | | where |
194 | | Handle: Clone, |
195 | | Sink: TreeSink<Handle = Handle>, |
196 | | { |
197 | | /// Create a new tree builder which sends tree modifications to a particular `TreeSink`. |
198 | | /// |
199 | | /// The tree builder is also a `TokenSink`. |
200 | 12.7k | pub fn new(sink: Sink, opts: XmlTreeBuilderOpts) -> XmlTreeBuilder<Handle, Sink> { |
201 | 12.7k | let doc_handle = sink.get_document(); |
202 | 12.7k | XmlTreeBuilder { |
203 | 12.7k | _opts: opts, |
204 | 12.7k | sink, |
205 | 12.7k | doc_handle, |
206 | 12.7k | open_elems: RefCell::new(vec![]), |
207 | 12.7k | curr_elem: RefCell::new(None), |
208 | 12.7k | namespace_stack: RefCell::new(NamespaceMapStack::new()), |
209 | 12.7k | current_namespace: RefCell::new(NamespaceMap::empty()), |
210 | 12.7k | phase: Cell::new(XmlPhase::Start), |
211 | 12.7k | } |
212 | 12.7k | } |
213 | | |
214 | | /// Call the `Tracer`'s `trace_handle` method on every `Handle` in the tree builder's |
215 | | /// internal state. This is intended to support garbage-collected DOMs. |
216 | | pub fn trace_handles(&self, tracer: &dyn Tracer<Handle = Handle>) { |
217 | | tracer.trace_handle(&self.doc_handle); |
218 | | for e in self.open_elems.borrow().iter() { |
219 | | tracer.trace_handle(e); |
220 | | } |
221 | | if let Some(h) = self.curr_elem.borrow().as_ref() { |
222 | | tracer.trace_handle(h); |
223 | | } |
224 | | } |
225 | | |
226 | | // Debug helper |
227 | | #[cfg(not(for_c))] |
228 | | #[allow(dead_code)] |
229 | | fn dump_state(&self, label: String) { |
230 | | debug!("dump_state on {label}"); |
231 | | debug!(" open_elems:"); |
232 | | for node in self.open_elems.borrow().iter() { |
233 | | debug!(" {:?}", self.sink.elem_name(node)); |
234 | | } |
235 | | debug!(""); |
236 | | } |
237 | | |
238 | | #[cfg(for_c)] |
239 | | fn debug_step(&self, _mode: XmlPhase, _token: &Token) {} |
240 | | |
241 | | #[cfg(not(for_c))] |
242 | 17.4M | fn debug_step(&self, mode: XmlPhase, token: &Token) { |
243 | 17.4M | debug!( |
244 | 0 | "processing {:?} in insertion mode {:?}", |
245 | 0 | format!("{:?}", token), |
246 | | mode |
247 | | ); |
248 | 17.4M | } |
249 | | |
250 | 237k | fn declare_ns(&self, attr: &mut Attribute) { |
251 | 237k | if let Err(msg) = self.current_namespace.borrow_mut().insert_ns(attr) { |
252 | 32.9k | self.sink.parse_error(msg); |
253 | 204k | } else { |
254 | 204k | attr.name.ns = ns!(xmlns); |
255 | 204k | } |
256 | 237k | } |
257 | | |
258 | 2.25M | fn find_uri(&self, prefix: &Option<Prefix>) -> Result<Option<Namespace>, Cow<'static, str>> { |
259 | 2.25M | let mut uri = Err(Borrowed("No appropriate namespace found")); |
260 | | |
261 | 2.25M | let current_namespace = self.current_namespace.borrow(); |
262 | 1.40G | for ns in self |
263 | 2.25M | .namespace_stack |
264 | 2.25M | .borrow() |
265 | 2.25M | .0 |
266 | 2.25M | .iter() |
267 | 2.25M | .chain(Some(&*current_namespace)) |
268 | 2.25M | .rev() |
269 | | { |
270 | 1.40G | if let Some(el) = ns.get(prefix) { |
271 | 1.43M | uri = Ok(el.clone()); |
272 | 1.43M | break; |
273 | 1.40G | } |
274 | | } |
275 | 2.25M | uri |
276 | 2.25M | } |
277 | | |
278 | 2.25M | fn bind_qname(&self, name: &mut QualName) { |
279 | 2.25M | match self.find_uri(&name.prefix) { |
280 | 1.43M | Ok(uri) => { |
281 | 1.43M | let ns_uri = match uri { |
282 | 1.09M | Some(e) => e, |
283 | 336k | None => ns!(), |
284 | | }; |
285 | 1.43M | name.ns = ns_uri; |
286 | | }, |
287 | 827k | Err(msg) => { |
288 | 827k | self.sink.parse_error(msg); |
289 | 827k | }, |
290 | | } |
291 | 2.25M | } |
292 | | |
293 | | // This method takes in name qualified name and binds it to the |
294 | | // existing namespace context. |
295 | | // |
296 | | // Returns false if the attribute is a duplicate, returns true otherwise. |
297 | 1.58M | fn bind_attr_qname( |
298 | 1.58M | &self, |
299 | 1.58M | present_attrs: &mut HashSet<(Namespace, LocalName)>, |
300 | 1.58M | name: &mut QualName, |
301 | 1.58M | ) -> bool { |
302 | | // Attributes don't have default namespace |
303 | 1.58M | let mut not_duplicate = true; |
304 | | |
305 | 1.58M | if name.prefix.is_some() { |
306 | 1.22M | self.bind_qname(name); |
307 | 1.22M | not_duplicate = Self::check_duplicate_attr(present_attrs, name); |
308 | 1.22M | } |
309 | 1.58M | not_duplicate |
310 | 1.58M | } |
311 | | |
312 | 1.22M | fn check_duplicate_attr( |
313 | 1.22M | present_attrs: &mut HashSet<(Namespace, LocalName)>, |
314 | 1.22M | name: &QualName, |
315 | 1.22M | ) -> bool { |
316 | 1.22M | let pair = (name.ns.clone(), name.local.clone()); |
317 | | |
318 | 1.22M | if present_attrs.contains(&pair) { |
319 | 126k | return false; |
320 | 1.09M | } |
321 | 1.09M | present_attrs.insert(pair); |
322 | 1.09M | true |
323 | 1.22M | } |
324 | | |
325 | 1.03M | fn process_namespaces(&self, tag: &mut Tag) { |
326 | | // List of already present namespace local name attribute pairs. |
327 | 1.03M | let mut present_attrs: HashSet<(Namespace, LocalName)> = Default::default(); |
328 | | |
329 | 1.03M | let mut new_attr = vec![]; |
330 | | // First we extract all namespace declarations |
331 | 1.81M | for attr in tag.attrs.iter_mut().filter(|attr| { |
332 | 1.81M | attr.name.prefix == Some(namespace_prefix!("xmlns")) |
333 | 1.60M | || attr.name.local == local_name!("xmlns") |
334 | 1.81M | }) { |
335 | 237k | self.declare_ns(attr); |
336 | 237k | } |
337 | | |
338 | | // Then we bind those namespace declarations to attributes |
339 | 1.81M | for attr in tag.attrs.iter_mut().filter(|attr| { |
340 | 1.81M | attr.name.prefix != Some(namespace_prefix!("xmlns")) |
341 | 1.60M | && attr.name.local != local_name!("xmlns") |
342 | 1.81M | }) { |
343 | 1.58M | if self.bind_attr_qname(&mut present_attrs, &mut attr.name) { |
344 | 1.45M | new_attr.push(attr.clone()); |
345 | 1.45M | } |
346 | | } |
347 | 1.03M | tag.attrs = new_attr; |
348 | | |
349 | | // Then we bind the tags namespace. |
350 | 1.03M | self.bind_qname(&mut tag.name); |
351 | | |
352 | | // Finally, we dump current namespace if its unneeded. |
353 | 1.03M | let x = mem::replace( |
354 | 1.03M | &mut *self.current_namespace.borrow_mut(), |
355 | 1.03M | NamespaceMap::empty(), |
356 | | ); |
357 | | |
358 | | // Only start tag doesn't dump current namespace. However, <script /> is treated |
359 | | // differently than every other empty tag, so it needs to retain the current |
360 | | // namespace as well. |
361 | 1.03M | if tag.kind == StartTag || (tag.kind == EmptyTag && tag.name.local == local_name!("script")) |
362 | 930k | { |
363 | 930k | self.namespace_stack.borrow_mut().push(x); |
364 | 930k | } |
365 | 1.03M | } |
366 | | |
367 | 17.3M | fn process_to_completion( |
368 | 17.3M | &self, |
369 | 17.3M | mut token: Token, |
370 | 17.3M | ) -> ProcessResult<<Self as TokenSink>::Handle> { |
371 | | // Queue of additional tokens yet to be processed. |
372 | | // This stays empty in the common case where we don't split whitespace. |
373 | 17.3M | let mut more_tokens = VecDeque::new(); |
374 | | |
375 | | loop { |
376 | 17.4M | let phase = self.phase.get(); |
377 | | |
378 | | #[allow(clippy::unused_unit)] |
379 | 17.4M | match self.step(phase, token) { |
380 | | XmlProcessResult::Done => { |
381 | 17.3M | let Some(popped_token) = more_tokens.pop_front() else { |
382 | 17.3M | return ProcessResult::Continue; |
383 | | }; |
384 | 0 | token = popped_token; |
385 | | }, |
386 | 11.0k | XmlProcessResult::Reprocess(m, t) => { |
387 | 11.0k | self.phase.set(m); |
388 | 11.0k | token = t; |
389 | 11.0k | }, |
390 | 49.2k | XmlProcessResult::Script(node) => { |
391 | 49.2k | assert!(more_tokens.is_empty()); |
392 | 49.2k | return ProcessResult::Script(node); |
393 | | }, |
394 | | } |
395 | | } |
396 | 17.3M | } |
397 | | } |
398 | | |
399 | | impl<Handle, Sink> TokenSink for XmlTreeBuilder<Handle, Sink> |
400 | | where |
401 | | Handle: Clone, |
402 | | Sink: TreeSink<Handle = Handle>, |
403 | | { |
404 | | type Handle = Handle; |
405 | | |
406 | 24.2M | fn process_token(&self, token: tokenizer::Token) -> ProcessResult<Self::Handle> { |
407 | | // Handle `ParseError` and `DoctypeToken`; convert everything else to the local `Token` type. |
408 | 24.2M | let token = match token { |
409 | 6.83M | tokenizer::Token::ParseError(e) => { |
410 | 6.83M | self.sink.parse_error(e); |
411 | 6.83M | return ProcessResult::Done; |
412 | | }, |
413 | | |
414 | 354k | tokenizer::Token::Doctype(d) => Token::Doctype(d), |
415 | 17.4k | tokenizer::Token::ProcessingInstruction(instruction) => Token::Pi(instruction), |
416 | 1.14M | tokenizer::Token::Tag(x) => Token::Tag(x), |
417 | 5.16M | tokenizer::Token::Comment(x) => Token::Comment(x), |
418 | 0 | tokenizer::Token::NullCharacter => Token::NullCharacter, |
419 | 12.5k | tokenizer::Token::EndOfFile => Token::Eof, |
420 | 10.7M | tokenizer::Token::Characters(x) => Token::Characters(x), |
421 | | }; |
422 | | |
423 | 17.3M | self.process_to_completion(token) |
424 | 24.2M | } |
425 | | |
426 | 12.7k | fn end(&self) { |
427 | 758k | for node in self.open_elems.borrow_mut().drain(..).rev() { |
428 | 758k | self.sink.pop(&node); |
429 | 758k | } |
430 | 12.7k | } |
431 | | } |
432 | | |
/// Returns the last element of `open_elems`.
///
/// # Panics
///
/// Panics with "no current element" when the slice is empty.
fn current_node<Handle>(open_elems: &[Handle]) -> &Handle {
    let Some(top) = open_elems.last() else {
        panic!("no current element");
    };
    top
}
436 | | |
437 | | impl<Handle, Sink> XmlTreeBuilder<Handle, Sink> |
438 | | where |
439 | | Handle: Clone, |
440 | | Sink: TreeSink<Handle = Handle>, |
441 | | { |
442 | 275k | fn current_node(&self) -> Ref<'_, Handle> { |
443 | 275k | Ref::map(self.open_elems.borrow(), |elems| { |
444 | 275k | elems.last().expect("no current element") |
445 | 275k | }) |
446 | 275k | } |
447 | | |
448 | 8.88M | fn insert_appropriately(&self, child: NodeOrText<Handle>) { |
449 | 8.88M | let open_elems = self.open_elems.borrow(); |
450 | 8.88M | let target = current_node(&open_elems); |
451 | 8.88M | self.sink.append(target, child); |
452 | 8.88M | } |
453 | | |
454 | 924k | fn insert_tag(&self, tag: Tag) -> XmlProcessResult<Handle> { |
455 | 924k | let child = create_element(&self.sink, tag.name, tag.attrs); |
456 | 924k | self.insert_appropriately(AppendNode(child.clone())); |
457 | 924k | self.add_to_open_elems(child) |
458 | 924k | } |
459 | | |
460 | 38.1k | fn append_tag(&self, tag: Tag) -> XmlProcessResult<Handle> { |
461 | 38.1k | let child = create_element(&self.sink, tag.name, tag.attrs); |
462 | 38.1k | self.insert_appropriately(AppendNode(child.clone())); |
463 | 38.1k | self.sink.pop(&child); |
464 | 38.1k | XmlProcessResult::Done |
465 | 38.1k | } |
466 | | |
467 | 7.76k | fn append_tag_to_doc(&self, tag: Tag) -> Handle { |
468 | 7.76k | let child = create_element(&self.sink, tag.name, tag.attrs); |
469 | | |
470 | 7.76k | self.sink |
471 | 7.76k | .append(&self.doc_handle, AppendNode(child.clone())); |
472 | 7.76k | child |
473 | 7.76k | } |
474 | | |
475 | 930k | fn add_to_open_elems(&self, el: Handle) -> XmlProcessResult<Handle> { |
476 | 930k | self.open_elems.borrow_mut().push(el); |
477 | | |
478 | 930k | XmlProcessResult::Done |
479 | 930k | } |
480 | | |
481 | 472k | fn append_comment_to_doc(&self, text: StrTendril) -> XmlProcessResult<Handle> { |
482 | 472k | let comment = self.sink.create_comment(text); |
483 | 472k | self.sink.append(&self.doc_handle, AppendNode(comment)); |
484 | 472k | XmlProcessResult::Done |
485 | 472k | } |
486 | | |
487 | 4.69M | fn append_comment_to_tag(&self, text: StrTendril) -> XmlProcessResult<Handle> { |
488 | 4.69M | let open_elems = self.open_elems.borrow(); |
489 | 4.69M | let target = current_node(&open_elems); |
490 | 4.69M | let comment = self.sink.create_comment(text); |
491 | 4.69M | self.sink.append(target, AppendNode(comment)); |
492 | 4.69M | XmlProcessResult::Done |
493 | 4.69M | } |
494 | | |
495 | 45.8k | fn append_doctype_to_doc(&self, doctype: Doctype) -> XmlProcessResult<Handle> { |
496 | 137k | fn get_tendril(opt: Option<StrTendril>) -> StrTendril { |
497 | 137k | match opt { |
498 | 75.4k | Some(expr) => expr, |
499 | 62.1k | None => Tendril::new(), |
500 | | } |
501 | 137k | } |
502 | 45.8k | self.sink.append_doctype_to_document( |
503 | 45.8k | get_tendril(doctype.name), |
504 | 45.8k | get_tendril(doctype.public_id), |
505 | 45.8k | get_tendril(doctype.system_id), |
506 | | ); |
507 | 45.8k | XmlProcessResult::Done |
508 | 45.8k | } |
509 | | |
510 | 5.63k | fn append_pi_to_doc(&self, pi: Pi) -> XmlProcessResult<Handle> { |
511 | 5.63k | let pi = self.sink.create_pi(pi.target, pi.data); |
512 | 5.63k | self.sink.append(&self.doc_handle, AppendNode(pi)); |
513 | 5.63k | XmlProcessResult::Done |
514 | 5.63k | } |
515 | | |
516 | 11.8k | fn append_pi_to_tag(&self, pi: Pi) -> XmlProcessResult<Handle> { |
517 | 11.8k | let open_elems = self.open_elems.borrow(); |
518 | 11.8k | let target = current_node(&open_elems); |
519 | 11.8k | let pi = self.sink.create_pi(pi.target, pi.data); |
520 | 11.8k | self.sink.append(target, AppendNode(pi)); |
521 | 11.8k | XmlProcessResult::Done |
522 | 11.8k | } |
523 | | |
524 | 7.92M | fn append_text(&self, chars: StrTendril) -> XmlProcessResult<Handle> { |
525 | 7.92M | self.insert_appropriately(AppendText(chars)); |
526 | 7.92M | XmlProcessResult::Done |
527 | 7.92M | } |
528 | | |
529 | 103k | fn tag_in_open_elems(&self, tag: &Tag) -> bool { |
530 | 103k | self.open_elems |
531 | 103k | .borrow() |
532 | 103k | .iter() |
533 | 64.0M | .any(|a| self.sink.elem_name(a).expanded() == tag.name.expanded()) |
534 | 103k | } |
535 | | |
536 | | // Pop elements until an element from the set has been popped. |
537 | 56.6k | fn pop_until<P>(&self, pred: P) |
538 | 56.6k | where |
539 | 56.6k | P: Fn(ExpandedName) -> bool, |
540 | | { |
541 | | loop { |
542 | 171k | if self.current_node_in(&pred) { |
543 | 56.6k | break; |
544 | 115k | } |
545 | 115k | self.pop(); |
546 | | } |
547 | 56.6k | } |
548 | | |
549 | 171k | fn current_node_in<TagSet>(&self, set: TagSet) -> bool |
550 | 171k | where |
551 | 171k | TagSet: Fn(ExpandedName) -> bool, |
552 | | { |
553 | | // FIXME: take namespace into consideration: |
554 | 171k | set(self.sink.elem_name(&self.current_node()).expanded()) |
555 | 171k | } |
556 | | |
557 | 103k | fn close_tag(&self, tag: Tag) -> XmlProcessResult<Handle> { |
558 | 103k | debug!( |
559 | 0 | "Close tag: current_node.name {:?} \n Current tag {:?}", |
560 | 0 | self.sink.elem_name(&self.current_node()), |
561 | 0 | &tag.name |
562 | | ); |
563 | | |
564 | 103k | if *self.sink.elem_name(&self.current_node()).local_name() != tag.name.local { |
565 | 49.3k | self.sink |
566 | 49.3k | .parse_error(Borrowed("Current node doesn't match tag")); |
567 | 53.8k | } |
568 | | |
569 | 103k | let is_closed = self.tag_in_open_elems(&tag); |
570 | | |
571 | 103k | if is_closed { |
572 | 171k | self.pop_until(|p| p == tag.name.expanded()); |
573 | 56.6k | self.pop(); |
574 | 46.6k | } |
575 | | |
576 | 103k | XmlProcessResult::Done |
577 | 103k | } |
578 | | |
579 | 69.2k | fn no_open_elems(&self) -> bool { |
580 | 69.2k | self.open_elems.borrow().is_empty() |
581 | 69.2k | } |
582 | | |
583 | 172k | fn pop(&self) -> Handle { |
584 | 172k | self.namespace_stack.borrow_mut().pop(); |
585 | 172k | let node = self |
586 | 172k | .open_elems |
587 | 172k | .borrow_mut() |
588 | 172k | .pop() |
589 | 172k | .expect("no current element"); |
590 | 172k | self.sink.pop(&node); |
591 | 172k | node |
592 | 172k | } |
593 | | |
594 | 12.5k | fn stop_parsing(&self) -> XmlProcessResult<Handle> { |
595 | 12.5k | warn!("stop_parsing for XML5 not implemented, full speed ahead!"); |
596 | 12.5k | XmlProcessResult::Done |
597 | 12.5k | } |
598 | | } |
599 | | |
600 | 2.77M | fn any_not_whitespace(x: &StrTendril) -> bool { |
601 | 2.77M | !x.bytes() |
602 | 2.78M | .all(|b| matches!(b, b'\t' | b'\r' | b'\n' | b'\x0C' | b' ')) |
603 | 2.77M | } |
604 | | |
605 | | impl<Handle, Sink> XmlTreeBuilder<Handle, Sink> |
606 | | where |
607 | | Handle: Clone, |
608 | | Sink: TreeSink<Handle = Handle>, |
609 | | { |
610 | 17.4M | fn step(&self, mode: XmlPhase, token: Token) -> XmlProcessResult<<Self as TokenSink>::Handle> { |
611 | 17.4M | self.debug_step(mode, &token); |
612 | | |
613 | 17.4M | match mode { |
614 | 9.30k | XmlPhase::Start => match token { |
615 | | Token::Tag(Tag { |
616 | | kind: StartTag, |
617 | 6.42k | name, |
618 | 6.42k | attrs, |
619 | | }) => { |
620 | 6.42k | let tag = { |
621 | 6.42k | let mut tag = Tag { |
622 | 6.42k | kind: StartTag, |
623 | 6.42k | name, |
624 | 6.42k | attrs, |
625 | 6.42k | }; |
626 | 6.42k | self.process_namespaces(&mut tag); |
627 | 6.42k | tag |
628 | | }; |
629 | 6.42k | self.phase.set(XmlPhase::Main); |
630 | 6.42k | let handle = self.append_tag_to_doc(tag); |
631 | 6.42k | self.add_to_open_elems(handle) |
632 | | }, |
633 | | Token::Tag(Tag { |
634 | | kind: EmptyTag, |
635 | 1.33k | name, |
636 | 1.33k | attrs, |
637 | | }) => { |
638 | 1.33k | let tag = { |
639 | 1.33k | let mut tag = Tag { |
640 | 1.33k | kind: EmptyTag, |
641 | 1.33k | name, |
642 | 1.33k | attrs, |
643 | 1.33k | }; |
644 | 1.33k | self.process_namespaces(&mut tag); |
645 | 1.33k | tag |
646 | | }; |
647 | 1.33k | self.phase.set(XmlPhase::End); |
648 | 1.33k | let handle = self.append_tag_to_doc(tag); |
649 | 1.33k | self.sink.pop(&handle); |
650 | 1.33k | XmlProcessResult::Done |
651 | | }, |
652 | 28.6k | Token::Comment(comment) => self.append_comment_to_doc(comment), |
653 | 2.82k | Token::Pi(pi) => self.append_pi_to_doc(pi), |
654 | 1.48M | Token::Characters(ref chars) if !any_not_whitespace(chars) => { |
655 | 18.5k | XmlProcessResult::Done |
656 | | }, |
657 | | Token::Eof => { |
658 | 4.96k | self.sink |
659 | 4.96k | .parse_error(Borrowed("Unexpected EOF in start phase")); |
660 | 4.96k | XmlProcessResult::Reprocess(XmlPhase::End, Token::Eof) |
661 | | }, |
662 | 45.8k | Token::Doctype(d) => { |
663 | 45.8k | self.append_doctype_to_doc(d); |
664 | 45.8k | XmlProcessResult::Done |
665 | | }, |
666 | | _ => { |
667 | 1.46M | self.sink |
668 | 1.46M | .parse_error(Borrowed("Unexpected element in start phase")); |
669 | 1.46M | XmlProcessResult::Done |
670 | | }, |
671 | | }, |
672 | 1.03M | XmlPhase::Main => match token { |
673 | 7.92M | Token::Characters(chs) => self.append_text(chs), |
674 | | Token::Tag(Tag { |
675 | | kind: StartTag, |
676 | 889k | name, |
677 | 889k | attrs, |
678 | | }) => { |
679 | 889k | let tag = { |
680 | 889k | let mut tag = Tag { |
681 | 889k | kind: StartTag, |
682 | 889k | name, |
683 | 889k | attrs, |
684 | 889k | }; |
685 | 889k | self.process_namespaces(&mut tag); |
686 | 889k | tag |
687 | | }; |
688 | 889k | self.insert_tag(tag) |
689 | | }, |
690 | | Token::Tag(Tag { |
691 | | kind: EmptyTag, |
692 | 73.0k | name, |
693 | 73.0k | attrs, |
694 | | }) => { |
695 | 73.0k | let tag = { |
696 | 73.0k | let mut tag = Tag { |
697 | 73.0k | kind: EmptyTag, |
698 | 73.0k | name, |
699 | 73.0k | attrs, |
700 | 73.0k | }; |
701 | 73.0k | self.process_namespaces(&mut tag); |
702 | 73.0k | tag |
703 | | }; |
704 | 73.0k | if tag.name.local == local_name!("script") { |
705 | 34.8k | self.insert_tag(tag.clone()); |
706 | 34.8k | let script = current_node(&self.open_elems.borrow()).clone(); |
707 | 34.8k | self.close_tag(tag); |
708 | 34.8k | XmlProcessResult::Script(script) |
709 | | } else { |
710 | 38.1k | self.append_tag(tag) |
711 | | } |
712 | | }, |
713 | | Token::Tag(Tag { |
714 | | kind: EndTag, |
715 | 68.4k | name, |
716 | 68.4k | attrs, |
717 | | }) => { |
718 | 68.4k | let tag = { |
719 | 68.4k | let mut tag = Tag { |
720 | 68.4k | kind: EndTag, |
721 | 68.4k | name, |
722 | 68.4k | attrs, |
723 | 68.4k | }; |
724 | 68.4k | self.process_namespaces(&mut tag); |
725 | 68.4k | tag |
726 | | }; |
727 | 68.4k | if tag.name.local == local_name!("script") { |
728 | 14.4k | let script = current_node(&self.open_elems.borrow()).clone(); |
729 | 14.4k | self.close_tag(tag); |
730 | 14.4k | if self.no_open_elems() { |
731 | 1 | self.phase.set(XmlPhase::End); |
732 | 14.4k | } |
733 | 14.4k | return XmlProcessResult::Script(script); |
734 | 54.0k | } |
735 | 54.0k | let retval = self.close_tag(tag); |
736 | 54.0k | if self.no_open_elems() { |
737 | 191 | self.phase.set(XmlPhase::End); |
738 | 53.8k | } |
739 | 54.0k | retval |
740 | | }, |
741 | | Token::Tag(Tag { kind: ShortTag, .. }) => { |
742 | 760 | self.pop(); |
743 | 760 | if self.no_open_elems() { |
744 | 13 | self.phase.set(XmlPhase::End); |
745 | 747 | } |
746 | 760 | XmlProcessResult::Done |
747 | | }, |
748 | 4.69M | Token::Comment(comment) => self.append_comment_to_tag(comment), |
749 | 11.8k | Token::Pi(pi) => self.append_pi_to_tag(pi), |
750 | | Token::Eof | Token::NullCharacter => { |
751 | 6.06k | XmlProcessResult::Reprocess(XmlPhase::End, Token::Eof) |
752 | | }, |
753 | | Token::Doctype(_) => { |
754 | 284k | self.sink |
755 | 284k | .parse_error(Borrowed("Unexpected element in main phase")); |
756 | 284k | XmlProcessResult::Done |
757 | | }, |
758 | | }, |
759 | 1.29M | XmlPhase::End => match token { |
760 | 444k | Token::Comment(comment) => self.append_comment_to_doc(comment), |
761 | 2.80k | Token::Pi(pi) => self.append_pi_to_doc(pi), |
762 | 1.29M | Token::Characters(ref chars) if !any_not_whitespace(chars) => { |
763 | 11.6k | XmlProcessResult::Done |
764 | | }, |
765 | 12.5k | Token::Eof => self.stop_parsing(), |
766 | | _ => { |
767 | 1.40M | self.sink |
768 | 1.40M | .parse_error(Borrowed("Unexpected element in end phase")); |
769 | 1.40M | XmlProcessResult::Done |
770 | | }, |
771 | | }, |
772 | | } |
773 | 17.4M | } |
774 | | } |