/rust/registry/src/index.crates.io-1949cf8c6b5b557f/quick-xml-0.38.3/src/reader/mod.rs
Line | Count | Source |
1 | | //! Contains high-level interface for a pull-based XML parser. |
2 | | |
3 | | #[cfg(feature = "encoding")] |
4 | | use encoding_rs::Encoding; |
5 | | use std::io; |
6 | | use std::ops::Range; |
7 | | |
8 | | use crate::encoding::Decoder; |
9 | | use crate::errors::{Error, IllFormedError, SyntaxError}; |
10 | | use crate::events::{BytesRef, Event}; |
11 | | use crate::parser::{ElementParser, Parser, PiParser}; |
12 | | use crate::reader::state::ReaderState; |
13 | | |
14 | | /// A struct that holds a parser configuration. |
15 | | /// |
16 | | /// Current parser configuration can be retrieved by calling [`Reader::config()`] |
17 | | /// and changed by changing properties of the object returned by a call to |
18 | | /// [`Reader::config_mut()`]. |
19 | | /// |
20 | | /// [`Reader::config()`]: crate::reader::Reader::config |
21 | | /// [`Reader::config_mut()`]: crate::reader::Reader::config_mut |
22 | | #[derive(Debug, Clone, PartialEq, Eq)] |
23 | | #[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] |
24 | | #[cfg_attr(feature = "serde-types", derive(serde::Deserialize, serde::Serialize))] |
25 | | #[non_exhaustive] |
26 | | pub struct Config { |
27 | | /// Whether a lone ampersand character (without a paired semicolon) should be |
28 | | /// allowed in textual content. Unless enabled, in case of a dangling ampersand, |
29 | | /// the [`Error::IllFormed(UnclosedReference)`] is returned from read methods. |
30 | | /// |
31 | | /// Default: `false` |
32 | | /// |
33 | | /// # Example |
34 | | /// |
35 | | /// ``` |
36 | | /// # use quick_xml::events::{BytesRef, BytesText, Event}; |
37 | | /// # use quick_xml::reader::Reader; |
38 | | /// # use pretty_assertions::assert_eq; |
39 | | /// let mut reader = Reader::from_str("text with & & & alone"); |
40 | | /// reader.config_mut().allow_dangling_amp = true; |
41 | | /// |
42 | | /// assert_eq!(reader.read_event().unwrap(), Event::Text(BytesText::new("text with "))); |
43 | | /// assert_eq!(reader.read_event().unwrap(), Event::Text(BytesText::from_escaped("& "))); |
44 | | /// assert_eq!(reader.read_event().unwrap(), Event::GeneralRef(BytesRef::new("amp"))); |
45 | | /// assert_eq!(reader.read_event().unwrap(), Event::Text(BytesText::new(" "))); |
46 | | /// assert_eq!(reader.read_event().unwrap(), Event::Text(BytesText::from_escaped("& alone"))); |
47 | | /// assert_eq!(reader.read_event().unwrap(), Event::Eof); |
48 | | /// ``` |
49 | | /// |
50 | | /// [`Error::IllFormed(UnclosedReference)`]: crate::errors::IllFormedError::UnclosedReference |
51 | | pub allow_dangling_amp: bool, |
52 | | |
53 | | /// Whether unmatched closing tag names should be allowed. Unless enabled, |
54 | | /// in case of a dangling end tag, the [`Error::IllFormed(UnmatchedEndTag)`] |
55 | | /// is returned from read methods. |
56 | | /// |
57 | | /// When set to `true`, it won't check if a closing tag has a corresponding |
58 | | /// opening tag at all. For example, `<a></a></b>` will be permitted. |
59 | | /// |
60 | | /// Note that the emitted [`End`] event will not be modified if this is enabled, |
61 | | /// i.e. it will contain the data of the unmatched end tag. |
62 | | /// |
63 | | /// Note that setting this to `true` will lead to additional allocations that |
64 | | /// are needed to store the tag name for an [`End`] event. |
65 | | /// |
66 | | /// Default: `false` |
67 | | /// |
68 | | /// [`Error::IllFormed(UnmatchedEndTag)`]: crate::errors::IllFormedError::UnmatchedEndTag |
69 | | /// [`End`]: crate::events::Event::End |
70 | | pub allow_unmatched_ends: bool, |
71 | | |
72 | | /// Whether comments should be validated. If enabled, in case of an invalid comment |
73 | | /// [`Error::IllFormed(DoubleHyphenInComment)`] is returned from read methods. |
74 | | /// |
75 | | /// When set to `true`, every [`Comment`] event will be checked for not |
76 | | /// containing `--`, which [is not allowed] in XML comments. Most of the time |
77 | | /// we don't want comments at all so we don't really care about comment |
78 | | /// correctness, thus the default value is `false` to improve performance. |
79 | | /// |
80 | | /// Default: `false` |
81 | | /// |
82 | | /// [`Error::IllFormed(DoubleHyphenInComment)`]: crate::errors::IllFormedError::DoubleHyphenInComment |
83 | | /// [`Comment`]: crate::events::Event::Comment |
84 | | /// [is not allowed]: https://www.w3.org/TR/xml11/#sec-comments |
85 | | pub check_comments: bool, |
86 | | |
87 | | /// Whether mismatched closing tag names should be detected. If enabled, in |
88 | | /// case of mismatch the [`Error::IllFormed(MismatchedEndTag)`] is returned from |
89 | | /// read methods. |
90 | | /// |
91 | | /// Note that start and end tags [should match literally][spec], they cannot |
92 | | /// have different prefixes even if both prefixes resolve to the same namespace. |
93 | | /// The XML |
94 | | /// |
95 | | /// ```xml |
96 | | /// <outer xmlns="namespace" xmlns:p="namespace"> |
97 | | /// </p:outer> |
98 | | /// ``` |
99 | | /// |
100 | | /// is not valid, even though semantically the start tag is the same as the |
101 | | /// end tag. The reason is that namespaces are an extension of the original |
102 | | /// XML specification (without namespaces) and it should be backward-compatible. |
103 | | /// |
104 | | /// When set to `false`, it won't check if a closing tag matches the corresponding |
105 | | /// opening tag. For example, `<mytag></different_tag>` will be permitted. |
106 | | /// |
107 | | /// If the XML is known to be sane (already processed, etc.) this saves extra time. |
108 | | /// |
109 | | /// Note that the emitted [`End`] event will not be modified if this is disabled, |
110 | | /// i.e. it will contain the data of the mismatched end tag. |
111 | | /// |
112 | | /// Note that setting this to `true` will lead to additional allocations that |
113 | | /// are needed to store the tag name for an [`End`] event. However, if [`expand_empty_elements`] |
114 | | /// is also set, only one additional allocation will be performed to support |
115 | | /// both of these options. |
116 | | /// |
117 | | /// Default: `true` |
118 | | /// |
119 | | /// [`Error::IllFormed(MismatchedEndTag)`]: crate::errors::IllFormedError::MismatchedEndTag |
120 | | /// [spec]: https://www.w3.org/TR/xml11/#dt-etag |
121 | | /// [`End`]: crate::events::Event::End |
122 | | /// [`expand_empty_elements`]: Self::expand_empty_elements |
123 | | pub check_end_names: bool, |
124 | | |
125 | | /// Whether empty elements should be split into a `Start` and an `End` event. |
126 | | /// |
127 | | /// When set to `true`, all [`Empty`] events produced by a self-closing tag |
128 | | /// like `<tag/>` are expanded into a [`Start`] event followed by an [`End`] |
129 | | /// event. When set to `false` (the default), those tags are represented by |
130 | | /// an [`Empty`] event instead. |
131 | | /// |
132 | | /// Note that setting this to `true` will lead to additional allocations that |
133 | | /// are needed to store the tag name for an [`End`] event. However, if [`check_end_names`] |
134 | | /// is also set, only one additional allocation will be performed to support |
135 | | /// both of these options. |
136 | | /// |
137 | | /// Default: `false` |
138 | | /// |
139 | | /// [`Empty`]: crate::events::Event::Empty |
140 | | /// [`Start`]: crate::events::Event::Start |
141 | | /// [`End`]: crate::events::Event::End |
142 | | /// [`check_end_names`]: Self::check_end_names |
143 | | pub expand_empty_elements: bool, |
144 | | |
145 | | /// Whether trailing whitespace after the markup name is trimmed in closing |
146 | | /// tags `</a >`. |
147 | | /// |
148 | | /// If `true` the emitted [`End`] event is stripped of trailing whitespace |
149 | | /// after the markup name. |
150 | | /// |
151 | | /// Note that if set to `false` and [`check_end_names`] is `true` the comparison |
152 | | /// of markup names is going to fail erroneously if a closing tag contains |
153 | | /// trailing whitespace. |
154 | | /// |
155 | | /// Default: `true` |
156 | | /// |
157 | | /// [`End`]: crate::events::Event::End |
158 | | /// [`check_end_names`]: Self::check_end_names |
159 | | pub trim_markup_names_in_closing_tags: bool, |
160 | | |
161 | | /// Whether whitespace before character data should be removed. |
162 | | /// |
163 | | /// When set to `true`, leading whitespace is trimmed in [`Text`] events. |
164 | | /// If after that the event is empty it will not be pushed. |
165 | | /// |
166 | | /// Default: `false` |
167 | | /// |
168 | | /// <div style="background:rgba(80, 240, 100, 0.20);padding:0.75em;"> |
169 | | /// |
170 | | /// WARNING: With this option every text event will be trimmed, which is |
171 | | /// incorrect behavior when text events are delimited by comments, processing |
172 | | /// instructions or CDATA sections. To correctly trim data, manually apply |
173 | | /// [`BytesText::inplace_trim_start`] and [`BytesText::inplace_trim_end`] |
174 | | /// only to necessary events. |
175 | | /// </div> |
176 | | /// |
177 | | /// [`Text`]: crate::events::Event::Text |
178 | | /// [`BytesText::inplace_trim_start`]: crate::events::BytesText::inplace_trim_start |
179 | | /// [`BytesText::inplace_trim_end`]: crate::events::BytesText::inplace_trim_end |
180 | | pub trim_text_start: bool, |
181 | | |
182 | | /// Whether whitespace after character data should be removed. |
183 | | /// |
184 | | /// When set to `true`, trailing whitespace is trimmed in [`Text`] events. |
185 | | /// If after that the event is empty it will not be pushed. |
186 | | /// |
187 | | /// Default: `false` |
188 | | /// |
189 | | /// <div style="background:rgba(80, 240, 100, 0.20);padding:0.75em;"> |
190 | | /// |
191 | | /// WARNING: With this option every text event will be trimmed, which is |
192 | | /// incorrect behavior when text events are delimited by comments, processing |
193 | | /// instructions or CDATA sections. To correctly trim data, manually apply |
194 | | /// [`BytesText::inplace_trim_start`] and [`BytesText::inplace_trim_end`] |
195 | | /// only to necessary events. |
196 | | /// </div> |
197 | | /// |
198 | | /// [`Text`]: crate::events::Event::Text |
199 | | /// [`BytesText::inplace_trim_start`]: crate::events::BytesText::inplace_trim_start |
200 | | /// [`BytesText::inplace_trim_end`]: crate::events::BytesText::inplace_trim_end |
201 | | pub trim_text_end: bool, |
202 | | } |
203 | | |
204 | | impl Config { |
205 | | /// Sets both [`trim_text_start`] and [`trim_text_end`] to the same value. |
206 | | /// |
207 | | /// <div style="background:rgba(80, 240, 100, 0.20);padding:0.75em;"> |
208 | | /// |
209 | | /// WARNING: With this option every text event will be trimmed, which is |
210 | | /// incorrect behavior when text events are delimited by comments, processing |
211 | | /// instructions or CDATA sections. To correctly trim data, manually apply |
212 | | /// [`BytesText::inplace_trim_start`] and [`BytesText::inplace_trim_end`] |
213 | | /// only to necessary events. |
214 | | /// </div> |
215 | | /// |
216 | | /// [`trim_text_start`]: Self::trim_text_start |
217 | | /// [`trim_text_end`]: Self::trim_text_end |
218 | | /// [`BytesText::inplace_trim_start`]: crate::events::BytesText::inplace_trim_start |
219 | | /// [`BytesText::inplace_trim_end`]: crate::events::BytesText::inplace_trim_end |
220 | | #[inline] |
221 | 0 | pub fn trim_text(&mut self, trim: bool) { |
222 | 0 | self.trim_text_start = trim; |
223 | 0 | self.trim_text_end = trim; |
224 | 0 | } |
225 | | |
226 | | /// Turns all well-formedness checks on or off. Currently these are the following settings: |
227 | | /// - [`check_comments`](Self::check_comments) |
228 | | /// - [`check_end_names`](Self::check_end_names) |
229 | | #[inline] |
230 | 0 | pub fn enable_all_checks(&mut self, enable: bool) { |
231 | 0 | self.check_comments = enable; |
232 | 0 | self.check_end_names = enable; |
233 | 0 | } |
234 | | } |
235 | | |
236 | | impl Default for Config { |
237 | 0 | fn default() -> Self { |
238 | 0 | Self { |
239 | 0 | allow_dangling_amp: false, |
240 | 0 | allow_unmatched_ends: false, |
241 | 0 | check_comments: false, |
242 | 0 | check_end_names: true, |
243 | 0 | expand_empty_elements: false, |
244 | 0 | trim_markup_names_in_closing_tags: true, |
245 | 0 | trim_text_start: false, |
246 | 0 | trim_text_end: false, |
247 | 0 | } |
248 | 0 | } |
249 | | } |
250 | | |
251 | | //////////////////////////////////////////////////////////////////////////////////////////////////// |
252 | | |
253 | | macro_rules! read_event_impl { |
254 | | ( |
255 | | $self:ident, $buf:ident, |
256 | | $reader:expr, |
257 | | $read_until_close:ident |
258 | | $(, $await:ident)? |
259 | | ) => {{ |
260 | | let event = loop { |
261 | | break match $self.state.state { |
262 | | ParseState::Init => { // Go to InsideText state |
263 | | // If the encoding is set explicitly, we do not need to detect it. For example, |
264 | | // explicit UTF-8 is set automatically if Reader was created using `from_str`. |
265 | | // But we still need to remove the BOM for consistency with the code path |
266 | | // where the `encoding` feature is disabled |
267 | | #[cfg(feature = "encoding")] |
268 | | if let Some(encoding) = $reader.detect_encoding() $(.$await)? ? { |
269 | | if $self.state.encoding.can_be_refined() { |
270 | | $self.state.encoding = crate::reader::EncodingRef::BomDetected(encoding); |
271 | | } |
272 | | } |
273 | | |
274 | | // Removes UTF-8 BOM if it is present |
275 | | #[cfg(not(feature = "encoding"))] |
276 | | $reader.remove_utf8_bom() $(.$await)? ?; |
277 | | |
278 | | $self.state.state = ParseState::InsideText; |
279 | | continue; |
280 | | }, |
281 | | ParseState::InsideRef => { // Go to InsideText, InsideMarkup or Done state, or stay in InsideRef |
282 | | let start = $self.state.offset; |
283 | | match $reader.read_ref($buf, &mut $self.state.offset) $(.$await)? { |
284 | | // Emit reference, go to InsideText state |
285 | | ReadRefResult::Ref(bytes) => { |
286 | | $self.state.state = ParseState::InsideText; |
287 | | // +1 to skip start `&` |
288 | | Ok(Event::GeneralRef(BytesRef::wrap(&bytes[1..], $self.decoder()))) |
289 | | } |
290 | | // Go to Done state |
291 | | ReadRefResult::UpToEof(bytes) if $self.state.config.allow_dangling_amp => { |
292 | | $self.state.state = ParseState::Done; |
293 | | Ok(Event::Text($self.state.emit_text(bytes))) |
294 | | } |
295 | | ReadRefResult::UpToEof(_) => { |
296 | | $self.state.state = ParseState::Done; |
297 | | $self.state.last_error_offset = start; |
298 | | Err(Error::IllFormed(IllFormedError::UnclosedReference)) |
299 | | } |
300 | | // Do not change state, stay in InsideRef |
301 | | ReadRefResult::UpToRef(bytes) if $self.state.config.allow_dangling_amp => { |
302 | | Ok(Event::Text($self.state.emit_text(bytes))) |
303 | | } |
304 | | ReadRefResult::UpToRef(_) => { |
305 | | $self.state.last_error_offset = start; |
306 | | Err(Error::IllFormed(IllFormedError::UnclosedReference)) |
307 | | } |
308 | | // Go to InsideMarkup state |
309 | | ReadRefResult::UpToMarkup(bytes) if $self.state.config.allow_dangling_amp => { |
310 | | $self.state.state = ParseState::InsideMarkup; |
311 | | Ok(Event::Text($self.state.emit_text(bytes))) |
312 | | } |
313 | | ReadRefResult::UpToMarkup(_) => { |
314 | | $self.state.state = ParseState::InsideMarkup; |
315 | | $self.state.last_error_offset = start; |
316 | | Err(Error::IllFormed(IllFormedError::UnclosedReference)) |
317 | | } |
318 | | ReadRefResult::Err(e) => Err(Error::Io(e.into())), |
319 | | } |
320 | | } |
321 | | ParseState::InsideText => { // Go to InsideMarkup, InsideRef or Done state |
322 | | if $self.state.config.trim_text_start { |
323 | | $reader.skip_whitespace(&mut $self.state.offset) $(.$await)? ?; |
324 | | } |
325 | | |
326 | | match $reader.read_text($buf, &mut $self.state.offset) $(.$await)? { |
327 | | ReadTextResult::Markup(buf) => { |
328 | | $self.state.state = ParseState::InsideMarkup; |
329 | | // Pass `buf` to the next iteration of the parsing loop |
330 | | $buf = buf; |
331 | | continue; |
332 | | } |
333 | | ReadTextResult::Ref(buf) => { |
334 | | $self.state.state = ParseState::InsideRef; |
335 | | // Pass `buf` to the next iteration of the parsing loop |
336 | | $buf = buf; |
337 | | continue; |
338 | | } |
339 | | ReadTextResult::UpToMarkup(bytes) => { |
340 | | $self.state.state = ParseState::InsideMarkup; |
341 | | // FIXME: Can produce an empty event if: |
342 | | // - event contains only spaces |
343 | | // - trim_text_start = false |
344 | | // - trim_text_end = true |
345 | | Ok(Event::Text($self.state.emit_text(bytes))) |
346 | | } |
347 | | ReadTextResult::UpToRef(bytes) => { |
348 | | $self.state.state = ParseState::InsideRef; |
349 | | // Emit Text event with `bytes` content; the reference is read on the next call in the InsideRef state |
350 | | Ok(Event::Text($self.state.emit_text(bytes))) |
351 | | } |
352 | | ReadTextResult::UpToEof(bytes) => { |
353 | | $self.state.state = ParseState::Done; |
354 | | // Trim bytes from end if required; an empty result is reported as Eof instead of an empty Text |
355 | | let event = $self.state.emit_text(bytes); |
356 | | if event.is_empty() { |
357 | | Ok(Event::Eof) |
358 | | } else { |
359 | | Ok(Event::Text(event)) |
360 | | } |
361 | | } |
362 | | ReadTextResult::Err(e) => Err(Error::Io(e.into())), |
363 | | } |
364 | | }, |
365 | | // Go to InsideText state in next two arms |
366 | | ParseState::InsideMarkup => $self.$read_until_close($buf) $(.$await)?, |
367 | | ParseState::InsideEmpty => Ok(Event::End($self.state.close_expanded_empty())), |
368 | | ParseState::Done => Ok(Event::Eof), |
369 | | }; |
370 | | }; |
371 | | match event { |
372 | | // #513: In case of ill-formed errors we have already consumed the wrong data |
373 | | // and changed the state, so parsing can be continued if the caller wishes |
374 | | Err(Error::IllFormed(_)) => {} |
375 | | Err(_) | Ok(Event::Eof) => $self.state.state = ParseState::Done, |
376 | | _ => {} |
377 | | } |
378 | | event |
379 | | }}; |
380 | | } |
381 | | |
382 | | /// Reads bytes up to the `>` and skips it. This macro is expected to be invoked |
383 | | /// after seeing the `<` symbol and skipping it. Inspects the next (current) |
384 | | /// symbol and returns an appropriate [`Event`]: |
385 | | /// |
386 | | /// |Symbol |Event |
387 | | /// |-------|------------------------------------- |
388 | | /// |`!` |[`Comment`], [`CData`] or [`DocType`] |
389 | | /// |`/` |[`End`] |
390 | | /// |`?` |[`PI`] |
391 | | /// |_other_|[`Start`] or [`Empty`] |
392 | | /// |
393 | | /// Moves parser to the `InsideText` state. |
394 | | /// |
395 | | /// [`Comment`]: Event::Comment |
396 | | /// [`CData`]: Event::CData |
397 | | /// [`DocType`]: Event::DocType |
398 | | /// [`End`]: Event::End |
399 | | /// [`PI`]: Event::PI |
400 | | /// [`Start`]: Event::Start |
401 | | /// [`Empty`]: Event::Empty |
402 | | macro_rules! read_until_close { |
403 | | ( |
404 | | $self:ident, $buf:ident, |
405 | | $reader:expr |
406 | | $(, $await:ident)? |
407 | | ) => {{ |
408 | | $self.state.state = ParseState::InsideText; |
409 | | |
410 | | let start = $self.state.offset; |
411 | | match $reader.peek_one() $(.$await)? { |
412 | | // `<!` - comment, CDATA or DOCTYPE declaration |
413 | | Ok(Some(b'!')) => match $reader |
414 | | .read_bang_element($buf, &mut $self.state.offset) |
415 | | $(.$await)? |
416 | | { |
417 | | Ok((bang_type, bytes)) => $self.state.emit_bang(bang_type, bytes), |
418 | | Err(e) => { |
419 | | // We want to report error at `<`, but offset was increased, |
420 | | // so return it back (-1 for `<`) |
421 | | $self.state.last_error_offset = start - 1; |
422 | | Err(e) |
423 | | } |
424 | | }, |
425 | | // `</` - closing tag |
426 | | // #776: We parse using ElementParser which allows us to have attributes |
427 | | // in close tags. While such tags are not allowed by the specification, |
428 | | // we anyway allow to parse them because: |
429 | | // - we do not check constraints during parsing. This is performed by the |
430 | | // optional validation step which the user should call manually |
431 | | // - if we just look for `>` we will parse `</tag attr=">" >` as end tag |
432 | | // `</tag attr=">` and text `" >` which probably no existing parser |
433 | | // does. This is malformed XML, however it is tolerated by some parsers |
434 | | // (e.g. the one used by Adobe Flash) and such documents do exist in the wild. |
435 | | Ok(Some(b'/')) => match $reader |
436 | | .read_with(ElementParser::Outside, $buf, &mut $self.state.offset) |
437 | | $(.$await)? |
438 | | { |
439 | | Ok(bytes) => $self.state.emit_end(bytes), |
440 | | Err(e) => { |
441 | | // We want to report error at `<`, but offset was increased, |
442 | | // so return it back (-1 for `<`) |
443 | | $self.state.last_error_offset = start - 1; |
444 | | Err(e) |
445 | | } |
446 | | }, |
447 | | // `<?` - processing instruction |
448 | | Ok(Some(b'?')) => match $reader |
449 | | .read_with(PiParser(false), $buf, &mut $self.state.offset) |
450 | | $(.$await)? |
451 | | { |
452 | | Ok(bytes) => $self.state.emit_question_mark(bytes), |
453 | | Err(e) => { |
454 | | // We want to report error at `<`, but offset was increased, |
455 | | // so return it back (-1 for `<`) |
456 | | $self.state.last_error_offset = start - 1; |
457 | | Err(e) |
458 | | } |
459 | | }, |
460 | | // `<...` - opening or self-closed tag |
461 | | Ok(Some(_)) => match $reader |
462 | | .read_with(ElementParser::Outside, $buf, &mut $self.state.offset) |
463 | | $(.$await)? |
464 | | { |
465 | | Ok(bytes) => Ok($self.state.emit_start(bytes)), |
466 | | Err(e) => { |
467 | | // We want to report error at `<`, but offset was increased, |
468 | | // so return it back (-1 for `<`) |
469 | | $self.state.last_error_offset = start - 1; |
470 | | Err(e) |
471 | | } |
472 | | }, |
473 | | // `<` - syntax error, tag not closed |
474 | | Ok(None) => { |
475 | | // We want to report error at `<`, but offset was increased, |
476 | | // so return it back (-1 for `<`) |
477 | | $self.state.last_error_offset = start - 1; |
478 | | Err(Error::Syntax(SyntaxError::UnclosedTag)) |
479 | | } |
480 | | Err(e) => Err(Error::Io(e.into())), |
481 | | } |
482 | | }}; |
483 | | } |
484 | | |
485 | | /// Generalization of the `read_to_end` method for buffered and borrowed readers |
486 | | macro_rules! read_to_end { |
487 | | ( |
488 | | // $self: &mut Reader |
489 | | $self:expr, $end:expr, $buf:expr, |
490 | | $read_event:ident, |
491 | | // Code block that clears the internal buffer after each event is read |
492 | | $clear:block |
493 | | $(, $await:ident)? |
494 | | ) => {{ |
495 | | // Because we take position after the event before the End event, |
496 | | // it is important that this position indicates beginning of the End event. |
497 | | // If there were only spaces between the last event and the End event, we would |
498 | | // take position before the spaces, but spaces would be skipped without |
499 | | // generating event if `trim_text_start` is set to `true`. To prevent that |
500 | | // we temporarily disable start text trimming. |
501 | | // |
502 | | // We also cannot take position after getting End event, because if |
503 | | // `trim_markup_names_in_closing_tags` is set to `true` (which is the default), |
504 | | // we do not know the real size that the End event occupies in |
505 | | // the source and cannot correct the position after the End event. |
506 | | // So, we in any case should tweak parser configuration. |
507 | | let config = $self.config_mut(); |
508 | | let trim = config.trim_text_start; |
509 | | config.trim_text_start = false; |
510 | | |
511 | | let start = $self.buffer_position(); |
512 | | let mut depth = 0; |
513 | | loop { |
514 | | $clear |
515 | | let end = $self.buffer_position(); |
516 | | match $self.$read_event($buf) $(.$await)? { |
517 | | Err(e) => { |
518 | | $self.config_mut().trim_text_start = trim; |
519 | | return Err(e); |
520 | | } |
521 | | |
522 | | Ok(Event::Start(e)) if e.name() == $end => depth += 1, |
523 | | Ok(Event::End(e)) if e.name() == $end => { |
524 | | if depth == 0 { |
525 | | $self.config_mut().trim_text_start = trim; |
526 | | break start..end; |
527 | | } |
528 | | depth -= 1; |
529 | | } |
530 | | Ok(Event::Eof) => { |
531 | | $self.config_mut().trim_text_start = trim; |
532 | | return Err(Error::missed_end($end, $self.decoder())); |
533 | | } |
534 | | _ => (), |
535 | | } |
536 | | } |
537 | | }}; |
538 | | } |
539 | | |
540 | | #[cfg(feature = "async-tokio")] |
541 | | mod async_tokio; |
542 | | mod buffered_reader; |
543 | | mod ns_reader; |
544 | | mod slice_reader; |
545 | | mod state; |
546 | | |
547 | | pub use ns_reader::NsReader; |
548 | | |
549 | | /// Range of bytes in the input that corresponds to some piece of XML |
550 | | pub type Span = Range<u64>; |
551 | | |
552 | | //////////////////////////////////////////////////////////////////////////////////////////////////// |
553 | | |
554 | | /// Possible reader states. The state transition diagram (`true` and `false` show the |
555 | | /// value of the [`Config::expand_empty_elements`] option): |
556 | | /// |
557 | | /// ```mermaid |
558 | | /// flowchart LR |
559 | | /// subgraph _ |
560 | | /// direction LR |
561 | | /// |
562 | | /// Init -- "(no event)"\n --> InsideText |
563 | | /// InsideMarkup -- Decl, DocType, PI\nComment, CData\nStart, Empty, End --> InsideText |
564 | | /// InsideText -- "#lt;false#gt;\n(no event)"\nText --> InsideMarkup |
565 | | /// InsideRef -- "(no event)"\nGeneralRef --> InsideText |
566 | | /// end |
567 | | /// InsideText -- "#lt;true#gt;"\nStart --> InsideEmpty |
568 | | /// InsideEmpty -- End --> InsideText |
569 | | /// _ -. Eof .-> Done |
570 | | /// ``` |
571 | | #[derive(Clone, Debug)] |
572 | | enum ParseState { |
573 | | /// Initial state in which the reader stays after creation. The transition from this |
574 | | /// state does not produce an event by itself: the reader only detects the encoding |
575 | | /// (or removes the UTF-8 BOM when the `encoding` feature is disabled) and moves to |
576 | | /// the `InsideText` state, where parsing continues. The reader will never return |
577 | | /// to this state. |
578 | | Init, |
579 | | /// State after seeing the `&` symbol in textual content. Depending on the following bytes |
580 | | /// a `GeneralRef` event, a `Text` event (for a dangling `&`) or an error is generated. |
581 | | /// |
582 | | /// After generating a `GeneralRef` event the reader moves to the `InsideText` state. |
583 | | InsideRef, |
584 | | /// State after seeing the `<` symbol. Depending on the next symbol all other |
585 | | /// events could be generated. |
586 | | /// |
587 | | /// After generating one event the reader moves to the `InsideText` state. |
588 | | InsideMarkup, |
589 | | /// State in which the reader searches for the `<` symbol of a markup or the `&` symbol of a |
590 | | /// reference. All bytes before that symbol will be returned in the [`Event::Text`] event. After that |
591 | | /// the reader moves to the `InsideMarkup` or `InsideRef` state respectively, or to `Done` at the end of input. |
592 | | InsideText, |
593 | | /// This state is used only if option [`expand_empty_elements`] is set to `true`. |
594 | | /// Reader enters this state when it is in a `InsideText` state and emits an |
595 | | /// [`Event::Start`] event. The next event emitted will be an [`Event::End`], |
596 | | /// after which the reader returns to the `InsideText` state. |
597 | | /// |
598 | | /// [`expand_empty_elements`]: Config::expand_empty_elements |
599 | | InsideEmpty, |
600 | | /// Reader enters this state when an `Eof` event is generated or an error occurred. |
601 | | /// This is the last state, the reader stays in it forever. |
602 | | Done, |
603 | | } |
604 | | |
605 | | /// A reference to an encoding together with information about how it was retrieved. |
606 | | /// |
607 | | /// The state transition diagram: |
608 | | /// |
609 | | /// ```mermaid |
610 | | /// flowchart LR |
611 | | /// Implicit -- from_str --> Explicit |
612 | | /// Implicit -- BOM --> BomDetected |
613 | | /// Implicit -- "encoding=..." --> XmlDetected |
614 | | /// BomDetected -- "encoding=..." --> XmlDetected |
615 | | /// ``` |
616 | | #[cfg(feature = "encoding")] |
617 | | #[derive(Clone, Copy, Debug)] |
618 | | enum EncodingRef { |
619 | | /// Encoding was implicitly assumed to have a specified value. It can be refined |
620 | | /// using BOM or by the XML declaration event (`<?xml encoding=... ?>`) |
621 | | Implicit(&'static Encoding), |
622 | | /// Encoding was explicitly set to the desired value. It can be changed |
623 | | /// neither by BOM nor by parsing the XML declaration (`<?xml encoding=... ?>`) |
624 | | Explicit(&'static Encoding), |
625 | | /// Encoding was detected from a byte order mark (BOM) or by the first bytes |
626 | | /// of the content. It can be refined by the XML declaration event (`<?xml encoding=... ?>`) |
627 | | BomDetected(&'static Encoding), |
628 | | /// Encoding was detected using the XML declaration event (`<?xml encoding=... ?>`). |
629 | | /// It can no longer change |
630 | | XmlDetected(&'static Encoding), |
631 | | } |
632 | | #[cfg(feature = "encoding")] |
633 | | impl EncodingRef { |
634 | | #[inline] |
635 | | const fn encoding(&self) -> &'static Encoding { |
636 | | match self { |
637 | | Self::Implicit(e) => e, |
638 | | Self::Explicit(e) => e, |
639 | | Self::BomDetected(e) => e, |
640 | | Self::XmlDetected(e) => e, |
641 | | } |
642 | | } |
643 | | #[inline] |
644 | | const fn can_be_refined(&self) -> bool { |
645 | | match self { |
646 | | Self::Implicit(_) | Self::BomDetected(_) => true, |
647 | | Self::Explicit(_) | Self::XmlDetected(_) => false, |
648 | | } |
649 | | } |
650 | | } |
651 | | |
652 | | //////////////////////////////////////////////////////////////////////////////////////////////////// |
653 | | |
654 | | /// A direct stream to the underlying [`Reader`]'s reader which updates |
655 | | /// [`Reader::buffer_position()`] when it is read from. |
656 | | #[derive(Debug)] |
657 | | #[must_use = "streams do nothing unless read or polled"] |
658 | | pub struct BinaryStream<'r, R> { |
659 | | inner: &'r mut R, |
660 | | offset: &'r mut u64, |
661 | | } |
662 | | |
663 | | impl<'r, R> BinaryStream<'r, R> { |
664 | | /// Returns the current position in bytes in the original source. |
665 | | #[inline] |
666 | 0 | pub const fn offset(&self) -> u64 { |
667 | 0 | *self.offset |
668 | 0 | } |
669 | | |
670 | | /// Gets a reference to the underlying reader. |
671 | | #[inline] |
672 | 0 | pub const fn get_ref(&self) -> &R { |
673 | 0 | self.inner |
674 | 0 | } |
675 | | |
676 | | /// Gets a mutable reference to the underlying reader. |
677 | | /// |
678 | | /// Avoid reading from this reader directly because that will not update the reader's position |
679 | | /// and will lead to incorrect positions of errors. Read from this stream instead. |
680 | | #[inline] |
681 | 0 | pub fn get_mut(&mut self) -> &mut R { |
682 | 0 | self.inner |
683 | 0 | } |
684 | | } |
685 | | |
686 | | impl<'r, R> io::Read for BinaryStream<'r, R> |
687 | | where |
688 | | R: io::Read, |
689 | | { |
690 | | #[inline] |
691 | 0 | fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { |
692 | 0 | let amt = self.inner.read(buf)?; |
693 | 0 | *self.offset += amt as u64; |
694 | 0 | Ok(amt) |
695 | 0 | } |
696 | | } |
697 | | |
698 | | impl<'r, R> io::BufRead for BinaryStream<'r, R> |
699 | | where |
700 | | R: io::BufRead, |
701 | | { |
702 | | #[inline] |
703 | 0 | fn fill_buf(&mut self) -> io::Result<&[u8]> { |
704 | 0 | self.inner.fill_buf() |
705 | 0 | } |
706 | | |
707 | | #[inline] |
708 | 0 | fn consume(&mut self, amt: usize) { |
709 | 0 | self.inner.consume(amt); |
710 | 0 | *self.offset += amt as u64; |
711 | 0 | } |
712 | | } |
713 | | |
714 | | //////////////////////////////////////////////////////////////////////////////////////////////////// |
715 | | |
716 | | /// A low level encoding-agnostic XML event reader. |
717 | | /// |
718 | | /// Consumes bytes and streams XML [`Event`]s. |
719 | | /// |
720 | | /// This reader does not manage namespace declarations and is not able to resolve |
721 | | /// prefixes. If you want these features, use the [`NsReader`]. |
722 | | /// |
723 | | /// # Examples |
724 | | /// |
725 | | /// ``` |
726 | | /// use quick_xml::events::Event; |
727 | | /// use quick_xml::reader::Reader; |
728 | | /// |
729 | | /// let xml = r#"<tag1 att1 = "test"> |
730 | | /// <tag2><!--Test comment-->Test</tag2> |
731 | | /// <tag2>Test 2</tag2> |
732 | | /// </tag1>"#; |
733 | | /// let mut reader = Reader::from_str(xml); |
734 | | /// reader.config_mut().trim_text(true); |
735 | | /// |
736 | | /// let mut count = 0; |
737 | | /// let mut txt = Vec::new(); |
738 | | /// let mut buf = Vec::new(); |
739 | | /// |
740 | | /// // The `Reader` does not implement `Iterator` because it outputs borrowed data (`Cow`s) |
741 | | /// loop { |
742 | | /// // NOTE: this is the generic case when we don't know about the input BufRead. |
743 | | /// // when the input is a &str or a &[u8], we don't actually need to use another |
744 | | /// // buffer, we could directly call `reader.read_event()` |
745 | | /// match reader.read_event_into(&mut buf) { |
746 | | /// Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e), |
747 | | /// // exits the loop when reaching end of file |
748 | | /// Ok(Event::Eof) => break, |
749 | | /// |
750 | | /// Ok(Event::Start(e)) => { |
751 | | /// match e.name().as_ref() { |
752 | | /// b"tag1" => println!("attributes values: {:?}", |
753 | | /// e.attributes().map(|a| a.unwrap().value) |
754 | | /// .collect::<Vec<_>>()), |
755 | | /// b"tag2" => count += 1, |
756 | | /// _ => (), |
757 | | /// } |
758 | | /// } |
759 | | /// Ok(Event::Text(e)) => txt.push(e.decode().unwrap().into_owned()), |
760 | | /// |
761 | | /// // There are several other `Event`s we do not consider here |
762 | | /// _ => (), |
763 | | /// } |
764 | | /// // if we don't keep a borrow elsewhere, we can clear the buffer to keep memory usage low |
765 | | /// buf.clear(); |
766 | | /// } |
767 | | /// ``` |
768 | | /// |
769 | | /// [`NsReader`]: crate::reader::NsReader |
770 | | #[derive(Debug, Clone)] |
771 | | pub struct Reader<R> { |
772 | | /// Source of data to parse |
773 | | reader: R, |
774 | | /// Configuration and current parse state |
775 | | state: ReaderState, |
776 | | } |
777 | | |
778 | | /// Builder methods |
779 | | impl<R> Reader<R> { |
780 | | /// Creates a `Reader` that reads from a given reader. |
781 | 0 | pub fn from_reader(reader: R) -> Self { |
782 | 0 | Self { |
783 | 0 | reader, |
784 | 0 | state: ReaderState::default(), |
785 | 0 | } |
786 | 0 | } Unexecuted instantiation: <quick_xml::reader::Reader<bytes::buf::reader::Reader<bytes::bytes::Bytes>>>::from_reader Unexecuted instantiation: <quick_xml::reader::Reader<bytes::buf::reader::Reader<opendal::types::buffer::Buffer>>>::from_reader Unexecuted instantiation: <quick_xml::reader::Reader<bytes::buf::reader::Reader<&[u8]>>>::from_reader Unexecuted instantiation: <quick_xml::reader::Reader<&[u8]>>::from_reader |
787 | | |
788 | | /// Returns reference to the parser configuration |
789 | 0 | pub const fn config(&self) -> &Config { |
790 | 0 | &self.state.config |
791 | 0 | } |
792 | | |
793 | | /// Returns mutable reference to the parser configuration |
794 | 0 | pub fn config_mut(&mut self) -> &mut Config { |
795 | 0 | &mut self.state.config |
796 | 0 | } Unexecuted instantiation: <quick_xml::reader::Reader<bytes::buf::reader::Reader<bytes::bytes::Bytes>>>::config_mut Unexecuted instantiation: <quick_xml::reader::Reader<bytes::buf::reader::Reader<opendal::types::buffer::Buffer>>>::config_mut Unexecuted instantiation: <quick_xml::reader::Reader<bytes::buf::reader::Reader<&[u8]>>>::config_mut Unexecuted instantiation: <quick_xml::reader::Reader<&[u8]>>::config_mut |
797 | | } |
798 | | |
799 | | /// Getters |
800 | | impl<R> Reader<R> { |
801 | | /// Consumes `Reader` returning the underlying reader |
802 | | /// |
803 | | /// Can be used to compute line and column of a parsing error position |
804 | | /// |
805 | | /// # Examples |
806 | | /// |
807 | | /// ``` |
808 | | /// # use pretty_assertions::assert_eq; |
809 | | /// use std::{str, io::Cursor}; |
810 | | /// use quick_xml::events::Event; |
811 | | /// use quick_xml::reader::Reader; |
812 | | /// |
813 | | /// let xml = r#"<tag1 att1 = "test"> |
814 | | /// <tag2><!--Test comment-->Test</tag2> |
815 | | /// <tag3>Test 2</tag3> |
816 | | /// </tag1>"#; |
817 | | /// let mut reader = Reader::from_reader(Cursor::new(xml.as_bytes())); |
818 | | /// let mut buf = Vec::new(); |
819 | | /// |
820 | | /// fn into_line_and_column(reader: Reader<Cursor<&[u8]>>) -> (usize, usize) { |
821 | | /// // We know that the size cannot exceed usize::MAX because we created the parser from a single &[u8] |
822 | | /// let end_pos = reader.buffer_position() as usize; |
823 | | /// let mut cursor = reader.into_inner(); |
824 | | /// let s = String::from_utf8(cursor.into_inner()[0..end_pos].to_owned()) |
825 | | /// .expect("can't make a string"); |
826 | | /// let mut line = 1; |
827 | | /// let mut column = 0; |
828 | | /// for c in s.chars() { |
829 | | /// if c == '\n' { |
830 | | /// line += 1; |
831 | | /// column = 0; |
832 | | /// } else { |
833 | | /// column += 1; |
834 | | /// } |
835 | | /// } |
836 | | /// (line, column) |
837 | | /// } |
838 | | /// |
839 | | /// loop { |
840 | | /// match reader.read_event_into(&mut buf) { |
841 | | /// Ok(Event::Start(ref e)) => match e.name().as_ref() { |
842 | | /// b"tag1" | b"tag2" => (), |
843 | | /// tag => { |
844 | | /// assert_eq!(b"tag3", tag); |
845 | | /// assert_eq!((3, 22), into_line_and_column(reader)); |
846 | | /// break; |
847 | | /// } |
848 | | /// }, |
849 | | /// Ok(Event::Eof) => unreachable!(), |
850 | | /// _ => (), |
851 | | /// } |
852 | | /// buf.clear(); |
853 | | /// } |
854 | | /// ``` |
855 | 0 | pub fn into_inner(self) -> R { |
856 | 0 | self.reader |
857 | 0 | } |
858 | | |
859 | | /// Gets a reference to the underlying reader. |
860 | 0 | pub const fn get_ref(&self) -> &R { |
861 | 0 | &self.reader |
862 | 0 | } |
863 | | |
864 | | /// Gets a mutable reference to the underlying reader. |
865 | | /// |
866 | | /// Avoid reading from this reader directly because that will not update the reader's position |
867 | | /// and will lead to incorrect positions of errors. If you want to read, use |
868 | | /// [`stream()`] instead. |
869 | | /// |
870 | | /// [`stream()`]: Self::stream |
871 | 0 | pub fn get_mut(&mut self) -> &mut R { |
872 | 0 | &mut self.reader |
873 | 0 | } |
874 | | |
875 | | /// Gets the byte position in the input data just after the last emitted event |
876 | | /// (i.e. this is position where data of last event ends). |
877 | | /// |
878 | | /// Note that for text events which originally ended with whitespace characters |
879 | | /// (` `, `\t`, `\r`, and `\n`), if [`Config::trim_text_end`] is set, this is the position |
880 | | /// before trimming, not the position of the last byte of the [`Event::Text`] content. |
881 | 0 | pub const fn buffer_position(&self) -> u64 { |
882 | | // when internal state is InsideMarkup, we have actually read until '<', |
883 | | // which we don't want to show |
884 | 0 | if let ParseState::InsideMarkup = self.state.state { |
885 | 0 | self.state.offset - 1 |
886 | | } else { |
887 | 0 | self.state.offset |
888 | | } |
889 | 0 | } Unexecuted instantiation: <quick_xml::reader::Reader<bytes::buf::reader::Reader<bytes::bytes::Bytes>>>::buffer_position Unexecuted instantiation: <quick_xml::reader::Reader<bytes::buf::reader::Reader<opendal::types::buffer::Buffer>>>::buffer_position Unexecuted instantiation: <quick_xml::reader::Reader<bytes::buf::reader::Reader<&[u8]>>>::buffer_position Unexecuted instantiation: <quick_xml::reader::Reader<&[u8]>>::buffer_position |
890 | | |
891 | | /// Gets the byte position of the last error in the input data. If there are no errors |
892 | | /// yet, returns `0`. |
893 | | /// |
894 | | /// Unlike `buffer_position` it will point to the place where it is reasonable |
895 | | /// to report the error to the end user. For example, all [`SyntaxError`]s are |
896 | | /// reported when the parser sees EOF inside of some kind of markup. The |
897 | | /// `buffer_position()` will point to the last byte of input which is not |
898 | | /// very useful. `error_position()` will point to the start of corresponding |
899 | | /// markup element (i. e. to the `<` character). |
900 | | /// |
901 | | /// This position is always `<= buffer_position()`. |
902 | 0 | pub const fn error_position(&self) -> u64 { |
903 | 0 | self.state.last_error_offset |
904 | 0 | } |
905 | | |
906 | | /// Get the decoder, used to decode bytes, read by this reader, to the strings. |
907 | | /// |
908 | | /// If [`encoding`] feature is enabled, the used encoding may change after |
909 | | /// parsing the XML declaration, otherwise encoding is fixed to UTF-8. |
910 | | /// |
911 | | /// If [`encoding`] feature is enabled and no encoding is specified in declaration, |
912 | | /// defaults to UTF-8. |
913 | | /// |
914 | | /// [`encoding`]: ../index.html#encoding |
915 | | #[inline] |
916 | 0 | pub const fn decoder(&self) -> Decoder { |
917 | 0 | self.state.decoder() |
918 | 0 | } Unexecuted instantiation: <quick_xml::reader::Reader<bytes::buf::reader::Reader<bytes::bytes::Bytes>>>::decoder Unexecuted instantiation: <quick_xml::reader::Reader<bytes::buf::reader::Reader<opendal::types::buffer::Buffer>>>::decoder Unexecuted instantiation: <quick_xml::reader::Reader<bytes::buf::reader::Reader<&[u8]>>>::decoder Unexecuted instantiation: <quick_xml::reader::Reader<&[u8]>>::decoder |
919 | | |
920 | | /// Gets direct access to the underlying reader, while tracking the amount of |
921 | | /// data read and updating [`Reader::buffer_position()`] accordingly. |
922 | | /// |
923 | | /// Note that this method gives you access to the internal reader, and data read |
924 | | /// through it will not be returned in any subsequent events read by the `read_event` |
925 | | /// family of methods. |
926 | | /// |
927 | | /// # Example |
928 | | /// |
929 | | /// This example demonstrates how to read a stream of raw bytes from an XML document. |
930 | | /// This could be used to implement streaming read of text, or to read raw binary |
931 | | /// bytes embedded in an XML document. (Documents with embedded raw bytes are not |
932 | | /// valid XML, but XML-derived file formats exist where such documents are valid). |
933 | | /// |
934 | | /// ``` |
935 | | /// # use pretty_assertions::assert_eq; |
936 | | /// use std::io::{BufRead, Read}; |
937 | | /// use quick_xml::events::{BytesEnd, BytesStart, Event}; |
938 | | /// use quick_xml::reader::Reader; |
939 | | /// |
940 | | /// let mut reader = Reader::from_str("<tag>binary << data&></tag>"); |
941 | | /// // ^ ^ ^ ^ |
942 | | /// // 0 5 21 27 |
943 | | /// |
944 | | /// assert_eq!( |
945 | | /// (reader.read_event().unwrap(), reader.buffer_position()), |
946 | | /// // 5 - end of the `<tag>` |
947 | | /// (Event::Start(BytesStart::new("tag")), 5) |
948 | | /// ); |
949 | | /// |
950 | | /// // Reading directly from underlying reader will not update position |
951 | | /// // let mut inner = reader.get_mut(); |
952 | | /// |
953 | | /// // Reading from the stream() advances position |
954 | | /// let mut inner = reader.stream(); |
955 | | /// |
956 | | /// // Read binary data. We must know its size |
957 | | /// let mut binary = [0u8; 16]; |
958 | | /// inner.read_exact(&mut binary).unwrap(); |
959 | | /// assert_eq!(&binary, b"binary << data&>"); |
960 | | /// // 21 - end of the `binary << data&>` |
961 | | /// assert_eq!(inner.offset(), 21); |
962 | | /// assert_eq!(reader.buffer_position(), 21); |
963 | | /// |
964 | | /// assert_eq!( |
965 | | /// (reader.read_event().unwrap(), reader.buffer_position()), |
966 | | /// // 27 - end of the `</tag>` |
967 | | /// (Event::End(BytesEnd::new("tag")), 27) |
968 | | /// ); |
969 | | /// |
970 | | /// assert_eq!(reader.read_event().unwrap(), Event::Eof); |
971 | | /// ``` |
972 | | #[inline] |
973 | 0 | pub fn stream(&mut self) -> BinaryStream<'_, R> { |
974 | 0 | BinaryStream { |
975 | 0 | inner: &mut self.reader, |
976 | 0 | offset: &mut self.state.offset, |
977 | 0 | } |
978 | 0 | } |
979 | | } |
980 | | |
981 | | /// Private sync reading methods |
982 | | impl<R> Reader<R> { |
983 | | /// Read text into the given buffer, and return an event that borrows from |
984 | | /// either that buffer or from the input itself, based on the type of the |
985 | | /// reader. |
986 | 0 | fn read_event_impl<'i, B>(&mut self, mut buf: B) -> Result<Event<'i>, Error> |
987 | 0 | where |
988 | 0 | R: XmlSource<'i, B>, |
989 | | { |
990 | 0 | read_event_impl!(self, buf, self.reader, read_until_close) |
991 | 0 | } Unexecuted instantiation: <quick_xml::reader::Reader<bytes::buf::reader::Reader<bytes::bytes::Bytes>>>::read_event_impl::<&mut alloc::vec::Vec<u8>> Unexecuted instantiation: <quick_xml::reader::Reader<bytes::buf::reader::Reader<opendal::types::buffer::Buffer>>>::read_event_impl::<&mut alloc::vec::Vec<u8>> Unexecuted instantiation: <quick_xml::reader::Reader<bytes::buf::reader::Reader<&[u8]>>>::read_event_impl::<&mut alloc::vec::Vec<u8>> Unexecuted instantiation: <quick_xml::reader::Reader<&[u8]>>::read_event_impl::<()> |
992 | | |
993 | | /// Private function to read until `>` is found. This function expects that |
994 | | /// it was called just after encountering a `<` symbol. |
995 | 0 | fn read_until_close<'i, B>(&mut self, buf: B) -> Result<Event<'i>, Error> |
996 | 0 | where |
997 | 0 | R: XmlSource<'i, B>, |
998 | | { |
999 | 0 | read_until_close!(self, buf, self.reader) |
1000 | 0 | } Unexecuted instantiation: <quick_xml::reader::Reader<bytes::buf::reader::Reader<bytes::bytes::Bytes>>>::read_until_close::<&mut alloc::vec::Vec<u8>> Unexecuted instantiation: <quick_xml::reader::Reader<bytes::buf::reader::Reader<opendal::types::buffer::Buffer>>>::read_until_close::<&mut alloc::vec::Vec<u8>> Unexecuted instantiation: <quick_xml::reader::Reader<bytes::buf::reader::Reader<&[u8]>>>::read_until_close::<&mut alloc::vec::Vec<u8>> Unexecuted instantiation: <quick_xml::reader::Reader<&[u8]>>::read_until_close::<()> |
1001 | | } |
1002 | | |
1003 | | //////////////////////////////////////////////////////////////////////////////////////////////////// |
1004 | | |
1005 | | /// Result of an attempt to read XML textual data from the source. |
1006 | | #[derive(Debug)] |
1007 | | enum ReadTextResult<'r, B> { |
1008 | | /// Start of markup (`<` character) was found in the first byte. `<` was consumed. |
1009 | | /// Contains buffer that should be returned back to the next iteration cycle |
1010 | | /// to satisfy borrow checker requirements. |
1011 | | Markup(B), |
1012 | | /// Start of reference (`&` character) was found in the first byte. |
1013 | | /// `&` was not consumed. |
1014 | | /// Contains buffer that should be returned back to the next iteration cycle |
1015 | | /// to satisfy borrow checker requirements. |
1016 | | Ref(B), |
1017 | | /// Contains text block up to start of markup (`<` character). `<` was consumed. |
1018 | | UpToMarkup(&'r [u8]), |
1019 | | /// Contains text block up to start of reference (`&` character). |
1020 | | /// `&` was not consumed. |
1021 | | UpToRef(&'r [u8]), |
1022 | | /// Contains text block up to EOF; neither start of markup (`<` character) |
1023 | | /// nor start of reference (`&` character) was found. |
1024 | | UpToEof(&'r [u8]), |
1025 | | /// IO error occurred. |
1026 | | Err(io::Error), |
1027 | | } |
1028 | | |
1029 | | /// Result of an attempt to read general reference from the reader. |
1030 | | #[derive(Debug)] |
1031 | | enum ReadRefResult<'r> { |
1032 | | /// Contains text block up to end of reference (`;` character). |
1033 | | /// Result includes start `&`, but not end `;`. |
1034 | | Ref(&'r [u8]), |
1035 | | /// Contains text block up to EOF. None of the end of reference (`;`), start of |
1036 | | /// another reference (`&`), or start of markup (`<`) characters was found. |
1037 | | /// Result includes start `&`. |
1038 | | UpToEof(&'r [u8]), |
1039 | | /// Contains text block up to next possible reference (`&` character). |
1040 | | /// Result includes start `&`. |
1041 | | UpToRef(&'r [u8]), |
1042 | | /// Contains text block up to start of markup (`<` character). |
1043 | | /// Result includes start `&`. |
1044 | | UpToMarkup(&'r [u8]), |
1045 | | /// IO error occurred. |
1046 | | Err(io::Error), |
1047 | | } |
1048 | | |
1049 | | /// Represents an input for a reader that can return borrowed data. |
1050 | | /// |
1051 | | /// There are two implementors of this trait: a generic one that reads data from |
1052 | | /// `Self`, copies some part of it into a provided buffer of type `B` and then |
1053 | | /// returns data that borrows from that buffer. |
1054 | | /// |
1055 | | /// The other implementor is for `&[u8]`; instead of copying data, it returns |
1056 | | /// data borrowed from `Self`. This implementation allows zero-copy |
1057 | | /// deserialization. |
1058 | | /// |
1059 | | /// # Parameters |
1060 | | /// - `'r`: lifetime of a buffer from which events will borrow |
1061 | | /// - `B`: a type of a buffer that can be used to store data read from `Self` and |
1062 | | /// from which events can borrow |
1063 | | trait XmlSource<'r, B> { |
1064 | | /// Removes UTF-8 BOM if it is present |
1065 | | #[cfg(not(feature = "encoding"))] |
1066 | | fn remove_utf8_bom(&mut self) -> io::Result<()>; |
1067 | | |
1068 | | /// Determines encoding from the start of input and removes BOM if it is present |
1069 | | #[cfg(feature = "encoding")] |
1070 | | fn detect_encoding(&mut self) -> io::Result<Option<&'static Encoding>>; |
1071 | | |
1072 | | /// Read input until start of markup (the `<`) is found, start of general entity |
1073 | | /// reference (the `&`) is found or end of input is reached. |
1074 | | /// |
1075 | | /// # Parameters |
1076 | | /// - `buf`: Buffer that could be filled from an input (`Self`) and |
1077 | | /// from which [events] could borrow their data |
1078 | | /// - `position`: Will be increased by amount of bytes consumed |
1079 | | /// |
1080 | | /// [events]: crate::events::Event |
1081 | | fn read_text(&mut self, buf: B, position: &mut u64) -> ReadTextResult<'r, B>; |
1082 | | |
1083 | | /// Read input until end of general reference (the `;`) is found, start of |
1084 | | /// another general reference (the `&`) is found or end of input is reached. |
1085 | | /// |
1086 | | /// This method must be called when current character is `&`. |
1087 | | /// |
1088 | | /// # Parameters |
1089 | | /// - `buf`: Buffer that could be filled from an input (`Self`) and |
1090 | | /// from which [events] could borrow their data |
1091 | | /// - `position`: Will be increased by amount of bytes consumed |
1092 | | /// |
1093 | | /// [events]: crate::events::Event |
1094 | | fn read_ref(&mut self, buf: B, position: &mut u64) -> ReadRefResult<'r>; |
1095 | | |
1096 | | /// Read input until processing instruction is finished. |
1097 | | /// |
1098 | | /// This method expects that the start sequence of a parser has already been read. |
1099 | | /// |
1100 | | /// Returns a slice of data read up to the end of the thing being parsed. |
1101 | | /// The end of thing and the returned content is determined by the used parser. |
1102 | | /// |
1103 | | /// If input (`Self`) is exhausted and no bytes were read, or if the specified |
1104 | | /// parser could not find the ending sequence of the thing, returns `SyntaxError`. |
1105 | | /// |
1106 | | /// # Parameters |
1107 | | /// - `buf`: Buffer that could be filled from an input (`Self`) and |
1108 | | /// from which [events] could borrow their data |
1109 | | /// - `position`: Will be increased by amount of bytes consumed |
1110 | | /// |
1111 | | /// A `P` type parameter is used to preserve state between calls to the underlying |
1112 | | /// reader which provides bytes fed into the parser. |
1113 | | /// |
1114 | | /// [events]: crate::events::Event |
1115 | | fn read_with<P>(&mut self, parser: P, buf: B, position: &mut u64) -> Result<&'r [u8], Error> |
1116 | | where |
1117 | | P: Parser; |
1118 | | |
1119 | | /// Read input until comment or CDATA is finished. |
1120 | | /// |
1121 | | /// This method expects that `<` has already been read. |
1122 | | /// |
1123 | | /// Returns a slice of data read up to the end of comment or CDATA (`>`); |
1124 | | /// the closing `>` is not included in the result. |
1125 | | /// |
1126 | | /// If input (`Self`) is exhausted and nothing was read, returns `None`. |
1127 | | /// |
1128 | | /// # Parameters |
1129 | | /// - `buf`: Buffer that could be filled from an input (`Self`) and |
1130 | | /// from which [events] could borrow their data |
1131 | | /// - `position`: Will be increased by amount of bytes consumed |
1132 | | /// |
1133 | | /// [events]: crate::events::Event |
1134 | | fn read_bang_element( |
1135 | | &mut self, |
1136 | | buf: B, |
1137 | | position: &mut u64, |
1138 | | ) -> Result<(BangType, &'r [u8]), Error>; |
1139 | | |
1140 | | /// Consume and discard all the whitespace until the next non-whitespace |
1141 | | /// character or EOF. |
1142 | | /// |
1143 | | /// # Parameters |
1144 | | /// - `position`: Will be increased by amount of bytes consumed |
1145 | | fn skip_whitespace(&mut self, position: &mut u64) -> io::Result<()>; |
1146 | | |
1147 | | /// Return one character without consuming it, so that future `read_*` calls |
1148 | | /// will still include it. On EOF, return `None`. |
1149 | | fn peek_one(&mut self) -> io::Result<Option<u8>>; |
1150 | | } |
1151 | | |
1152 | | /// Possible elements started with `<!` |
1153 | | #[derive(Debug, PartialEq)] |
1154 | | enum BangType { |
1155 | | /// <![CDATA[...]]> |
1156 | | CData, |
1157 | | /// <!--...--> |
1158 | | Comment, |
1159 | | /// <!DOCTYPE...>. Contains balance of '<' (+1) and '>' (-1) |
1160 | | DocType(i32), |
1161 | | } |
1162 | | impl BangType { |
1163 | | #[inline(always)] |
1164 | 0 | const fn new(byte: Option<u8>) -> Result<Self, SyntaxError> { |
1165 | 0 | Ok(match byte { |
1166 | 0 | Some(b'[') => Self::CData, |
1167 | 0 | Some(b'-') => Self::Comment, |
1168 | 0 | Some(b'D') | Some(b'd') => Self::DocType(0), |
1169 | 0 | _ => return Err(SyntaxError::InvalidBangMarkup), |
1170 | | }) |
1171 | 0 | } |
1172 | | |
1173 | | /// If element is finished, returns its content up to the `>` symbol and |
1174 | | /// the offset in `chunk` just past this symbol, otherwise returns `None` |
1175 | | /// |
1176 | | /// # Parameters |
1177 | | /// - `buf`: buffer with data consumed on previous iterations |
1178 | | /// - `chunk`: data read on current iteration and not yet consumed from reader |
1179 | | #[inline(always)] |
1180 | 0 | fn parse<'b>(&mut self, buf: &[u8], chunk: &'b [u8]) -> Option<(&'b [u8], usize)> { |
1181 | 0 | match self { |
1182 | | Self::Comment => { |
1183 | 0 | for i in memchr::memchr_iter(b'>', chunk) { |
1184 | | // Need to read at least 6 symbols (`!---->`) for properly finished comment |
1185 | | // <!----> - XML comment |
1186 | | // 012345 - i |
1187 | 0 | if buf.len() + i > 4 { |
1188 | 0 | if chunk[..i].ends_with(b"--") { |
1189 | | // We cannot strip last `--` from the buffer because we need it in case of |
1190 | | // check_comments enabled option. XML standard requires that comment |
1191 | | // will not end with `--->` sequence because this is a special case of |
1192 | | // `--` in the comment (https://www.w3.org/TR/xml11/#sec-comments) |
1193 | 0 | return Some((&chunk[..i], i + 1)); // +1 for `>` |
1194 | 0 | } |
1195 | | // End sequence `-|->` was split at | |
1196 | | // buf --/ \-- chunk |
1197 | 0 | if i == 1 && buf.ends_with(b"-") && chunk[0] == b'-' { |
1198 | 0 | return Some((&chunk[..i], i + 1)); // +1 for `>` |
1199 | 0 | } |
1200 | | // End sequence `--|>` was split at | |
1201 | | // buf --/ \-- chunk |
1202 | 0 | if i == 0 && buf.ends_with(b"--") { |
1203 | 0 | return Some((&[], i + 1)); // +1 for `>` |
1204 | 0 | } |
1205 | 0 | } |
1206 | | } |
1207 | | } |
1208 | | Self::CData => { |
1209 | 0 | for i in memchr::memchr_iter(b'>', chunk) { |
1210 | 0 | if chunk[..i].ends_with(b"]]") { |
1211 | 0 | return Some((&chunk[..i], i + 1)); // +1 for `>` |
1212 | 0 | } |
1213 | | // End sequence `]|]>` was split at | |
1214 | | // buf --/ \-- chunk |
1215 | 0 | if i == 1 && buf.ends_with(b"]") && chunk[0] == b']' { |
1216 | 0 | return Some((&chunk[..i], i + 1)); // +1 for `>` |
1217 | 0 | } |
1218 | | // End sequence `]]|>` was split at | |
1219 | | // buf --/ \-- chunk |
1220 | 0 | if i == 0 && buf.ends_with(b"]]") { |
1221 | 0 | return Some((&[], i + 1)); // +1 for `>` |
1222 | 0 | } |
1223 | | } |
1224 | | } |
1225 | 0 | Self::DocType(ref mut balance) => { |
1226 | 0 | for i in memchr::memchr2_iter(b'<', b'>', chunk) { |
1227 | 0 | if chunk[i] == b'<' { |
1228 | 0 | *balance += 1; |
1229 | 0 | } else { |
1230 | 0 | if *balance == 0 { |
1231 | 0 | return Some((&chunk[..i], i + 1)); // +1 for `>` |
1232 | 0 | } |
1233 | 0 | *balance -= 1; |
1234 | | } |
1235 | | } |
1236 | | } |
1237 | | } |
1238 | 0 | None |
1239 | 0 | } |
1240 | | #[inline] |
1241 | 0 | const fn to_err(&self) -> SyntaxError { |
1242 | 0 | match self { |
1243 | 0 | Self::CData => SyntaxError::UnclosedCData, |
1244 | 0 | Self::Comment => SyntaxError::UnclosedComment, |
1245 | 0 | Self::DocType(_) => SyntaxError::UnclosedDoctype, |
1246 | | } |
1247 | 0 | } Unexecuted instantiation: <quick_xml::reader::BangType>::to_err Unexecuted instantiation: <quick_xml::reader::BangType>::to_err |
1248 | | } |
1249 | | |
1250 | | //////////////////////////////////////////////////////////////////////////////////////////////////// |
1251 | | |
1252 | | #[cfg(test)] |
1253 | | mod test { |
1254 | | /// Checks the internal implementation of the various reader methods |
1255 | | macro_rules! check { |
1256 | | ( |
1257 | | #[$test:meta] |
1258 | | $read_event:ident, |
1259 | | $read_until_close:ident, |
1260 | | // constructor of the XML source on which internal functions will be called |
1261 | | $source:path, |
1262 | | // constructor of the buffer to which read data will be stored |
1263 | | $buf:expr |
1264 | | $(, $async:ident, $await:ident)? |
1265 | | ) => { |
1266 | | mod read_bang_element { |
1267 | | use super::*; |
1268 | | use crate::errors::{Error, SyntaxError}; |
1269 | | use crate::reader::BangType; |
1270 | | use crate::utils::Bytes; |
1271 | | |
1272 | | /// Checks that reading CDATA content works correctly |
1273 | | mod cdata { |
1274 | | use super::*; |
1275 | | use pretty_assertions::assert_eq; |
1276 | | |
1277 | | /// Checks that if input begins like CDATA element, but CDATA start sequence |
1278 | | /// is not finished, parsing ends with an error |
1279 | | #[$test] |
1280 | | #[ignore = "start CDATA sequence fully checked outside of `read_bang_element`"] |
1281 | | $($async)? fn not_properly_start() { |
1282 | | let buf = $buf; |
1283 | | let mut position = 1; |
1284 | | let mut input = b"![]]>other content".as_ref(); |
1285 | | // ^= 1 |
1286 | | |
1287 | | match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { |
1288 | | Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedCData), |
1289 | | x => panic!( |
1290 | | "Expected `Err(Syntax(_))`, but got `{:?}`", |
1291 | | x |
1292 | | ), |
1293 | | } |
1294 | | assert_eq!(position, 1); |
1295 | | } |
1296 | | |
1297 | | /// Checks that if CDATA startup sequence was matched, but an end sequence |
1298 | | /// is not found, parsing ends with an error |
1299 | | #[$test] |
1300 | | $($async)? fn not_closed() { |
1301 | | let buf = $buf; |
1302 | | let mut position = 1; |
1303 | | let mut input = b"![CDATA[other content".as_ref(); |
1304 | | // ^= 1 ^= 22 |
1305 | | |
1306 | | match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { |
1307 | | Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedCData), |
1308 | | x => panic!( |
1309 | | "Expected `Err(Syntax(_))`, but got `{:?}`", |
1310 | | x |
1311 | | ), |
1312 | | } |
1313 | | assert_eq!(position, 22); |
1314 | | } |
1315 | | |
1316 | | /// Checks that CDATA element without content inside parsed successfully |
1317 | | #[$test] |
1318 | | $($async)? fn empty() { |
1319 | | let buf = $buf; |
1320 | | let mut position = 1; |
1321 | | let mut input = b"![CDATA[]]>other content".as_ref(); |
1322 | | // ^= 1 ^= 12 |
1323 | | |
1324 | | let (ty, bytes) = $source(&mut input) |
1325 | | .read_bang_element(buf, &mut position) |
1326 | | $(.$await)? |
1327 | | .unwrap(); |
1328 | | assert_eq!( |
1329 | | (ty, Bytes(bytes)), |
1330 | | (BangType::CData, Bytes(b"![CDATA[]]")) |
1331 | | ); |
1332 | | assert_eq!(position, 12); |
1333 | | } |
1334 | | |
1335 | | /// Checks that CDATA element with content parsed successfully. |
1336 | | /// Additionally checks that sequences inside CDATA that may look like |
1337 | | /// a CDATA end sequence do not interrupt CDATA parsing |
1338 | | #[$test] |
1339 | | $($async)? fn with_content() { |
1340 | | let buf = $buf; |
1341 | | let mut position = 1; |
1342 | | let mut input = b"![CDATA[cdata]] ]>content]]>other content]]>".as_ref(); |
1343 | | // ^= 1 ^= 29 |
1344 | | |
1345 | | let (ty, bytes) = $source(&mut input) |
1346 | | .read_bang_element(buf, &mut position) |
1347 | | $(.$await)? |
1348 | | .unwrap(); |
1349 | | assert_eq!( |
1350 | | (ty, Bytes(bytes)), |
1351 | | (BangType::CData, Bytes(b"![CDATA[cdata]] ]>content]]")) |
1352 | | ); |
1353 | | assert_eq!(position, 29); |
1354 | | } |
1355 | | } |
1356 | | |
1357 | | /// Checks that reading XML comments works correctly. According to the [specification], |
1358 | | /// comment data can contain any sequence except `--`: |
1359 | | /// |
1360 | | /// ```peg |
1361 | | /// comment = '<!--' (!'--' char)* '-->'; |
1362 | | /// char = [#x1-#x2C] |
1363 | | /// / [#x2E-#xD7FF] |
1364 | | /// / [#xE000-#xFFFD] |
1365 | | /// / [#x10000-#x10FFFF] |
1366 | | /// ``` |
1367 | | /// |
1368 | | /// The presence of this limitation, however, is simply a result of a poorly designed specification |
1369 | | /// (maybe for the purpose of building an LL(1) XML parser), and quick-xml does not check for |
1370 | | /// the presence of these sequences by default. These tests allow such content. |
1371 | | /// |
1372 | | /// [specification]: https://www.w3.org/TR/xml11/#dt-comment |
1373 | | mod comment { // `read_bang_element` on inputs that begin like a comment (`!-`)
1374 | | use super::*;
1375 | | use pretty_assertions::assert_eq;
1376 | | 
1377 | | #[$test]
1378 | | #[ignore = "start comment sequence fully checked outside of `read_bang_element`"]
1379 | | $($async)? fn not_properly_start() {
1380 | | let buf = $buf;
1381 | | let mut position = 1;
1382 | | let mut input = b"!- -->other content".as_ref();
1383 | | // position: starts at 1 and is expected to still be 1 after the error
1384 | | 
1385 | | match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1386 | | Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment),
1387 | | x => panic!(
1388 | | "Expected `Err(Syntax(_))`, but got `{:?}`",
1389 | | x
1390 | | ),
1391 | | }
1392 | | assert_eq!(position, 1);
1393 | | }
1394 | | 
1395 | | #[$test]
1396 | | $($async)? fn not_properly_end() {
1397 | | let buf = $buf;
1398 | | let mut position = 1;
1399 | | let mut input = b"!->other content".as_ref();
1400 | | // position: 1 -> 17 (all 16 input bytes are consumed before the error is reported)
1401 | | 
1402 | | match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1403 | | Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment),
1404 | | x => panic!(
1405 | | "Expected `Err(Syntax(_))`, but got `{:?}`",
1406 | | x
1407 | | ),
1408 | | }
1409 | | assert_eq!(position, 17);
1410 | | }
1411 | | 
1412 | | #[$test]
1413 | | $($async)? fn not_closed1() {
1414 | | let buf = $buf;
1415 | | let mut position = 1;
1416 | | let mut input = b"!--other content".as_ref();
1417 | | // position: 1 -> 17 (all 16 input bytes are consumed before the error is reported)
1418 | | 
1419 | | match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1420 | | Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment),
1421 | | x => panic!(
1422 | | "Expected `Err(Syntax(_))`, but got `{:?}`",
1423 | | x
1424 | | ),
1425 | | }
1426 | | assert_eq!(position, 17);
1427 | | }
1428 | | 
1429 | | #[$test]
1430 | | $($async)? fn not_closed2() {
1431 | | let buf = $buf;
1432 | | let mut position = 1;
1433 | | let mut input = b"!-->other content".as_ref();
1434 | | // position: 1 -> 18 (all 17 input bytes are consumed; a lone `-->` right after `!` does not close)
1435 | | 
1436 | | match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1437 | | Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment),
1438 | | x => panic!(
1439 | | "Expected `Err(Syntax(_))`, but got `{:?}`",
1440 | | x
1441 | | ),
1442 | | }
1443 | | assert_eq!(position, 18);
1444 | | }
1445 | | 
1446 | | #[$test]
1447 | | $($async)? fn not_closed3() {
1448 | | let buf = $buf;
1449 | | let mut position = 1;
1450 | | let mut input = b"!--->other content".as_ref();
1451 | | // position: 1 -> 19 (all 18 input bytes are consumed before the error is reported)
1452 | | 
1453 | | match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1454 | | Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment),
1455 | | x => panic!(
1456 | | "Expected `Err(Syntax(_))`, but got `{:?}`",
1457 | | x
1458 | | ),
1459 | | }
1460 | | assert_eq!(position, 19);
1461 | | }
1462 | | 
1463 | | #[$test]
1464 | | $($async)? fn empty() {
1465 | | let buf = $buf;
1466 | | let mut position = 1;
1467 | | let mut input = b"!---->other content".as_ref();
1468 | | // position: 1 -> 7 (6 bytes consumed: the returned `!----` plus the closing `>`)
1469 | | 
1470 | | let (ty, bytes) = $source(&mut input)
1471 | | .read_bang_element(buf, &mut position)
1472 | | $(.$await)?
1473 | | .unwrap();
1474 | | assert_eq!(
1475 | | (ty, Bytes(bytes)),
1476 | | (BangType::Comment, Bytes(b"!----"))
1477 | | );
1478 | | assert_eq!(position, 7);
1479 | | }
1480 | | 
1481 | | #[$test]
1482 | | $($async)? fn with_content() {
1483 | | let buf = $buf;
1484 | | let mut position = 1;
1485 | | let mut input = b"!--->comment<--->other content".as_ref();
1486 | | // position: 1 -> 18 (17 bytes consumed: the returned `!--->comment<---` plus the closing `>`)
1487 | | 
1488 | | let (ty, bytes) = $source(&mut input)
1489 | | .read_bang_element(buf, &mut position)
1490 | | $(.$await)?
1491 | | .unwrap();
1492 | | assert_eq!(
1493 | | (ty, Bytes(bytes)),
1494 | | (BangType::Comment, Bytes(b"!--->comment<---"))
1495 | | );
1496 | | assert_eq!(position, 18);
1497 | | }
1498 | | }
1499 | | |
1500 | | /// Checks that reading a DOCTYPE definition works correctly, for both the upper- and lowercase keyword
1501 | | mod doctype {
1502 | | use super::*;
1503 | | 
1504 | | mod uppercase { // inputs spelled with `DOCTYPE`
1505 | | use super::*;
1506 | | use pretty_assertions::assert_eq;
1507 | | 
1508 | | #[$test]
1509 | | $($async)? fn not_properly_start() {
1510 | | let buf = $buf;
1511 | | let mut position = 1;
1512 | | let mut input = b"!D other content".as_ref();
1513 | | // position: 1 -> 17 (all 16 input bytes are consumed before the error is reported)
1514 | | 
1515 | | match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1516 | | Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
1517 | | x => panic!(
1518 | | "Expected `Err(Syntax(_))`, but got `{:?}`",
1519 | | x
1520 | | ),
1521 | | }
1522 | | assert_eq!(position, 17);
1523 | | }
1524 | | 
1525 | | #[$test]
1526 | | $($async)? fn without_space() {
1527 | | let buf = $buf;
1528 | | let mut position = 1;
1529 | | let mut input = b"!DOCTYPEother content".as_ref();
1530 | | // position: 1 -> 22 (all 21 input bytes are consumed before the error is reported)
1531 | | 
1532 | | match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1533 | | Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
1534 | | x => panic!(
1535 | | "Expected `Err(Syntax(_))`, but got `{:?}`",
1536 | | x
1537 | | ),
1538 | | }
1539 | | assert_eq!(position, 22);
1540 | | }
1541 | | 
1542 | | #[$test]
1543 | | $($async)? fn empty() {
1544 | | let buf = $buf;
1545 | | let mut position = 1;
1546 | | let mut input = b"!DOCTYPE>other content".as_ref();
1547 | | // position: 1 -> 10 (9 bytes consumed: the returned `!DOCTYPE` plus the closing `>`)
1548 | | 
1549 | | let (ty, bytes) = $source(&mut input)
1550 | | .read_bang_element(buf, &mut position)
1551 | | $(.$await)?
1552 | | .unwrap();
1553 | | assert_eq!(
1554 | | (ty, Bytes(bytes)),
1555 | | (BangType::DocType(0), Bytes(b"!DOCTYPE"))
1556 | | );
1557 | | assert_eq!(position, 10);
1558 | | }
1559 | | 
1560 | | #[$test]
1561 | | $($async)? fn not_closed() {
1562 | | let buf = $buf;
1563 | | let mut position = 1;
1564 | | let mut input = b"!DOCTYPE other content".as_ref();
1565 | | // position: 1 -> 23 (all 22 input bytes are consumed before the error is reported)
1566 | | 
1567 | | match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1568 | | Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
1569 | | x => panic!(
1570 | | "Expected `Err(Syntax(_))`, but got `{:?}`",
1571 | | x
1572 | | ),
1573 | | }
1574 | | assert_eq!(position, 23);
1575 | | }
1576 | | }
1577 | | 
1578 | | mod lowercase { // same cases as `uppercase`, spelled with `doctype`
1579 | | use super::*;
1580 | | use pretty_assertions::assert_eq;
1581 | | 
1582 | | #[$test]
1583 | | $($async)? fn not_properly_start() {
1584 | | let buf = $buf;
1585 | | let mut position = 1;
1586 | | let mut input = b"!d other content".as_ref();
1587 | | // position: 1 -> 17 (all 16 input bytes are consumed before the error is reported)
1588 | | 
1589 | | match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1590 | | Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
1591 | | x => panic!(
1592 | | "Expected `Err(Syntax(_))`, but got `{:?}`",
1593 | | x
1594 | | ),
1595 | | }
1596 | | assert_eq!(position, 17);
1597 | | }
1598 | | 
1599 | | #[$test]
1600 | | $($async)? fn without_space() {
1601 | | let buf = $buf;
1602 | | let mut position = 1;
1603 | | let mut input = b"!doctypeother content".as_ref();
1604 | | // position: 1 -> 22 (all 21 input bytes are consumed before the error is reported)
1605 | | 
1606 | | match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1607 | | Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
1608 | | x => panic!(
1609 | | "Expected `Err(Syntax(_))`, but got `{:?}`",
1610 | | x
1611 | | ),
1612 | | }
1613 | | assert_eq!(position, 22);
1614 | | }
1615 | | 
1616 | | #[$test]
1617 | | $($async)? fn empty() {
1618 | | let buf = $buf;
1619 | | let mut position = 1;
1620 | | let mut input = b"!doctype>other content".as_ref();
1621 | | // position: 1 -> 10 (9 bytes consumed: the returned `!doctype` plus the closing `>`)
1622 | | 
1623 | | let (ty, bytes) = $source(&mut input)
1624 | | .read_bang_element(buf, &mut position)
1625 | | $(.$await)?
1626 | | .unwrap();
1627 | | assert_eq!(
1628 | | (ty, Bytes(bytes)),
1629 | | (BangType::DocType(0), Bytes(b"!doctype"))
1630 | | );
1631 | | assert_eq!(position, 10);
1632 | | }
1633 | | 
1634 | | #[$test]
1635 | | $($async)? fn not_closed() {
1636 | | let buf = $buf;
1637 | | let mut position = 1;
1638 | | let mut input = b"!doctype other content".as_ref();
1639 | | // position: 1 -> 23 (all 22 input bytes are consumed before the error is reported)
1640 | | 
1641 | | match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1642 | | Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
1643 | | x => panic!(
1644 | | "Expected `Err(Syntax(_))`, but got `{:?}`",
1645 | | x
1646 | | ),
1647 | | }
1648 | | assert_eq!(position, 23);
1649 | | }
1650 | | }
1651 | | }
1652 | | } |
1653 | | |
1654 | | mod read_text { // one test per `ReadTextResult` variant returned by `read_text`
1655 | | use super::*;
1656 | | use crate::reader::ReadTextResult;
1657 | | use crate::utils::Bytes;
1658 | | use pretty_assertions::assert_eq;
1659 | | 
1660 | | #[$test]
1661 | | $($async)? fn empty() {
1662 | | let buf = $buf;
1663 | | let mut position = 1;
1664 | | let mut input = b"".as_ref();
1665 | | // position: stays at 1 (empty input, nothing to consume)
1666 | | 
1667 | | match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
1668 | | ReadTextResult::UpToEof(bytes) => assert_eq!(Bytes(bytes), Bytes(b"")),
1669 | | x => panic!("Expected `UpToEof(_)`, but got `{:?}`", x),
1670 | | }
1671 | | assert_eq!(position, 1);
1672 | | }
1673 | | 
1674 | | #[$test]
1675 | | $($async)? fn markup() {
1676 | | let buf = $buf;
1677 | | let mut position = 1;
1678 | | let mut input = b"<".as_ref();
1679 | | // position: 1 -> 2 (the `<` is consumed)
1680 | | 
1681 | | match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
1682 | | ReadTextResult::Markup(b) => assert_eq!(b, $buf), // payload compares equal to a fresh `$buf`
1683 | | x => panic!("Expected `Markup(_)`, but got `{:?}`", x),
1684 | | }
1685 | | assert_eq!(position, 2);
1686 | | }
1687 | | 
1688 | | #[$test]
1689 | | $($async)? fn ref_() {
1690 | | let buf = $buf;
1691 | | let mut position = 1;
1692 | | let mut input = b"&".as_ref();
1693 | | // position: stays at 1 (the `&` is not consumed by `read_text`)
1694 | | 
1695 | | match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
1696 | | ReadTextResult::Ref(b) => assert_eq!(b, $buf), // payload compares equal to a fresh `$buf`
1697 | | x => panic!("Expected `Ref(_)`, but got `{:?}`", x),
1698 | | }
1699 | | assert_eq!(position, 1);
1700 | | }
1701 | | 
1702 | | #[$test]
1703 | | $($async)? fn up_to_markup() {
1704 | | let buf = $buf;
1705 | | let mut position = 1;
1706 | | let mut input = b"a<".as_ref();
1707 | | // position: 1 -> 3 (both the text `a` and the `<` are consumed)
1708 | | 
1709 | | match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
1710 | | ReadTextResult::UpToMarkup(bytes) => assert_eq!(Bytes(bytes), Bytes(b"a")),
1711 | | x => panic!("Expected `UpToMarkup(_)`, but got `{:?}`", x),
1712 | | }
1713 | | assert_eq!(position, 3);
1714 | | }
1715 | | 
1716 | | #[$test]
1717 | | $($async)? fn up_to_ref() {
1718 | | let buf = $buf;
1719 | | let mut position = 1;
1720 | | let mut input = b"a&".as_ref();
1721 | | // position: 1 -> 2 (only the text `a` is consumed, the `&` is left in the input)
1722 | | 
1723 | | match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
1724 | | ReadTextResult::UpToRef(bytes) => assert_eq!(Bytes(bytes), Bytes(b"a")),
1725 | | x => panic!("Expected `UpToRef(_)`, but got `{:?}`", x),
1726 | | }
1727 | | assert_eq!(position, 2);
1728 | | }
1729 | | 
1730 | | #[$test]
1731 | | $($async)? fn up_to_eof() {
1732 | | let buf = $buf;
1733 | | let mut position = 1;
1734 | | let mut input = b"a".as_ref();
1735 | | // position: 1 -> 2 (the single text byte is consumed)
1736 | | 
1737 | | match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
1738 | | ReadTextResult::UpToEof(bytes) => assert_eq!(Bytes(bytes), Bytes(b"a")),
1739 | | x => panic!("Expected `UpToEof(_)`, but got `{:?}`", x),
1740 | | }
1741 | | assert_eq!(position, 2);
1742 | | }
1743 | | }
1744 | | |
1745 | | mod read_ref { // one test per `ReadRefResult` variant returned by `read_ref`
1746 | | use super::*;
1747 | | use crate::reader::ReadRefResult;
1748 | | use crate::utils::Bytes;
1749 | | use pretty_assertions::assert_eq;
1750 | | 
1751 | | // Empty input is not allowed for `read_ref`, so it is not tested:
1752 | | // the borrowed source triggers a debug assertion,
1753 | | // the buffered one does nothing due to implementation details.
1754 | | 
1755 | | #[$test]
1756 | | $($async)? fn up_to_eof() {
1757 | | let buf = $buf;
1758 | | let mut position = 1;
1759 | | let mut input = b"&".as_ref();
1760 | | // position: 1 -> 2 (the lone `&` is consumed)
1761 | | 
1762 | | match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
1763 | | ReadRefResult::UpToEof(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&")),
1764 | | x => panic!("Expected `UpToEof(_)`, but got `{:?}`", x),
1765 | | }
1766 | | assert_eq!(position, 2);
1767 | | }
1768 | | 
1769 | | #[$test]
1770 | | $($async)? fn up_to_ref() {
1771 | | let buf = $buf;
1772 | | let mut position = 1;
1773 | | let mut input = b"&&".as_ref();
1774 | | // position: 1 -> 2 (only the first `&` is consumed, the second one is left in the input)
1775 | | 
1776 | | match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
1777 | | ReadRefResult::UpToRef(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&")),
1778 | | x => panic!("Expected `UpToRef(_)`, but got `{:?}`", x),
1779 | | }
1780 | | assert_eq!(position, 2);
1781 | | }
1782 | | 
1783 | | #[$test]
1784 | | $($async)? fn up_to_markup() {
1785 | | let buf = $buf;
1786 | | let mut position = 1;
1787 | | let mut input = b"&<".as_ref();
1788 | | // position: 1 -> 3 (both the `&` and the `<` are consumed)
1789 | | 
1790 | | match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
1791 | | ReadRefResult::UpToMarkup(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&")),
1792 | | x => panic!("Expected `UpToMarkup(_)`, but got `{:?}`", x),
1793 | | }
1794 | | assert_eq!(position, 3);
1795 | | }
1796 | | 
1797 | | #[$test]
1798 | | $($async)? fn empty_ref() {
1799 | | let buf = $buf;
1800 | | let mut position = 1;
1801 | | let mut input = b"&;".as_ref();
1802 | | // position: 1 -> 3 (the terminating `;` is consumed but not part of the returned bytes)
1803 | | 
1804 | | match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
1805 | | ReadRefResult::Ref(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&")),
1806 | | x => panic!("Expected `Ref(_)`, but got `{:?}`", x),
1807 | | }
1808 | | assert_eq!(position, 3);
1809 | | }
1810 | | 
1811 | | #[$test]
1812 | | $($async)? fn normal() {
1813 | | let buf = $buf;
1814 | | let mut position = 1;
1815 | | let mut input = b"&lt;".as_ref(); // must be the 4-byte entity text; a decoded `<` cannot move position to 5
1816 | | // position: 1 -> 5 (4 bytes consumed; as in `empty_ref`, the `;` is not part of the returned bytes)
1817 | | 
1818 | | match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
1819 | | ReadRefResult::Ref(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&lt")),
1820 | | x => panic!("Expected `Ref(_)`, but got `{:?}`", x),
1821 | | }
1822 | | assert_eq!(position, 5);
1823 | | }
1824 | | }
1825 | | |
1826 | | mod read_element { // `read_with(ElementParser)` on open, self-closed and close tag bodies
1827 | | use super::*;
1828 | | use crate::errors::{Error, SyntaxError};
1829 | | use crate::parser::ElementParser;
1830 | | use crate::utils::Bytes;
1831 | | use pretty_assertions::assert_eq;
1832 | | 
1833 | | /// Checks that nothing was read from empty buffer
1834 | | #[$test]
1835 | | $($async)? fn empty() {
1836 | | let buf = $buf;
1837 | | let mut position = 1;
1838 | | let mut input = b"".as_ref();
1839 | | // position: stays at 1 (empty input, nothing to consume)
1840 | | 
1841 | | match $source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? {
1842 | | Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedTag),
1843 | | x => panic!(
1844 | | "Expected `Err(Syntax(_))`, but got `{:?}`",
1845 | | x
1846 | | ),
1847 | | }
1848 | | assert_eq!(position, 1);
1849 | | }
1850 | | 
1851 | | mod open { // tag bodies terminated by a plain `>`
1852 | | use super::*;
1853 | | use pretty_assertions::assert_eq;
1854 | | 
1855 | | #[$test]
1856 | | $($async)? fn empty_tag() {
1857 | | let buf = $buf;
1858 | | let mut position = 1;
1859 | | let mut input = b">".as_ref();
1860 | | // position: 1 -> 2 (the `>` is consumed but not returned)
1861 | | 
1862 | | assert_eq!(
1863 | | Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1864 | | Bytes(b"")
1865 | | );
1866 | | assert_eq!(position, 2);
1867 | | }
1868 | | 
1869 | | #[$test]
1870 | | $($async)? fn normal() {
1871 | | let buf = $buf;
1872 | | let mut position = 1;
1873 | | let mut input = b"tag>".as_ref();
1874 | | // position: 1 -> 5
1875 | | 
1876 | | assert_eq!(
1877 | | Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1878 | | Bytes(b"tag")
1879 | | );
1880 | | assert_eq!(position, 5);
1881 | | }
1882 | | 
1883 | | #[$test]
1884 | | $($async)? fn empty_ns_empty_tag() {
1885 | | let buf = $buf;
1886 | | let mut position = 1;
1887 | | let mut input = b":>".as_ref();
1888 | | // position: 1 -> 3
1889 | | 
1890 | | assert_eq!(
1891 | | Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1892 | | Bytes(b":")
1893 | | );
1894 | | assert_eq!(position, 3);
1895 | | }
1896 | | 
1897 | | #[$test]
1898 | | $($async)? fn empty_ns() {
1899 | | let buf = $buf;
1900 | | let mut position = 1;
1901 | | let mut input = b":tag>".as_ref();
1902 | | // position: 1 -> 6
1903 | | 
1904 | | assert_eq!(
1905 | | Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1906 | | Bytes(b":tag")
1907 | | );
1908 | | assert_eq!(position, 6);
1909 | | }
1910 | | 
1911 | | #[$test]
1912 | | $($async)? fn with_attributes() {
1913 | | let buf = $buf;
1914 | | let mut position = 1;
1915 | | let mut input = br#"tag  attr-1=">"  attr2  =  '>'  3attr>"#.as_ref();
1916 | | // position: 1 -> 39 (38 bytes; the `>` inside quoted attribute values must not end the tag)
1917 | | 
1918 | | assert_eq!(
1919 | | Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1920 | | Bytes(br#"tag  attr-1=">"  attr2  =  '>'  3attr"#)
1921 | | );
1922 | | assert_eq!(position, 39);
1923 | | }
1924 | | }
1925 | | 
1926 | | mod self_closed { // tag bodies terminated by `/>`; the `/` stays in the returned bytes
1927 | | use super::*;
1928 | | use pretty_assertions::assert_eq;
1929 | | 
1930 | | #[$test]
1931 | | $($async)? fn empty_tag() {
1932 | | let buf = $buf;
1933 | | let mut position = 1;
1934 | | let mut input = b"/>".as_ref();
1935 | | // position: 1 -> 3
1936 | | 
1937 | | assert_eq!(
1938 | | Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1939 | | Bytes(b"/")
1940 | | );
1941 | | assert_eq!(position, 3);
1942 | | }
1943 | | 
1944 | | #[$test]
1945 | | $($async)? fn normal() {
1946 | | let buf = $buf;
1947 | | let mut position = 1;
1948 | | let mut input = b"tag/>".as_ref();
1949 | | // position: 1 -> 6
1950 | | 
1951 | | assert_eq!(
1952 | | Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1953 | | Bytes(b"tag/")
1954 | | );
1955 | | assert_eq!(position, 6);
1956 | | }
1957 | | 
1958 | | #[$test]
1959 | | $($async)? fn empty_ns_empty_tag() {
1960 | | let buf = $buf;
1961 | | let mut position = 1;
1962 | | let mut input = b":/>".as_ref();
1963 | | // position: 1 -> 4
1964 | | 
1965 | | assert_eq!(
1966 | | Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1967 | | Bytes(b":/")
1968 | | );
1969 | | assert_eq!(position, 4);
1970 | | }
1971 | | 
1972 | | #[$test]
1973 | | $($async)? fn empty_ns() {
1974 | | let buf = $buf;
1975 | | let mut position = 1;
1976 | | let mut input = b":tag/>".as_ref();
1977 | | // position: 1 -> 7
1978 | | 
1979 | | assert_eq!(
1980 | | Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1981 | | Bytes(b":tag/")
1982 | | );
1983 | | assert_eq!(position, 7);
1984 | | }
1985 | | 
1986 | | #[$test]
1987 | | $($async)? fn with_attributes() {
1988 | | let buf = $buf;
1989 | | let mut position = 1;
1990 | | let mut input = br#"tag  attr-1="/>"  attr2  =  '/>'  3attr/>"#.as_ref();
1991 | | // position: 1 -> 42 (41 bytes; the `/>` inside quoted attribute values must not end the tag)
1992 | | 
1993 | | assert_eq!(
1994 | | Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
1995 | | Bytes(br#"tag  attr-1="/>"  attr2  =  '/>'  3attr/"#)
1996 | | );
1997 | | assert_eq!(position, 42);
1998 | | }
1999 | | }
2000 | | 
2001 | | mod close { // tag bodies that start with `/`
2002 | | use super::*;
2003 | | use pretty_assertions::assert_eq;
2004 | | 
2005 | | #[$test]
2006 | | $($async)? fn empty_tag() {
2007 | | let buf = $buf;
2008 | | let mut position = 1;
2009 | | let mut input = b"/ >".as_ref();
2010 | | // position: 1 -> 4
2011 | | 
2012 | | assert_eq!(
2013 | | Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
2014 | | Bytes(b"/ ")
2015 | | );
2016 | | assert_eq!(position, 4);
2017 | | }
2018 | | 
2019 | | #[$test]
2020 | | $($async)? fn normal() {
2021 | | let buf = $buf;
2022 | | let mut position = 1;
2023 | | let mut input = b"/tag>".as_ref();
2024 | | // position: 1 -> 6
2025 | | 
2026 | | assert_eq!(
2027 | | Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
2028 | | Bytes(b"/tag")
2029 | | );
2030 | | assert_eq!(position, 6);
2031 | | }
2032 | | 
2033 | | #[$test]
2034 | | $($async)? fn empty_ns_empty_tag() {
2035 | | let buf = $buf;
2036 | | let mut position = 1;
2037 | | let mut input = b"/:>".as_ref();
2038 | | // position: 1 -> 4
2039 | | 
2040 | | assert_eq!(
2041 | | Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
2042 | | Bytes(b"/:")
2043 | | );
2044 | | assert_eq!(position, 4);
2045 | | }
2046 | | 
2047 | | #[$test]
2048 | | $($async)? fn empty_ns() {
2049 | | let buf = $buf;
2050 | | let mut position = 1;
2051 | | let mut input = b"/:tag>".as_ref();
2052 | | // position: 1 -> 7
2053 | | 
2054 | | assert_eq!(
2055 | | Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
2056 | | Bytes(b"/:tag")
2057 | | );
2058 | | assert_eq!(position, 7);
2059 | | }
2060 | | 
2061 | | #[$test]
2062 | | $($async)? fn with_attributes() {
2063 | | let buf = $buf;
2064 | | let mut position = 1;
2065 | | let mut input = br#"/tag  attr-1=">"  attr2  =  '>'  3attr>"#.as_ref();
2066 | | // position: 1 -> 40 (39 bytes; the `>` inside quoted attribute values must not end the tag)
2067 | | 
2068 | | assert_eq!(
2069 | | Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
2070 | | Bytes(br#"/tag  attr-1=">"  attr2  =  '>'  3attr"#)
2071 | | );
2072 | | assert_eq!(position, 40);
2073 | | }
2074 | | }
2075 | | }
2076 | | |
2077 | | /// Ensures that each kind of event is produced on its own and that no empty `Text` events are generated
2078 | | mod $read_event {
2079 | | use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesPI, BytesStart, BytesText, Event};
2080 | | use crate::reader::Reader;
2081 | | use pretty_assertions::assert_eq;
2082 | | 
2083 | | /// When `encoding` feature is enabled, encoding should be detected
2084 | | /// from BOM (UTF-8) and BOM should be stripped.
2085 | | ///
2086 | | /// When `encoding` feature is disabled, UTF-8 is assumed and BOM
2087 | | /// character should be stripped for consistency
2088 | | #[$test]
2089 | | $($async)? fn bom_from_reader() {
2090 | | let mut reader = Reader::from_reader("\u{feff}\u{feff}".as_bytes());
2091 | | 
2092 | | assert_eq!( // only the first BOM is stripped; the second one is reported as text
2093 | | reader.$read_event($buf) $(.$await)? .unwrap(),
2094 | | Event::Text(BytesText::from_escaped("\u{feff}"))
2095 | | );
2096 | | 
2097 | | assert_eq!(
2098 | | reader.$read_event($buf) $(.$await)? .unwrap(),
2099 | | Event::Eof
2100 | | );
2101 | | }
2102 | | 
2103 | | /// When parsing from &str, encoding is fixed (UTF-8), so
2104 | | /// - when `encoding` feature is disabled, the behavior is the
2105 | | /// same as in the `bom_from_reader` test
2106 | | /// - when `encoding` feature is enabled, the behavior should
2107 | | /// stay consistent, so the first BOM character is stripped
2108 | | #[$test]
2109 | | $($async)? fn bom_from_str() {
2110 | | let mut reader = Reader::from_str("\u{feff}\u{feff}");
2111 | | 
2112 | | assert_eq!( // only the first BOM is stripped; the second one is reported as text
2113 | | reader.$read_event($buf) $(.$await)? .unwrap(),
2114 | | Event::Text(BytesText::from_escaped("\u{feff}"))
2115 | | );
2116 | | 
2117 | | assert_eq!(
2118 | | reader.$read_event($buf) $(.$await)? .unwrap(),
2119 | | Event::Eof
2120 | | );
2121 | | }
2122 | | 
2123 | | #[$test]
2124 | | $($async)? fn declaration() { // `<?xml ?>` is reported as a single `Event::Decl`
2125 | | let mut reader = Reader::from_str("<?xml ?>");
2126 | | 
2127 | | assert_eq!(
2128 | | reader.$read_event($buf) $(.$await)? .unwrap(),
2129 | | Event::Decl(BytesDecl::from_start(BytesStart::from_content("xml ", 3)))
2130 | | );
2131 | | }
2132 | | 
2133 | | #[$test]
2134 | | $($async)? fn doctype() { // only the part after `DOCTYPE ` (`x`) is expected in the event
2135 | | let mut reader = Reader::from_str("<!DOCTYPE x>");
2136 | | 
2137 | | assert_eq!(
2138 | | reader.$read_event($buf) $(.$await)? .unwrap(),
2139 | | Event::DocType(BytesText::from_escaped("x"))
2140 | | );
2141 | | }
2142 | | 
2143 | | #[$test]
2144 | | $($async)? fn processing_instruction() { // `?`, `>` and quotes inside the PI must not end it early
2145 | | let mut reader = Reader::from_str("<?xml-stylesheet '? >\" ?>");
2146 | | 
2147 | | assert_eq!(
2148 | | reader.$read_event($buf) $(.$await)? .unwrap(),
2149 | | Event::PI(BytesPI::new("xml-stylesheet '? >\" "))
2150 | | );
2151 | | }
2152 | | 
2153 | | /// Lone closing tags are not allowed, so testing it together with start tag
2154 | | #[$test]
2155 | | $($async)? fn start_and_end() {
2156 | | let mut reader = Reader::from_str("<tag></tag>");
2157 | | 
2158 | | assert_eq!(
2159 | | reader.$read_event($buf) $(.$await)? .unwrap(),
2160 | | Event::Start(BytesStart::new("tag"))
2161 | | );
2162 | | 
2163 | | assert_eq!( // no empty `Text` event between the start and the end tag
2164 | | reader.$read_event($buf) $(.$await)? .unwrap(),
2165 | | Event::End(BytesEnd::new("tag"))
2166 | | );
2167 | | }
2168 | | 
2169 | | #[$test]
2170 | | $($async)? fn empty() { // a self-closed tag is reported as `Event::Empty`
2171 | | let mut reader = Reader::from_str("<tag/>");
2172 | | 
2173 | | assert_eq!(
2174 | | reader.$read_event($buf) $(.$await)? .unwrap(),
2175 | | Event::Empty(BytesStart::new("tag"))
2176 | | );
2177 | | }
2178 | | 
2179 | | #[$test]
2180 | | $($async)? fn text() { // plain text without any markup
2181 | | let mut reader = Reader::from_str("text");
2182 | | 
2183 | | assert_eq!(
2184 | | reader.$read_event($buf) $(.$await)? .unwrap(),
2185 | | Event::Text(BytesText::from_escaped("text"))
2186 | | );
2187 | | }
2188 | | 
2189 | | #[$test]
2190 | | $($async)? fn cdata() { // an empty CDATA section still yields a `CData` event
2191 | | let mut reader = Reader::from_str("<![CDATA[]]>");
2192 | | 
2193 | | assert_eq!(
2194 | | reader.$read_event($buf) $(.$await)? .unwrap(),
2195 | | Event::CData(BytesCData::new(""))
2196 | | );
2197 | | }
2198 | | 
2199 | | #[$test]
2200 | | $($async)? fn comment() { // an empty comment still yields a `Comment` event
2201 | | let mut reader = Reader::from_str("<!---->");
2202 | | 
2203 | | assert_eq!(
2204 | | reader.$read_event($buf) $(.$await)? .unwrap(),
2205 | | Event::Comment(BytesText::from_escaped(""))
2206 | | );
2207 | | }
2208 | | 
2209 | | #[$test]
2210 | | $($async)? fn eof() { // empty input yields `Eof` directly, without an empty `Text` first
2211 | | let mut reader = Reader::from_str("");
2212 | | 
2213 | | assert_eq!(
2214 | | reader.$read_event($buf) $(.$await)? .unwrap(),
2215 | | Event::Eof
2216 | | );
2217 | | }
2218 | | }
2219 | | }; |
2220 | | } |
2221 | | |
2222 | | // Export macros for the child modules: |
2223 | | // - buffered_reader |
2224 | | // - slice_reader |
2225 | | pub(super) use check; |
2226 | | } |