/rust/registry/src/index.crates.io-1949cf8c6b5b557f/quick-xml-0.37.5/src/parser/pi.rs
Line | Count | Source |
1 | | //! Contains a parser for an XML processing instruction. |
2 | | |
3 | | use crate::errors::SyntaxError; |
4 | | use crate::parser::Parser; |
5 | | |
6 | | /// A parser that searches for a `?>` sequence in the slice. |
7 | | /// |
8 | | /// To use the parser, create an instance of it and [`feed`] data into it. |
9 | | /// After a successful search the parser returns [`Some`] with the position, within |
10 | | /// the chunk just fed, of the `>` that ends the processing instruction (the last |
11 | | /// byte of `?>`). If the search was unsuccessful, [`None`] is returned. You typically |
12 | | /// expect a positive result, so you should keep feeding new data until you get one. |
13 | | /// |
14 | | /// NOTE: after a successful match the parser is not reset to the initial |
15 | | /// state and should not be used anymore. Create a new parser if you want to perform |
16 | | /// a new search. |
17 | | /// |
18 | | /// # Example |
19 | | /// |
20 | | /// ``` |
21 | | /// # use pretty_assertions::assert_eq; |
22 | | /// use quick_xml::parser::{Parser, PiParser}; |
23 | | /// |
24 | | /// let mut parser = PiParser::default(); |
25 | | /// |
26 | | /// // Parse `<?instruction with = 'some > and ?' inside?>and the text follow...` |
27 | | /// // split into three chunks |
28 | | /// assert_eq!(parser.feed(b"<?instruction"), None); |
29 | | /// // ...get new chunk of data |
30 | | /// assert_eq!(parser.feed(b" with = 'some > and ?"), None); |
31 | | /// // ...get another chunk of data |
32 | | /// assert_eq!(parser.feed(b"' inside?>and the text follow..."), Some(9)); |
33 | | /// //                       ^        ^ |
34 | | /// //                       0        9 |
35 | | /// ``` |
36 | | /// |
37 | | /// [`feed`]: Self::feed() |
38 | | #[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] |
39 | | pub struct PiParser( |
40 | | /// A flag that indicates whether the `bytes` passed to the previous attempt to |
41 | | /// find the end ended with `?`. |
42 | | pub bool, |
43 | | ); |
44 | | |
45 | | impl Parser for PiParser { |
46 | | /// Determines the end position of a processing instruction in the provided slice. |
47 | | /// A processing instruction ends on the first occurrence of `?>`, which cannot be |
48 | | /// escaped. |
49 | | /// |
50 | | /// Returns the index in `bytes` of the `>` that closes `?>` (`0` if `bytes` starts with `>` and the previous slice ended with `?`), or `None` if no such sequence was found; on `None` the parser records whether `bytes` ended with `?`. |
51 | | /// |
52 | | /// [Section 2.6]: Parameter entity references MUST NOT be recognized within |
53 | | /// processing instructions, so the parser does not search for them. |
54 | | /// |
55 | | /// # Parameters |
56 | | /// - `bytes`: a slice to find the end of a processing instruction. |
57 | | /// Should contain text in ASCII-compatible encoding |
58 | | /// |
59 | | /// [Section 2.6]: https://www.w3.org/TR/xml11/#sec-pi |
60 | | #[inline] |
61 | 0 | fn feed(&mut self, bytes: &[u8]) -> Option<usize> { |
62 | 0 | for i in memchr::memchr_iter(b'>', bytes) { |
63 | 0 | match i { |
64 | 0 | 0 if self.0 => return Some(0), |
65 | | // The byte just before this `>` is `?`, so `?>` ends at index `i` |
66 | 0 | i if i > 0 && bytes[i - 1] == b'?' => return Some(i), |
67 | 0 | _ => {} |
68 | | } |
69 | | } |
70 | 0 | self.0 = bytes.last().copied() == Some(b'?'); |
71 | 0 | None |
72 | 0 | } |
73 | | |
74 | | #[inline] |
75 | 0 | fn eof_error() -> SyntaxError { |
76 | 0 | SyntaxError::UnclosedPIOrXmlDecl |
77 | 0 | } |
78 | | } |
79 | | |
80 | | #[test] |
81 | | fn pi() { |
82 | | use pretty_assertions::assert_eq; |
83 | | |
84 | | /// Returns `Ok(pos)` with the index in `bytes` of the `>` that ends the |
85 | | /// processing instruction. |
86 | | /// |
87 | | /// Returns `Err(internal_state)` if parsing is not done yet. |
88 | | fn parse_pi(bytes: &[u8], had_question_mark: bool) -> Result<usize, bool> { |
89 | | let mut parser = PiParser(had_question_mark); |
90 | | match parser.feed(bytes) { |
91 | | Some(i) => Ok(i), |
92 | | None => Err(parser.0), |
93 | | } |
94 | | } |
95 | | |
96 | | // Comments show which character was seen last before calling `feed`. |
97 | | // `x` means any character, the pipe denotes the start of the buffer passed to `feed` |
98 | | |
99 | | assert_eq!(parse_pi(b"", false), Err(false)); // x| |
100 | | assert_eq!(parse_pi(b"", true), Err(false)); // ?| |
101 | | |
102 | | assert_eq!(parse_pi(b"?", false), Err(true)); // x|? |
103 | | assert_eq!(parse_pi(b"?", true), Err(true)); // ?|? |
104 | | |
105 | | assert_eq!(parse_pi(b">", false), Err(false)); // x|> |
106 | | assert_eq!(parse_pi(b">", true), Ok(0)); // ?|> |
107 | | |
108 | | assert_eq!(parse_pi(b"?>", false), Ok(1)); // x|?> |
109 | | assert_eq!(parse_pi(b"?>", true), Ok(1)); // ?|?> |
110 | | |
111 | | assert_eq!(parse_pi(b">?>", false), Ok(2)); // x|>?> |
112 | | assert_eq!(parse_pi(b">?>", true), Ok(0)); // ?|>?> |
113 | | } |
112 | | assert_eq!(parse_pi(b">?>", true), Ok(0)); // ?|>?> |
113 | | } |