/rust/registry/src/index.crates.io-1949cf8c6b5b557f/quick-xml-0.38.4/src/de/mod.rs
Line | Count | Source |
1 | | //! Serde `Deserializer` module. |
2 | | //! |
3 | | //! Due to the complexity of the XML standard and the fact that Serde was developed |
4 | | //! with JSON in mind, not all Serde concepts apply smoothly to XML. This leads to |
5 | | //! the fact that some XML concepts are inexpressible in terms of Serde derives |
6 | | //! and may require manual deserialization. |
7 | | //! |
8 | | //! The most notable restriction is the ability to distinguish between _elements_ |
9 | | //! and _attributes_, as no other format used by serde has such a concept. |
10 | | //! |
11 | | //! Due to that the mapping is performed in a best effort manner. |
12 | | //! |
13 | | //! |
14 | | //! |
15 | | //! Table of Contents |
16 | | //! ================= |
17 | | //! - [Mapping XML to Rust types](#mapping-xml-to-rust-types) |
18 | | //! - [Basics](#basics) |
19 | | //! - [Optional attributes and elements](#optional-attributes-and-elements) |
20 | | //! - [Choices (`xs:choice` XML Schema type)](#choices-xschoice-xml-schema-type) |
21 | | //! - [Sequences (`xs:all` and `xs:sequence` XML Schema types)](#sequences-xsall-and-xssequence-xml-schema-types) |
22 | | //! - [Mapping of `xsi:nil`](#mapping-of-xsinil) |
23 | | //! - [Generate Rust types from XML](#generate-rust-types-from-xml) |
24 | | //! - [Composition Rules](#composition-rules) |
25 | | //! - [Enum Representations](#enum-representations) |
26 | | //! - [Normal enum variant](#normal-enum-variant) |
27 | | //! - [`$text` enum variant](#text-enum-variant) |
28 | | //! - [`$text` and `$value` special names](#text-and-value-special-names) |
29 | | //! - [`$text`](#text) |
30 | | //! - [`$value`](#value) |
31 | | //! - [Primitives and sequences of primitives](#primitives-and-sequences-of-primitives) |
32 | | //! - [Structs and sequences of structs](#structs-and-sequences-of-structs) |
33 | | //! - [Enums and sequences of enums](#enums-and-sequences-of-enums) |
34 | | //! - [Frequently Used Patterns](#frequently-used-patterns) |
35 | | //! - [`<element>` lists](#element-lists) |
36 | | //! - [Overlapped (Out-of-Order) Elements](#overlapped-out-of-order-elements) |
37 | | //! - [Internally Tagged Enums](#internally-tagged-enums) |
38 | | //! |
39 | | //! |
40 | | //! |
41 | | //! Mapping XML to Rust types |
42 | | //! ========================= |
43 | | //! |
44 | | //! Type names are never considered when deserializing, so you can name your |
45 | | //! types as you wish. Other general rules: |
46 | | //! - `struct` field name could be represented in XML only as an attribute name |
47 | | //! or an element name; |
48 | | //! - `enum` variant name could be represented in XML only as an attribute name |
49 | | //! or an element name; |
50 | | //! - the unit struct, unit type `()` and unit enum variant can be deserialized |
51 | | //! from any valid XML content: |
52 | | //! - attribute and element names; |
53 | | //! - attribute and element values; |
54 | | //! - text or CDATA content (including mixed text and CDATA content). |
55 | | //! |
56 | | //! <div style="background:rgba(120,145,255,0.45);padding:0.75em;"> |
57 | | //! |
58 | | //! NOTE: All tests are marked with an `ignore` option, even though they do |
59 | | //! compile. This is because rustdoc marks such blocks with an information |
60 | | //! icon unlike `no_run` blocks. |
61 | | //! |
62 | | //! </div> |
63 | | //! |
64 | | //! <table> |
65 | | //! <thead> |
66 | | //! <tr><th colspan="2"> |
67 | | //! |
68 | | //! ## Basics |
69 | | //! |
70 | | //! </th></tr> |
71 | | //! <tr><th>To parse all these XML's...</th><th>...use these Rust type(s)</th></tr> |
72 | | //! </thead> |
73 | | //! <tbody style="vertical-align:top;"> |
74 | | //! <tr> |
75 | | //! <td> |
76 | | //! Content of attributes and text / CDATA content of elements (including mixed |
77 | | //! text and CDATA content): |
78 | | //! |
79 | | //! ```xml |
80 | | //! <... ...="content" /> |
81 | | //! ``` |
82 | | //! ```xml |
83 | | //! <...>content</...> |
84 | | //! ``` |
85 | | //! ```xml |
86 | | //! <...><![CDATA[content]]></...> |
87 | | //! ``` |
88 | | //! ```xml |
89 | | //! <...>text<![CDATA[cdata]]>text</...> |
90 | | //! ``` |
91 | | //! Mixed text / CDATA content represents one logical string, `"textcdatatext"` in that case. |
92 | | //! </td> |
93 | | //! <td> |
94 | | //! |
95 | | //! You can use any type that can be deserialized from an `&str`, for example: |
96 | | //! - [`String`] and [`&str`] |
97 | | //! - [`Cow<str>`] |
98 | | //! - [`u32`], [`f32`] and other numeric types |
99 | | //! - `enum`s, like |
100 | | //! ``` |
101 | | //! # use pretty_assertions::assert_eq; |
102 | | //! # use serde::Deserialize; |
103 | | //! # #[derive(Debug, PartialEq)] |
104 | | //! #[derive(Deserialize)] |
105 | | //! enum Language { |
106 | | //! Rust, |
107 | | //! Cpp, |
108 | | //! #[serde(other)] |
109 | | //! Other, |
110 | | //! } |
111 | | //! # #[derive(Debug, PartialEq, Deserialize)] |
112 | | //! # struct X { #[serde(rename = "$text")] x: Language } |
113 | | //! # assert_eq!(X { x: Language::Rust }, quick_xml::de::from_str("<x>Rust</x>").unwrap()); |
114 | | //! # assert_eq!(X { x: Language::Cpp }, quick_xml::de::from_str("<x>C<![CDATA[p]]>p</x>").unwrap()); |
115 | | //! # assert_eq!(X { x: Language::Other }, quick_xml::de::from_str("<x><![CDATA[other]]></x>").unwrap()); |
116 | | //! ``` |
117 | | //! |
118 | | //! <div style="background:rgba(120,145,255,0.45);padding:0.75em;"> |
119 | | //! |
120 | | //! NOTE: deserialization to non-owned types (i.e. borrow from the input), |
121 | | //! such as `&str`, is possible only if you parse document in the UTF-8 |
122 | | //! encoding and content does not contain entity references such as `&amp;`, |
123 | | //! or character references such as `&#xD;`, as well as text content represented |
124 | | //! by one piece of [text] or [CDATA] element. |
125 | | //! </div> |
126 | | //! <!-- TODO: document an error type returned --> |
127 | | //! |
128 | | //! [text]: Event::Text |
129 | | //! [CDATA]: Event::CData |
130 | | //! </td> |
131 | | //! </tr> |
132 | | //! <!-- 2 ===================================================================================== --> |
133 | | //! <tr> |
134 | | //! <td> |
135 | | //! |
136 | | //! Content of attributes and text / CDATA content of elements (including mixed |
137 | | //! text and CDATA content), which represents a space-delimited list, as |
138 | | //! specified in the XML Schema specification for [`xs:list`] `simpleType`: |
139 | | //! |
140 | | //! ```xml |
141 | | //! <... ...="element1 element2 ..." /> |
142 | | //! ``` |
143 | | //! ```xml |
144 | | //! <...> |
145 | | //! element1 |
146 | | //! element2 |
147 | | //! ... |
148 | | //! </...> |
149 | | //! ``` |
150 | | //! ```xml |
151 | | //! <...><![CDATA[ |
152 | | //! element1 |
153 | | //! element2 |
154 | | //! ... |
155 | | //! ]]></...> |
156 | | //! ``` |
157 | | //! |
158 | | //! [`xs:list`]: https://www.w3.org/TR/xmlschema11-2/#list-datatypes |
159 | | //! </td> |
160 | | //! <td> |
161 | | //! |
162 | | //! Use any type that is deserialized using a [`deserialize_seq()`] call, for example: |
163 | | //! |
164 | | //! ``` |
165 | | //! type List = Vec<u32>; |
166 | | //! ``` |
167 | | //! |
168 | | //! See the next row to learn where in your struct definition you should |
169 | | //! use that type. |
170 | | //! |
171 | | //! According to the XML Schema specification, the delimiter between elements is one |
172 | | //! or more space (`' '`, `'\r'`, `'\n'`, and `'\t'`) character(s). |
173 | | //! |
174 | | //! <div style="background:rgba(120,145,255,0.45);padding:0.75em;"> |
175 | | //! |
176 | | //! NOTE: according to the XML Schema restrictions, you cannot escape those |
177 | | //! white-space characters, so list elements will _never_ contain them. |
178 | | //! In practice you will usually use `xs:list`s for lists of numbers or enumerated |
179 | | //! values which look like identifiers in many languages, for example, `item`, |
180 | | //! `some_item` or `some-item`, so that shouldn't be a problem. |
181 | | //! |
182 | | //! NOTE: according to the XML Schema specification, list elements can be |
183 | | //! delimited only by spaces. Other delimiters (for example, commas) are not |
184 | | //! allowed. |
185 | | //! |
186 | | //! </div> |
187 | | //! |
188 | | //! [`deserialize_seq()`]: de::Deserializer::deserialize_seq |
189 | | //! </td> |
190 | | //! </tr> |
191 | | //! <!-- 3 ===================================================================================== --> |
192 | | //! <tr> |
193 | | //! <td> |
194 | | //! A typical XML with attributes. The root tag name does not matter: |
195 | | //! |
196 | | //! ```xml |
197 | | //! <any-tag one="..." two="..."/> |
198 | | //! ``` |
199 | | //! </td> |
200 | | //! <td> |
201 | | //! |
202 | | //! A structure where each XML attribute is mapped to a field with a name |
203 | | //! starting with `@`. Because Rust identifiers do not permit the `@` character, |
204 | | //! you should use the `#[serde(rename = "@...")]` attribute to rename it. |
205 | | //! The name of the struct itself does not matter: |
206 | | //! |
207 | | //! ``` |
208 | | //! # use serde::Deserialize; |
209 | | //! # type T = (); |
210 | | //! # type U = (); |
211 | | //! // Get both attributes |
212 | | //! # #[derive(Debug, PartialEq)] |
213 | | //! #[derive(Deserialize)] |
214 | | //! struct AnyName { |
215 | | //! #[serde(rename = "@one")] |
216 | | //! one: T, |
217 | | //! |
218 | | //! #[serde(rename = "@two")] |
219 | | //! two: U, |
220 | | //! } |
221 | | //! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"#).unwrap(); |
222 | | //! ``` |
223 | | //! ``` |
224 | | //! # use serde::Deserialize; |
225 | | //! # type T = (); |
226 | | //! // Get only the one attribute, ignore the other |
227 | | //! # #[derive(Debug, PartialEq)] |
228 | | //! #[derive(Deserialize)] |
229 | | //! struct AnyName { |
230 | | //! #[serde(rename = "@one")] |
231 | | //! one: T, |
232 | | //! } |
233 | | //! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"#).unwrap(); |
234 | | //! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."/>"#).unwrap(); |
235 | | //! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><one>...</one></any-tag>"#).unwrap(); |
236 | | //! ``` |
237 | | //! ``` |
238 | | //! # use serde::Deserialize; |
239 | | //! // Ignore all attributes |
240 | | //! // You can also use the `()` type (unit type) |
241 | | //! # #[derive(Debug, PartialEq)] |
242 | | //! #[derive(Deserialize)] |
243 | | //! struct AnyName; |
244 | | //! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"#).unwrap(); |
245 | | //! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><one>...</one></any-tag>"#).unwrap(); |
246 | | //! # quick_xml::de::from_str::<AnyName>(r#"<any-tag><one>...</one><two>...</two></any-tag>"#).unwrap(); |
247 | | //! ``` |
248 | | //! |
249 | | //! All these structs can be used to deserialize from an XML on the |
250 | | //! left side depending on the amount of information that you want to get. |
251 | | //! Of course, you can combine them with elements extractor structs (see below). |
252 | | //! |
253 | | //! <div style="background:rgba(120,145,255,0.45);padding:0.75em;"> |
254 | | //! |
255 | | //! NOTE: XML allows you to have an attribute and an element with the same name |
256 | | //! inside the one element. quick-xml deals with that by prepending a `@` prefix |
257 | | //! to the name of attributes. |
258 | | //! </div> |
259 | | //! </td> |
260 | | //! </tr> |
261 | | //! <!-- 4 ===================================================================================== --> |
262 | | //! <tr> |
263 | | //! <td> |
264 | | //! A typical XML with child elements. The root tag name does not matter: |
265 | | //! |
266 | | //! ```xml |
267 | | //! <any-tag> |
268 | | //! <one>...</one> |
269 | | //! <two>...</two> |
270 | | //! </any-tag> |
271 | | //! ``` |
272 | | //! </td> |
273 | | //! <td> |
274 | | //! A structure where each XML child element is mapped to the field. |
275 | | //! Each element name becomes a name of field. The name of the struct itself |
276 | | //! does not matter: |
277 | | //! |
278 | | //! ``` |
279 | | //! # use serde::Deserialize; |
280 | | //! # type T = (); |
281 | | //! # type U = (); |
282 | | //! // Get both elements |
283 | | //! # #[derive(Debug, PartialEq)] |
284 | | //! #[derive(Deserialize)] |
285 | | //! struct AnyName { |
286 | | //! one: T, |
287 | | //! two: U, |
288 | | //! } |
289 | | //! # quick_xml::de::from_str::<AnyName>(r#"<any-tag><one>...</one><two>...</two></any-tag>"#).unwrap(); |
290 | | //! # |
291 | | //! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"#).unwrap_err(); |
292 | | //! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><two>...</two></any-tag>"#).unwrap_err(); |
293 | | //! ``` |
294 | | //! ``` |
295 | | //! # use serde::Deserialize; |
296 | | //! # type T = (); |
297 | | //! // Get only the one element, ignore the other |
298 | | //! # #[derive(Debug, PartialEq)] |
299 | | //! #[derive(Deserialize)] |
300 | | //! struct AnyName { |
301 | | //! one: T, |
302 | | //! } |
303 | | //! # quick_xml::de::from_str::<AnyName>(r#"<any-tag><one>...</one><two>...</two></any-tag>"#).unwrap(); |
304 | | //! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><one>...</one></any-tag>"#).unwrap(); |
305 | | //! ``` |
306 | | //! ``` |
307 | | //! # use serde::Deserialize; |
308 | | //! // Ignore all elements |
309 | | //! // You can also use the `()` type (unit type) |
310 | | //! # #[derive(Debug, PartialEq)] |
311 | | //! #[derive(Deserialize)] |
312 | | //! struct AnyName; |
313 | | //! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"#).unwrap(); |
314 | | //! # quick_xml::de::from_str::<AnyName>(r#"<any-tag><one>...</one><two>...</two></any-tag>"#).unwrap(); |
315 | | //! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><two>...</two></any-tag>"#).unwrap(); |
316 | | //! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><one>...</one></any-tag>"#).unwrap(); |
317 | | //! ``` |
318 | | //! |
319 | | //! All these structs can be used to deserialize from an XML on the |
320 | | //! left side depending on the amount of information that you want to get. |
321 | | //! Of course, you can combine them with attributes extractor structs (see above). |
322 | | //! |
323 | | //! <div style="background:rgba(120,145,255,0.45);padding:0.75em;"> |
324 | | //! |
325 | | //! NOTE: XML allows you to have an attribute and an element with the same name |
326 | | //! inside the one element. quick-xml deals with that by prepending a `@` prefix |
327 | | //! to the name of attributes. |
328 | | //! </div> |
329 | | //! </td> |
330 | | //! </tr> |
331 | | //! <!-- 5 ===================================================================================== --> |
332 | | //! <tr> |
333 | | //! <td> |
334 | | //! An XML with an attribute and a child element named equally: |
335 | | //! |
336 | | //! ```xml |
337 | | //! <any-tag field="..."> |
338 | | //! <field>...</field> |
339 | | //! </any-tag> |
340 | | //! ``` |
341 | | //! </td> |
342 | | //! <td> |
343 | | //! |
344 | | //! You MUST specify `#[serde(rename = "@field")]` on a field that will be used |
345 | | //! for an attribute: |
346 | | //! |
347 | | //! ``` |
348 | | //! # use pretty_assertions::assert_eq; |
349 | | //! # use serde::Deserialize; |
350 | | //! # type T = (); |
351 | | //! # type U = (); |
352 | | //! # #[derive(Debug, PartialEq)] |
353 | | //! #[derive(Deserialize)] |
354 | | //! struct AnyName { |
355 | | //! #[serde(rename = "@field")] |
356 | | //! attribute: T, |
357 | | //! field: U, |
358 | | //! } |
359 | | //! # assert_eq!( |
360 | | //! # AnyName { attribute: (), field: () }, |
361 | | //! # quick_xml::de::from_str(r#" |
362 | | //! # <any-tag field="..."> |
363 | | //! # <field>...</field> |
364 | | //! # </any-tag> |
365 | | //! # "#).unwrap(), |
366 | | //! # ); |
367 | | //! ``` |
368 | | //! </td> |
369 | | //! </tr> |
370 | | //! <!-- ======================================================================================= --> |
371 | | //! <tr><th colspan="2"> |
372 | | //! |
373 | | //! ## Optional attributes and elements |
374 | | //! |
375 | | //! </th></tr> |
376 | | //! <tr><th>To parse all these XML's...</th><th>...use these Rust type(s)</th></tr> |
377 | | //! <!-- 6 ===================================================================================== --> |
378 | | //! <tr> |
379 | | //! <td> |
380 | | //! An optional XML attribute that you want to capture. |
381 | | //! The root tag name does not matter: |
382 | | //! |
383 | | //! ```xml |
384 | | //! <any-tag optional="..."/> |
385 | | //! ``` |
386 | | //! ```xml |
387 | | //! <any-tag/> |
388 | | //! ``` |
389 | | //! </td> |
390 | | //! <td> |
391 | | //! |
392 | | //! A structure with an optional field, renamed according to the requirements |
393 | | //! for attributes: |
394 | | //! |
395 | | //! ``` |
396 | | //! # use pretty_assertions::assert_eq; |
397 | | //! # use serde::Deserialize; |
398 | | //! # type T = (); |
399 | | //! # #[derive(Debug, PartialEq)] |
400 | | //! #[derive(Deserialize)] |
401 | | //! struct AnyName { |
402 | | //! #[serde(rename = "@optional")] |
403 | | //! optional: Option<T>, |
404 | | //! } |
405 | | //! # assert_eq!(AnyName { optional: Some(()) }, quick_xml::de::from_str(r#"<any-tag optional="..."/>"#).unwrap()); |
406 | | //! # assert_eq!(AnyName { optional: None }, quick_xml::de::from_str(r#"<any-tag/>"#).unwrap()); |
407 | | //! ``` |
408 | | //! When the XML attribute is present, type `T` will be deserialized from |
409 | | //! an attribute value (which is a string). Note, that if `T = String` or other |
410 | | //! string type, the empty attribute is mapped to a `Some("")`, whereas `None` |
411 | | //! represents the missing attribute: |
412 | | //! ```xml |
413 | | //! <any-tag optional="..."/><!-- Some("...") --> |
414 | | //! <any-tag optional=""/> <!-- Some("") --> |
415 | | //! <any-tag/> <!-- None --> |
416 | | //! ``` |
417 | | //! <div style="background:rgba(120,145,255,0.45);padding:0.75em;"> |
418 | | //! |
419 | | //! NOTE: The behaviour is not symmetric by default. `None` will be serialized as |
420 | | //! `optional=""`. This behaviour is consistent across serde crates. You should add |
421 | | //! `#[serde(skip_serializing_if = "Option::is_none")]` attribute to the field to |
422 | | //! skip `None`s. |
423 | | //! </div> |
424 | | //! </td> |
425 | | //! </tr> |
426 | | //! <!-- 7 ===================================================================================== --> |
427 | | //! <tr> |
428 | | //! <td> |
429 | | //! An optional XML element that you want to capture. |
430 | | //! The root tag name does not matter: |
431 | | //! |
432 | | //! ```xml |
433 | | //! <any-tag> |
434 | | //! <optional>...</optional> |
435 | | //! </any-tag> |
436 | | //! ``` |
437 | | //! ```xml |
438 | | //! <any-tag> |
439 | | //! <optional/> |
440 | | //! </any-tag> |
441 | | //! ``` |
442 | | //! ```xml |
443 | | //! <any-tag/> |
444 | | //! ``` |
445 | | //! </td> |
446 | | //! <td> |
447 | | //! |
448 | | //! A structure with an optional field: |
449 | | //! |
450 | | //! ``` |
451 | | //! # use pretty_assertions::assert_eq; |
452 | | //! # use serde::Deserialize; |
453 | | //! # type T = (); |
454 | | //! # #[derive(Debug, PartialEq)] |
455 | | //! #[derive(Deserialize)] |
456 | | //! struct AnyName { |
457 | | //! optional: Option<T>, |
458 | | //! } |
459 | | //! # assert_eq!(AnyName { optional: Some(()) }, quick_xml::de::from_str(r#"<any-tag><optional>...</optional></any-tag>"#).unwrap()); |
460 | | //! # assert_eq!(AnyName { optional: None }, quick_xml::de::from_str(r#"<any-tag/>"#).unwrap()); |
461 | | //! ``` |
462 | | //! When the XML element is present, type `T` will be deserialized from an |
463 | | //! element (which is a string or a multi-mapping -- i.e. mapping which can have |
464 | | //! duplicated keys). |
465 | | //! <div style="background:rgba(120,145,255,0.45);padding:0.75em;"> |
466 | | //! |
467 | | //! NOTE: The behaviour is not symmetric by default. `None` will be serialized as |
468 | | //! `<optional/>`. This behaviour is consistent across serde crates. You should add |
469 | | //! `#[serde(skip_serializing_if = "Option::is_none")]` attribute to the field to |
470 | | //! skip `None`s. |
471 | | //! |
472 | | //! NOTE: Deserializer will automatically handle a [`xsi:nil`] attribute and set field to `None`. |
473 | | //! For more info see [Mapping of `xsi:nil`](#mapping-of-xsinil). |
474 | | //! </div> |
475 | | //! </td> |
476 | | //! </tr> |
477 | | //! <!-- ======================================================================================= --> |
478 | | //! <tr><th colspan="2"> |
479 | | //! |
480 | | //! ## Choices (`xs:choice` XML Schema type) |
481 | | //! |
482 | | //! </th></tr> |
483 | | //! <tr><th>To parse all these XML's...</th><th>...use these Rust type(s)</th></tr> |
484 | | //! <!-- 8 ===================================================================================== --> |
485 | | //! <tr> |
486 | | //! <td> |
487 | | //! An XML with different root tag names, as well as text / CDATA content: |
488 | | //! |
489 | | //! ```xml |
490 | | //! <one field1="...">...</one> |
491 | | //! ``` |
492 | | //! ```xml |
493 | | //! <two> |
494 | | //! <field2>...</field2> |
495 | | //! </two> |
496 | | //! ``` |
497 | | //! ```xml |
498 | | //! Text <![CDATA[or (mixed) |
499 | | //! CDATA]]> content |
500 | | //! ``` |
501 | | //! </td> |
502 | | //! <td> |
503 | | //! |
504 | | //! An enum where each variant has the name of a possible root tag. The name of |
505 | | //! the enum itself does not matter. |
506 | | //! |
507 | | //! If you need to get the textual content, mark a variant with `#[serde(rename = "$text")]`. |
508 | | //! |
509 | | //! All these structs can be used to deserialize from any XML on the |
510 | | //! left side depending on the amount of information that you want to get: |
511 | | //! |
512 | | //! ``` |
513 | | //! # use pretty_assertions::assert_eq; |
514 | | //! # use serde::Deserialize; |
515 | | //! # type T = (); |
516 | | //! # type U = (); |
517 | | //! # #[derive(Debug, PartialEq)] |
518 | | //! #[derive(Deserialize)] |
519 | | //! #[serde(rename_all = "snake_case")] |
520 | | //! enum AnyName { |
521 | | //! One { #[serde(rename = "@field1")] field1: T }, |
522 | | //! Two { field2: U }, |
523 | | //! |
524 | | //! /// Use a unit variant if you do not care about the content. |
525 | | //! /// You can use a tuple variant if you want to parse |
526 | | //! /// textual content as an xs:list. |
527 | | //! /// Struct variants will pass a string to the |
528 | | //! /// struct enum variant visitor, which typically |
529 | | //! /// returns Err(Custom) |
530 | | //! #[serde(rename = "$text")] |
531 | | //! Text(String), |
532 | | //! } |
533 | | //! # assert_eq!(AnyName::One { field1: () }, quick_xml::de::from_str(r#"<one field1="...">...</one>"#).unwrap()); |
534 | | //! # assert_eq!(AnyName::Two { field2: () }, quick_xml::de::from_str(r#"<two><field2>...</field2></two>"#).unwrap()); |
535 | | //! # assert_eq!(AnyName::Text("text cdata ".into()), quick_xml::de::from_str(r#"text <![CDATA[ cdata ]]>"#).unwrap()); |
536 | | //! ``` |
537 | | //! ``` |
538 | | //! # use pretty_assertions::assert_eq; |
539 | | //! # use serde::Deserialize; |
540 | | //! # type T = (); |
541 | | //! # #[derive(Debug, PartialEq)] |
542 | | //! #[derive(Deserialize)] |
543 | | //! struct Two { |
544 | | //! field2: T, |
545 | | //! } |
546 | | //! # #[derive(Debug, PartialEq)] |
547 | | //! #[derive(Deserialize)] |
548 | | //! #[serde(rename_all = "snake_case")] |
549 | | //! enum AnyName { |
550 | | //! // `field1` content discarded |
551 | | //! One, |
552 | | //! Two(Two), |
553 | | //! #[serde(rename = "$text")] |
554 | | //! Text, |
555 | | //! } |
556 | | //! # assert_eq!(AnyName::One, quick_xml::de::from_str(r#"<one field1="...">...</one>"#).unwrap()); |
557 | | //! # assert_eq!(AnyName::Two(Two { field2: () }), quick_xml::de::from_str(r#"<two><field2>...</field2></two>"#).unwrap()); |
558 | | //! # assert_eq!(AnyName::Text, quick_xml::de::from_str(r#"text <![CDATA[ cdata ]]>"#).unwrap()); |
559 | | //! ``` |
560 | | //! ``` |
561 | | //! # use pretty_assertions::assert_eq; |
562 | | //! # use serde::Deserialize; |
563 | | //! # #[derive(Debug, PartialEq)] |
564 | | //! #[derive(Deserialize)] |
565 | | //! #[serde(rename_all = "snake_case")] |
566 | | //! enum AnyName { |
567 | | //! One, |
568 | | //! // the <two> and textual content will be mapped to this |
569 | | //! #[serde(other)] |
570 | | //! Other, |
571 | | //! } |
572 | | //! # assert_eq!(AnyName::One, quick_xml::de::from_str(r#"<one field1="...">...</one>"#).unwrap()); |
573 | | //! # assert_eq!(AnyName::Other, quick_xml::de::from_str(r#"<two><field2>...</field2></two>"#).unwrap()); |
574 | | //! # assert_eq!(AnyName::Other, quick_xml::de::from_str(r#"text <![CDATA[ cdata ]]>"#).unwrap()); |
575 | | //! ``` |
576 | | //! <div style="background:rgba(120,145,255,0.45);padding:0.75em;"> |
577 | | //! |
578 | | //! NOTE: You should have variants for all possible tag names in your enum |
579 | | //! or have an `#[serde(other)]` variant. |
580 | | //! <!-- TODO: document an error type if that requirement is violated --> |
581 | | //! </div> |
582 | | //! </td> |
583 | | //! </tr> |
584 | | //! <!-- 9 ===================================================================================== --> |
585 | | //! <tr> |
586 | | //! <td> |
587 | | //! |
588 | | //! `<xs:choice>` embedded in the other element, and at the same time you want |
589 | | //! to get access to other attributes that can appear in the same container |
590 | | //! (`<any-tag>`). Also this case can be described, as if you want to choose |
591 | | //! Rust enum variant based on a tag name: |
592 | | //! |
593 | | //! ```xml |
594 | | //! <any-tag field="..."> |
595 | | //! <one>...</one> |
596 | | //! </any-tag> |
597 | | //! ``` |
598 | | //! ```xml |
599 | | //! <any-tag field="..."> |
600 | | //! <two>...</two> |
601 | | //! </any-tag> |
602 | | //! ``` |
603 | | //! ```xml |
604 | | //! <any-tag field="..."> |
605 | | //! Text <![CDATA[or (mixed) |
606 | | //! CDATA]]> content |
607 | | //! </any-tag> |
608 | | //! ``` |
609 | | //! </td> |
610 | | //! <td> |
611 | | //! |
612 | | //! A structure with a field whose type is an `enum`. |
613 | | //! |
614 | | //! If you need to get a textual content, mark a variant with `#[serde(rename = "$text")]`. |
615 | | //! |
616 | | //! Names of the enum, struct, and struct field with `Choice` type do not matter: |
617 | | //! |
618 | | //! ``` |
619 | | //! # use pretty_assertions::assert_eq; |
620 | | //! # use serde::Deserialize; |
621 | | //! # type T = (); |
622 | | //! # #[derive(Debug, PartialEq)] |
623 | | //! #[derive(Deserialize)] |
624 | | //! #[serde(rename_all = "snake_case")] |
625 | | //! enum Choice { |
626 | | //! One, |
627 | | //! Two, |
628 | | //! |
629 | | //! /// Use a unit variant if you do not care about the content. |
630 | | //! /// You can use a tuple variant if you want to parse |
631 | | //! /// textual content as an xs:list. |
632 | | //! /// Struct variants will pass a string to the |
633 | | //! /// struct enum variant visitor, which typically |
634 | | //! /// returns Err(Custom) |
635 | | //! #[serde(rename = "$text")] |
636 | | //! Text(String), |
637 | | //! } |
638 | | //! # #[derive(Debug, PartialEq)] |
639 | | //! #[derive(Deserialize)] |
640 | | //! struct AnyName { |
641 | | //! #[serde(rename = "@field")] |
642 | | //! field: T, |
643 | | //! |
644 | | //! #[serde(rename = "$value")] |
645 | | //! any_name: Choice, |
646 | | //! } |
647 | | //! # assert_eq!( |
648 | | //! # AnyName { field: (), any_name: Choice::One }, |
649 | | //! # quick_xml::de::from_str(r#"<any-tag field="..."><one>...</one></any-tag>"#).unwrap(), |
650 | | //! # ); |
651 | | //! # assert_eq!( |
652 | | //! # AnyName { field: (), any_name: Choice::Two }, |
653 | | //! # quick_xml::de::from_str(r#"<any-tag field="..."><two>...</two></any-tag>"#).unwrap(), |
654 | | //! # ); |
655 | | //! # assert_eq!( |
656 | | //! # AnyName { field: (), any_name: Choice::Text("text cdata ".into()) }, |
657 | | //! # quick_xml::de::from_str(r#"<any-tag field="...">text <![CDATA[ cdata ]]></any-tag>"#).unwrap(), |
658 | | //! # ); |
659 | | //! ``` |
660 | | //! </td> |
661 | | //! </tr> |
662 | | //! <!-- 10 ==================================================================================== --> |
663 | | //! <tr> |
664 | | //! <td> |
665 | | //! |
666 | | //! `<xs:choice>` embedded in the other element, and at the same time you want |
667 | | //! to get access to other elements that can appear in the same container |
668 | | //! (`<any-tag>`). Also this case can be described, as if you want to choose |
669 | | //! Rust enum variant based on a tag name: |
670 | | //! |
671 | | //! ```xml |
672 | | //! <any-tag> |
673 | | //! <field>...</field> |
674 | | //! <one>...</one> |
675 | | //! </any-tag> |
676 | | //! ``` |
677 | | //! ```xml |
678 | | //! <any-tag> |
679 | | //! <two>...</two> |
680 | | //! <field>...</field> |
681 | | //! </any-tag> |
682 | | //! ``` |
683 | | //! </td> |
684 | | //! <td> |
685 | | //! |
686 | | //! A structure with a field whose type is an `enum`. |
687 | | //! |
688 | | //! Names of the enum, struct, and struct field with `Choice` type do not matter: |
689 | | //! |
690 | | //! ``` |
691 | | //! # use pretty_assertions::assert_eq; |
692 | | //! # use serde::Deserialize; |
693 | | //! # type T = (); |
694 | | //! # #[derive(Debug, PartialEq)] |
695 | | //! #[derive(Deserialize)] |
696 | | //! #[serde(rename_all = "snake_case")] |
697 | | //! enum Choice { |
698 | | //! One, |
699 | | //! Two, |
700 | | //! } |
701 | | //! # #[derive(Debug, PartialEq)] |
702 | | //! #[derive(Deserialize)] |
703 | | //! struct AnyName { |
704 | | //! field: T, |
705 | | //! |
706 | | //! #[serde(rename = "$value")] |
707 | | //! any_name: Choice, |
708 | | //! } |
709 | | //! # assert_eq!( |
710 | | //! # AnyName { field: (), any_name: Choice::One }, |
711 | | //! # quick_xml::de::from_str(r#"<any-tag><field>...</field><one>...</one></any-tag>"#).unwrap(), |
712 | | //! # ); |
713 | | //! # assert_eq!( |
714 | | //! # AnyName { field: (), any_name: Choice::Two }, |
715 | | //! # quick_xml::de::from_str(r#"<any-tag><two>...</two><field>...</field></any-tag>"#).unwrap(), |
716 | | //! # ); |
717 | | //! ``` |
718 | | //! |
719 | | //! <div style="background:rgba(120,145,255,0.45);padding:0.75em;"> |
720 | | //! |
721 | | //! NOTE: if your `Choice` enum would contain an `#[serde(other)]` |
722 | | //! variant, element `<field>` will be mapped to the `field` and not to the enum |
723 | | //! variant. |
724 | | //! </div> |
725 | | //! |
726 | | //! </td> |
727 | | //! </tr> |
728 | | //! <!-- 11 ==================================================================================== --> |
729 | | //! <tr> |
730 | | //! <td> |
731 | | //! |
732 | | //! `<xs:choice>` encapsulated in another element with a fixed name: |
733 | | //! |
734 | | //! ```xml |
735 | | //! <any-tag field="..."> |
736 | | //! <choice> |
737 | | //! <one>...</one> |
738 | | //! </choice> |
739 | | //! </any-tag> |
740 | | //! ``` |
741 | | //! ```xml |
742 | | //! <any-tag field="..."> |
743 | | //! <choice> |
744 | | //! <two>...</two> |
745 | | //! </choice> |
746 | | //! </any-tag> |
747 | | //! ``` |
748 | | //! </td> |
749 | | //! <td> |
750 | | //! |
751 | | //! A structure with a field of an intermediate type with one field of `enum` type. |
752 | | //! Actually, this example is not necessary, because you can construct it by yourself |
753 | | //! using the composition rules that were described above. However the XML construction |
754 | | //! described here is very common, so it is shown explicitly. |
755 | | //! |
756 | | //! Names of the enum and struct do not matter: |
757 | | //! |
758 | | //! ``` |
759 | | //! # use pretty_assertions::assert_eq; |
760 | | //! # use serde::Deserialize; |
761 | | //! # type T = (); |
762 | | //! # #[derive(Debug, PartialEq)] |
763 | | //! #[derive(Deserialize)] |
764 | | //! #[serde(rename_all = "snake_case")] |
765 | | //! enum Choice { |
766 | | //! One, |
767 | | //! Two, |
768 | | //! } |
769 | | //! # #[derive(Debug, PartialEq)] |
770 | | //! #[derive(Deserialize)] |
771 | | //! struct Holder { |
772 | | //! #[serde(rename = "$value")] |
773 | | //! any_name: Choice, |
774 | | //! } |
775 | | //! # #[derive(Debug, PartialEq)] |
776 | | //! #[derive(Deserialize)] |
777 | | //! struct AnyName { |
778 | | //! #[serde(rename = "@field")] |
779 | | //! field: T, |
780 | | //! |
781 | | //! choice: Holder, |
782 | | //! } |
783 | | //! # assert_eq!( |
784 | | //! # AnyName { field: (), choice: Holder { any_name: Choice::One } }, |
785 | | //! # quick_xml::de::from_str(r#"<any-tag field="..."><choice><one>...</one></choice></any-tag>"#).unwrap(), |
786 | | //! # ); |
787 | | //! # assert_eq!( |
788 | | //! # AnyName { field: (), choice: Holder { any_name: Choice::Two } }, |
789 | | //! # quick_xml::de::from_str(r#"<any-tag field="..."><choice><two>...</two></choice></any-tag>"#).unwrap(), |
790 | | //! # ); |
791 | | //! ``` |
792 | | //! </td> |
793 | | //! </tr> |
794 | | //! <!-- 12 ==================================================================================== --> |
795 | | //! <tr> |
796 | | //! <td> |
797 | | //! |
798 | | //! `<xs:choice>` encapsulated in other element with a fixed name: |
799 | | //! |
800 | | //! ```xml |
801 | | //! <any-tag> |
802 | | //! <field>...</field> |
803 | | //! <choice> |
804 | | //! <one>...</one> |
805 | | //! </choice> |
806 | | //! </any-tag> |
807 | | //! ``` |
808 | | //! ```xml |
809 | | //! <any-tag> |
810 | | //! <choice> |
811 | | //! <two>...</two> |
812 | | //! </choice> |
813 | | //! <field>...</field> |
814 | | //! </any-tag> |
815 | | //! ``` |
816 | | //! </td> |
817 | | //! <td> |
818 | | //! |
819 | | //! A structure with a field of an intermediate type with one field of `enum` type. |
820 | | //! Actually, this example is not necessary, because you can construct it by yourself |
821 | | //! using the composition rules that were described above. However the XML construction |
822 | | //! described here is very common, so it is shown explicitly. |
823 | | //! |
824 | | //! Names of the enum and struct do not matter: |
825 | | //! |
826 | | //! ``` |
827 | | //! # use pretty_assertions::assert_eq; |
828 | | //! # use serde::Deserialize; |
829 | | //! # type T = (); |
830 | | //! # #[derive(Debug, PartialEq)] |
831 | | //! #[derive(Deserialize)] |
832 | | //! #[serde(rename_all = "snake_case")] |
833 | | //! enum Choice { |
834 | | //! One, |
835 | | //! Two, |
836 | | //! } |
837 | | //! # #[derive(Debug, PartialEq)] |
838 | | //! #[derive(Deserialize)] |
839 | | //! struct Holder { |
840 | | //! #[serde(rename = "$value")] |
841 | | //! any_name: Choice, |
842 | | //! } |
843 | | //! # #[derive(Debug, PartialEq)] |
844 | | //! #[derive(Deserialize)] |
845 | | //! struct AnyName { |
846 | | //! field: T, |
847 | | //! |
848 | | //! choice: Holder, |
849 | | //! } |
850 | | //! # assert_eq!( |
851 | | //! # AnyName { field: (), choice: Holder { any_name: Choice::One } }, |
852 | | //! # quick_xml::de::from_str(r#"<any-tag><field>...</field><choice><one>...</one></choice></any-tag>"#).unwrap(), |
853 | | //! # ); |
854 | | //! # assert_eq!( |
855 | | //! # AnyName { field: (), choice: Holder { any_name: Choice::Two } }, |
856 | | //! # quick_xml::de::from_str(r#"<any-tag><choice><two>...</two></choice><field>...</field></any-tag>"#).unwrap(), |
857 | | //! # ); |
858 | | //! ``` |
859 | | //! </td> |
860 | | //! </tr> |
861 | | //! <!-- ======================================================================================== --> |
862 | | //! <tr><th colspan="2"> |
863 | | //! |
864 | | //! ## Sequences (`xs:all` and `xs:sequence` XML Schema types) |
865 | | //! |
866 | | //! </th></tr> |
867 | | //! <tr><th>To parse all these XML's...</th><th>...use these Rust type(s)</th></tr> |
868 | | //! <!-- 13 ==================================================================================== --> |
869 | | //! <tr> |
870 | | //! <td> |
871 | | //! A sequence inside of a tag without a dedicated name: |
872 | | //! |
873 | | //! ```xml |
874 | | //! <any-tag/> |
875 | | //! ``` |
876 | | //! ```xml |
877 | | //! <any-tag> |
878 | | //! <item/> |
879 | | //! </any-tag> |
880 | | //! ``` |
881 | | //! ```xml |
882 | | //! <any-tag> |
883 | | //! <item/> |
884 | | //! <item/> |
885 | | //! <item/> |
886 | | //! </any-tag> |
887 | | //! ``` |
888 | | //! </td> |
889 | | //! <td> |
890 | | //! |
891 | | //! A structure with a field which is a sequence type, for example, [`Vec`]. |
892 | | //! Because XML syntax does not distinguish between empty sequences and missing |
893 | | //! elements, we should indicate that on the Rust side, because serde will require |
894 | | //! that field `item` exists. You can do that in two possible ways: |
895 | | //! |
896 | | //! Use the `#[serde(default)]` attribute for a [field] or the entire [struct]: |
897 | | //! ``` |
898 | | //! # use pretty_assertions::assert_eq; |
899 | | //! # use serde::Deserialize; |
900 | | //! # type Item = (); |
901 | | //! # #[derive(Debug, PartialEq)] |
902 | | //! #[derive(Deserialize)] |
903 | | //! struct AnyName { |
904 | | //! #[serde(default)] |
905 | | //! item: Vec<Item>, |
906 | | //! } |
907 | | //! # assert_eq!( |
908 | | //! # AnyName { item: vec![] }, |
909 | | //! # quick_xml::de::from_str(r#"<any-tag/>"#).unwrap(), |
910 | | //! # ); |
911 | | //! # assert_eq!( |
912 | | //! # AnyName { item: vec![()] }, |
913 | | //! # quick_xml::de::from_str(r#"<any-tag><item/></any-tag>"#).unwrap(), |
914 | | //! # ); |
915 | | //! # assert_eq!( |
916 | | //! # AnyName { item: vec![(), (), ()] }, |
917 | | //! # quick_xml::de::from_str(r#"<any-tag><item/><item/><item/></any-tag>"#).unwrap(), |
918 | | //! # ); |
919 | | //! ``` |
920 | | //! |
921 | | //! Use the [`Option`]. In that case the inner array will always contain at least one |
922 | | //! element after deserialization: |
923 | | //! ```ignore |
924 | | //! # use pretty_assertions::assert_eq; |
925 | | //! # use serde::Deserialize; |
926 | | //! # type Item = (); |
927 | | //! # #[derive(Debug, PartialEq)] |
928 | | //! #[derive(Deserialize)] |
929 | | //! struct AnyName { |
930 | | //! item: Option<Vec<Item>>, |
931 | | //! } |
932 | | //! # assert_eq!( |
933 | | //! # AnyName { item: None }, |
934 | | //! # quick_xml::de::from_str(r#"<any-tag/>"#).unwrap(), |
935 | | //! # ); |
936 | | //! # assert_eq!( |
937 | | //! # AnyName { item: Some(vec![()]) }, |
938 | | //! # quick_xml::de::from_str(r#"<any-tag><item/></any-tag>"#).unwrap(), |
939 | | //! # ); |
940 | | //! # assert_eq!( |
941 | | //! # AnyName { item: Some(vec![(), (), ()]) }, |
942 | | //! # quick_xml::de::from_str(r#"<any-tag><item/><item/><item/></any-tag>"#).unwrap(), |
943 | | //! # ); |
944 | | //! ``` |
945 | | //! |
946 | | //! See also [Frequently Used Patterns](#element-lists). |
947 | | //! |
948 | | //! [field]: https://serde.rs/field-attrs.html#default |
949 | | //! [struct]: https://serde.rs/container-attrs.html#default |
950 | | //! </td> |
951 | | //! </tr> |
952 | | //! <!-- 14 ==================================================================================== --> |
953 | | //! <tr> |
954 | | //! <td> |
955 | | //! A sequence with a strict order, probably with mixed content |
956 | | //! (text / CDATA and tags): |
957 | | //! |
958 | | //! ```xml |
959 | | //! <one>...</one> |
960 | | //! text |
961 | | //! <![CDATA[cdata]]> |
962 | | //! <two>...</two> |
963 | | //! <one>...</one> |
964 | | //! ``` |
965 | | //! <div style="background:rgba(120,145,255,0.45);padding:0.75em;"> |
966 | | //! |
967 | | //! NOTE: this is just an example for showing mapping. XML does not allow |
968 | | //! multiple root tags -- you should wrap the sequence into a tag. |
969 | | //! </div> |
970 | | //! </td> |
971 | | //! <td> |
972 | | //! |
973 | | //! All elements are mapped to a heterogeneous sequential type: tuple or named tuple. |
974 | | //! Each element of the tuple should be able to be deserialized from the nested |
975 | | //! element content (`...`), except the enum types which would be deserialized |
976 | | //! from the full element (`<one>...</one>`), so they could use the element name |
977 | | //! to choose the right variant: |
978 | | //! |
979 | | //! ``` |
980 | | //! # use pretty_assertions::assert_eq; |
981 | | //! # use serde::Deserialize; |
982 | | //! # type One = (); |
983 | | //! # type Two = (); |
984 | | //! # /* |
985 | | //! type One = ...; |
986 | | //! type Two = ...; |
987 | | //! # */ |
988 | | //! # #[derive(Debug, PartialEq)] |
989 | | //! #[derive(Deserialize)] |
990 | | //! struct AnyName(One, String, Two, One); |
991 | | //! # assert_eq!( |
992 | | //! # AnyName((), "text cdata".into(), (), ()), |
993 | | //! # quick_xml::de::from_str(r#"<one>...</one>text <![CDATA[cdata]]><two>...</two><one>...</one>"#).unwrap(), |
994 | | //! # ); |
995 | | //! ``` |
996 | | //! ``` |
997 | | //! # use pretty_assertions::assert_eq; |
998 | | //! # use serde::Deserialize; |
999 | | //! # #[derive(Debug, PartialEq)] |
1000 | | //! #[derive(Deserialize)] |
1001 | | //! #[serde(rename_all = "snake_case")] |
1002 | | //! enum Choice { |
1003 | | //! One, |
1004 | | //! } |
1005 | | //! # type Two = (); |
1006 | | //! # /* |
1007 | | //! type Two = ...; |
1008 | | //! # */ |
1009 | | //! type AnyName = (Choice, String, Two, Choice); |
1010 | | //! # assert_eq!( |
1011 | | //! # (Choice::One, "text cdata".to_string(), (), Choice::One), |
1012 | | //! # quick_xml::de::from_str(r#"<one>...</one>text <![CDATA[cdata]]><two>...</two><one>...</one>"#).unwrap(), |
1013 | | //! # ); |
1014 | | //! ``` |
1015 | | //! <div style="background:rgba(120,145,255,0.45);padding:0.75em;"> |
1016 | | //! |
1017 | | //! NOTE: consecutive text and CDATA nodes are merged into one text node, |
1018 | | //! so you cannot have two adjacent string types in your sequence. |
1019 | | //! |
1020 | | //! NOTE: In the case that the list might contain tags that are overlapped with |
1021 | | //! tags that do not correspond to the list you should add the feature [`overlapped-lists`]. |
1022 | | //! </div> |
1023 | | //! </td> |
1024 | | //! </tr> |
1025 | | //! <!-- 15 ==================================================================================== --> |
1026 | | //! <tr> |
1027 | | //! <td> |
1028 | | //! A sequence with a non-strict order, probably with a mixed content |
1029 | | //! (text / CDATA and tags). |
1030 | | //! |
1031 | | //! ```xml |
1032 | | //! <one>...</one> |
1033 | | //! text |
1034 | | //! <![CDATA[cdata]]> |
1035 | | //! <two>...</two> |
1036 | | //! <one>...</one> |
1037 | | //! ``` |
1038 | | //! <div style="background:rgba(120,145,255,0.45);padding:0.75em;"> |
1039 | | //! |
1040 | | //! NOTE: this is just an example for showing mapping. XML does not allow |
1041 | | //! multiple root tags -- you should wrap the sequence into a tag. |
1042 | | //! </div> |
1043 | | //! </td> |
1044 | | //! <td> |
1045 | | //! A homogeneous sequence of elements with a fixed or dynamic size: |
1046 | | //! |
1047 | | //! ``` |
1048 | | //! # use pretty_assertions::assert_eq; |
1049 | | //! # use serde::Deserialize; |
1050 | | //! # #[derive(Debug, PartialEq)] |
1051 | | //! #[derive(Deserialize)] |
1052 | | //! #[serde(rename_all = "snake_case")] |
1053 | | //! enum Choice { |
1054 | | //! One, |
1055 | | //! Two, |
1056 | | //! #[serde(other)] |
1057 | | //! Other, |
1058 | | //! } |
1059 | | //! type AnyName = [Choice; 4]; |
1060 | | //! # assert_eq!( |
1061 | | //! # [Choice::One, Choice::Other, Choice::Two, Choice::One], |
1062 | | //! # quick_xml::de::from_str::<AnyName>(r#"<one>...</one>text <![CDATA[cdata]]><two>...</two><one>...</one>"#).unwrap(), |
1063 | | //! # ); |
1064 | | //! ``` |
1065 | | //! ``` |
1066 | | //! # use pretty_assertions::assert_eq; |
1067 | | //! # use serde::Deserialize; |
1068 | | //! # #[derive(Debug, PartialEq)] |
1069 | | //! #[derive(Deserialize)] |
1070 | | //! #[serde(rename_all = "snake_case")] |
1071 | | //! enum Choice { |
1072 | | //! One, |
1073 | | //! Two, |
1074 | | //! #[serde(rename = "$text")] |
1075 | | //! Other(String), |
1076 | | //! } |
1077 | | //! type AnyName = Vec<Choice>; |
1078 | | //! # assert_eq!( |
1079 | | //! # vec![ |
1080 | | //! # Choice::One, |
1081 | | //! # Choice::Other("text cdata".into()), |
1082 | | //! # Choice::Two, |
1083 | | //! # Choice::One, |
1084 | | //! # ], |
1085 | | //! # quick_xml::de::from_str::<AnyName>(r#"<one>...</one>text <![CDATA[cdata]]><two>...</two><one>...</one>"#).unwrap(), |
1086 | | //! # ); |
1087 | | //! ``` |
1088 | | //! <div style="background:rgba(120,145,255,0.45);padding:0.75em;"> |
1089 | | //! |
1090 | | //! NOTE: consecutive text and CDATA nodes are merged into one text node, |
1091 | | //! so you cannot have two adjacent string types in your sequence. |
1092 | | //! </div> |
1093 | | //! </td> |
1094 | | //! </tr> |
1095 | | //! <!-- 16 ==================================================================================== --> |
1096 | | //! <tr> |
1097 | | //! <td> |
1098 | | //! A sequence with a strict order, probably with a mixed content, |
1099 | | //! (text and tags) inside of the other element: |
1100 | | //! |
1101 | | //! ```xml |
1102 | | //! <any-tag attribute="..."> |
1103 | | //! <one>...</one> |
1104 | | //! text |
1105 | | //! <![CDATA[cdata]]> |
1106 | | //! <two>...</two> |
1107 | | //! <one>...</one> |
1108 | | //! </any-tag> |
1109 | | //! ``` |
1110 | | //! </td> |
1111 | | //! <td> |
1112 | | //! |
1113 | | //! A structure where all child elements are mapped to one field which has |
1114 | | //! a heterogeneous sequential type: tuple or named tuple. Each element of the |
1115 | | //! tuple should be able to be deserialized from the full element (`<one>...</one>`). |
1116 | | //! |
1117 | | //! You MUST specify `#[serde(rename = "$value")]` on that field: |
1118 | | //! |
1119 | | //! ``` |
1120 | | //! # use pretty_assertions::assert_eq; |
1121 | | //! # use serde::Deserialize; |
1122 | | //! # type One = (); |
1123 | | //! # type Two = (); |
1124 | | //! # /* |
1125 | | //! type One = ...; |
1126 | | //! type Two = ...; |
1127 | | //! # */ |
1128 | | //! |
1129 | | //! # #[derive(Debug, PartialEq)] |
1130 | | //! #[derive(Deserialize)] |
1131 | | //! struct AnyName { |
1132 | | //! #[serde(rename = "@attribute")] |
1133 | | //! # attribute: (), |
1134 | | //! # /* |
1135 | | //! attribute: ..., |
1136 | | //! # */ |
1137 | | //! // Not (yet?) supported by serde |
1138 | | //! // https://github.com/serde-rs/serde/issues/1905 |
1139 | | //! // #[serde(flatten)] |
1140 | | //! #[serde(rename = "$value")] |
1141 | | //! any_name: (One, String, Two, One), |
1142 | | //! } |
1143 | | //! # assert_eq!( |
1144 | | //! # AnyName { attribute: (), any_name: ((), "text cdata".into(), (), ()) }, |
1145 | | //! # quick_xml::de::from_str("\ |
1146 | | //! # <any-tag attribute='...'>\ |
1147 | | //! # <one>...</one>\ |
1148 | | //! # text \ |
1149 | | //! # <![CDATA[cdata]]>\ |
1150 | | //! # <two>...</two>\ |
1151 | | //! # <one>...</one>\ |
1152 | | //! # </any-tag>" |
1153 | | //! # ).unwrap(), |
1154 | | //! # ); |
1155 | | //! ``` |
1156 | | //! ``` |
1157 | | //! # use pretty_assertions::assert_eq; |
1158 | | //! # use serde::Deserialize; |
1159 | | //! # type One = (); |
1160 | | //! # type Two = (); |
1161 | | //! # /* |
1162 | | //! type One = ...; |
1163 | | //! type Two = ...; |
1164 | | //! # */ |
1165 | | //! |
1166 | | //! # #[derive(Debug, PartialEq)] |
1167 | | //! #[derive(Deserialize)] |
1168 | | //! struct NamedTuple(One, String, Two, One); |
1169 | | //! |
1170 | | //! # #[derive(Debug, PartialEq)] |
1171 | | //! #[derive(Deserialize)] |
1172 | | //! struct AnyName { |
1173 | | //! #[serde(rename = "@attribute")] |
1174 | | //! # attribute: (), |
1175 | | //! # /* |
1176 | | //! attribute: ..., |
1177 | | //! # */ |
1178 | | //! // Not (yet?) supported by serde |
1179 | | //! // https://github.com/serde-rs/serde/issues/1905 |
1180 | | //! // #[serde(flatten)] |
1181 | | //! #[serde(rename = "$value")] |
1182 | | //! any_name: NamedTuple, |
1183 | | //! } |
1184 | | //! # assert_eq!( |
1185 | | //! # AnyName { attribute: (), any_name: NamedTuple((), "text cdata".into(), (), ()) }, |
1186 | | //! # quick_xml::de::from_str("\ |
1187 | | //! # <any-tag attribute='...'>\ |
1188 | | //! # <one>...</one>\ |
1189 | | //! # text \ |
1190 | | //! # <![CDATA[cdata]]>\ |
1191 | | //! # <two>...</two>\ |
1192 | | //! # <one>...</one>\ |
1193 | | //! # </any-tag>" |
1194 | | //! # ).unwrap(), |
1195 | | //! # ); |
1196 | | //! ``` |
1197 | | //! <div style="background:rgba(120,145,255,0.45);padding:0.75em;"> |
1198 | | //! |
1199 | | //! NOTE: consecutive text and CDATA nodes are merged into one text node, |
1200 | | //! so you cannot have two adjacent string types in your sequence. |
1201 | | //! </div> |
1202 | | //! </td> |
1203 | | //! </tr> |
1204 | | //! <!-- 17 ==================================================================================== --> |
1205 | | //! <tr> |
1206 | | //! <td> |
1207 | | //! A sequence with a non-strict order, probably with a mixed content |
1208 | | //! (text / CDATA and tags) inside of the other element: |
1209 | | //! |
1210 | | //! ```xml |
1211 | | //! <any-tag> |
1212 | | //! <one>...</one> |
1213 | | //! text |
1214 | | //! <![CDATA[cdata]]> |
1215 | | //! <two>...</two> |
1216 | | //! <one>...</one> |
1217 | | //! </any-tag> |
1218 | | //! ``` |
1219 | | //! </td> |
1220 | | //! <td> |
1221 | | //! |
1222 | | //! A structure where all child elements are mapped to one field which has |
1223 | | //! a homogeneous sequential type: array-like container. A container type `T` |
1224 | | //! should be able to be deserialized from the nested element content (`...`), |
1225 | | //! except if it is an enum type which would be deserialized from the full |
1226 | | //! element (`<one>...</one>`). |
1227 | | //! |
1228 | | //! You MUST specify `#[serde(rename = "$value")]` on that field: |
1229 | | //! |
1230 | | //! ``` |
1231 | | //! # use pretty_assertions::assert_eq; |
1232 | | //! # use serde::Deserialize; |
1233 | | //! # #[derive(Debug, PartialEq)] |
1234 | | //! #[derive(Deserialize)] |
1235 | | //! #[serde(rename_all = "snake_case")] |
1236 | | //! enum Choice { |
1237 | | //! One, |
1238 | | //! Two, |
1239 | | //! #[serde(rename = "$text")] |
1240 | | //! Other(String), |
1241 | | //! } |
1242 | | //! # #[derive(Debug, PartialEq)] |
1243 | | //! #[derive(Deserialize)] |
1244 | | //! struct AnyName { |
1245 | | //! #[serde(rename = "@attribute")] |
1246 | | //! # attribute: (), |
1247 | | //! # /* |
1248 | | //! attribute: ..., |
1249 | | //! # */ |
1250 | | //! // Not (yet?) supported by serde |
1251 | | //! // https://github.com/serde-rs/serde/issues/1905 |
1252 | | //! // #[serde(flatten)] |
1253 | | //! #[serde(rename = "$value")] |
1254 | | //! any_name: [Choice; 4], |
1255 | | //! } |
1256 | | //! # assert_eq!( |
1257 | | //! # AnyName { attribute: (), any_name: [ |
1258 | | //! # Choice::One, |
1259 | | //! # Choice::Other("text cdata".into()), |
1260 | | //! # Choice::Two, |
1261 | | //! # Choice::One, |
1262 | | //! # ] }, |
1263 | | //! # quick_xml::de::from_str("\ |
1264 | | //! # <any-tag attribute='...'>\ |
1265 | | //! # <one>...</one>\ |
1266 | | //! # text \ |
1267 | | //! # <![CDATA[cdata]]>\ |
1268 | | //! # <two>...</two>\ |
1269 | | //! # <one>...</one>\ |
1270 | | //! # </any-tag>" |
1271 | | //! # ).unwrap(), |
1272 | | //! # ); |
1273 | | //! ``` |
1274 | | //! ``` |
1275 | | //! # use pretty_assertions::assert_eq; |
1276 | | //! # use serde::Deserialize; |
1277 | | //! # #[derive(Debug, PartialEq)] |
1278 | | //! #[derive(Deserialize)] |
1279 | | //! #[serde(rename_all = "snake_case")] |
1280 | | //! enum Choice { |
1281 | | //! One, |
1282 | | //! Two, |
1283 | | //! #[serde(rename = "$text")] |
1284 | | //! Other(String), |
1285 | | //! } |
1286 | | //! # #[derive(Debug, PartialEq)] |
1287 | | //! #[derive(Deserialize)] |
1288 | | //! struct AnyName { |
1289 | | //! #[serde(rename = "@attribute")] |
1290 | | //! # attribute: (), |
1291 | | //! # /* |
1292 | | //! attribute: ..., |
1293 | | //! # */ |
1294 | | //! // Not (yet?) supported by serde |
1295 | | //! // https://github.com/serde-rs/serde/issues/1905 |
1296 | | //! // #[serde(flatten)] |
1297 | | //! #[serde(rename = "$value")] |
1298 | | //! any_name: Vec<Choice>, |
1299 | | //! } |
1300 | | //! # assert_eq!( |
1301 | | //! # AnyName { attribute: (), any_name: vec![ |
1302 | | //! # Choice::One, |
1303 | | //! # Choice::Other("text cdata".into()), |
1304 | | //! # Choice::Two, |
1305 | | //! # Choice::One, |
1306 | | //! # ] }, |
1307 | | //! # quick_xml::de::from_str("\ |
1308 | | //! # <any-tag attribute='...'>\ |
1309 | | //! # <one>...</one>\ |
1310 | | //! # text \ |
1311 | | //! # <![CDATA[cdata]]>\ |
1312 | | //! # <two>...</two>\ |
1313 | | //! # <one>...</one>\ |
1314 | | //! # </any-tag>" |
1315 | | //! # ).unwrap(), |
1316 | | //! # ); |
1317 | | //! ``` |
1318 | | //! <div style="background:rgba(120,145,255,0.45);padding:0.75em;"> |
1319 | | //! |
1320 | | //! NOTE: consecutive text and CDATA nodes are merged into one text node, |
1321 | | //! so you cannot have two adjacent string types in your sequence. |
1322 | | //! </div> |
1323 | | //! </td> |
1324 | | //! </tr> |
1325 | | //! </tbody> |
1326 | | //! </table> |
1327 | | //! |
1328 | | //! |
1329 | | //! Mapping of `xsi:nil` |
1330 | | //! ==================== |
1331 | | //! |
1332 | | //! quick-xml supports handling of the [`xsi:nil`] special attribute. When a field of optional |
1333 | | //! type is mapped to an XML element which has `xsi:nil="true"` set, or if that attribute |
1334 | | //! is placed on the parent XML element, the deserializer will call [`Visitor::visit_none`] |
1335 | | //! and skip the XML element corresponding to the field. |
1336 | | //! |
1337 | | //! Examples: |
1338 | | //! |
1339 | | //! ``` |
1340 | | //! # use pretty_assertions::assert_eq; |
1341 | | //! # use serde::Deserialize; |
1342 | | //! #[derive(Deserialize, Debug, PartialEq)] |
1343 | | //! struct TypeWithOptionalField { |
1344 | | //! element: Option<String>, |
1345 | | //! } |
1346 | | //! |
1347 | | //! assert_eq!( |
1348 | | //! TypeWithOptionalField { |
1349 | | //! element: None, |
1350 | | //! }, |
1351 | | //! quick_xml::de::from_str(" |
1352 | | //! <any-tag xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'> |
1353 | | //! <element xsi:nil='true'>Content is skiped because of xsi:nil='true'</element> |
1354 | | //! </any-tag> |
1355 | | //! ").unwrap(), |
1356 | | //! ); |
1357 | | //! ``` |
1358 | | //! |
1359 | | //! You can capture attributes from the optional type, because `xsi:nil="true"` elements can have |
1360 | | //! attributes: |
1361 | | //! ``` |
1362 | | //! # use pretty_assertions::assert_eq; |
1363 | | //! # use serde::Deserialize; |
1364 | | //! #[derive(Deserialize, Debug, PartialEq)] |
1365 | | //! struct TypeWithOptionalField { |
1366 | | //! #[serde(rename = "@attribute")] |
1367 | | //! attribute: usize, |
1368 | | //! |
1369 | | //! element: Option<String>, |
1370 | | //! non_optional: String, |
1371 | | //! } |
1372 | | //! |
1373 | | //! assert_eq!( |
1374 | | //! TypeWithOptionalField { |
1375 | | //! attribute: 42, |
1376 | | //! element: None, |
1377 | | //! non_optional: "Note, that non-optional fields will be deserialized as usual".to_string(), |
1378 | | //! }, |
1379 | | //! quick_xml::de::from_str(" |
1380 | | //! <any-tag attribute='42' xsi:nil='true' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'> |
1381 | | //! <element>Content is skiped because of xsi:nil='true'</element> |
1382 | | //! <non_optional>Note, that non-optional fields will be deserialized as usual</non_optional> |
1383 | | //! </any-tag> |
1384 | | //! ").unwrap(), |
1385 | | //! ); |
1386 | | //! ``` |
1387 | | //! |
1388 | | //! Generate Rust types from XML |
1389 | | //! ============================ |
1390 | | //! |
1391 | | //! To speed up the creation of Rust types that represent a given XML file you can |
1392 | | //! use the [xml_schema_generator](https://github.com/Thomblin/xml_schema_generator). |
1393 | | //! It provides a standalone binary and a Rust library that parses one or more XML files |
1394 | | //! and generates a collection of structs that are compatible with quick_xml::de. |
1395 | | //! |
1396 | | //! |
1397 | | //! |
1398 | | //! Composition Rules |
1399 | | //! ================= |
1400 | | //! |
1401 | | //! The XML format is very different from other formats supported by `serde`. |
1402 | | //! One such difference is how data in the serialized form is related to |
1403 | | //! the Rust type. Usually each byte in the data can be associated only with |
1404 | | //! one field in the data structure. However, XML is an exception. |
1405 | | //! |
1406 | | //! For example, take this XML: |
1407 | | //! |
1408 | | //! ```xml |
1409 | | //! <any> |
1410 | | //! <key attr="value"/> |
1411 | | //! </any> |
1412 | | //! ``` |
1413 | | //! |
1414 | | //! and try to deserialize it to the struct `AnyName`: |
1415 | | //! |
1416 | | //! ```no_run |
1417 | | //! # use serde::Deserialize; |
1418 | | //! #[derive(Deserialize)] |
1419 | | //! struct AnyName { // AnyName calls `deserialize_struct` on `<any><key attr="value"/></any>` |
1420 | | //! // Used data: ^^^^^^^^^^^^^^^^^^^ |
1421 | | //! key: Inner, // Inner calls `deserialize_struct` on `<key attr="value"/>` |
1422 | | //! // Used data: ^^^^^^^^^^^^ |
1423 | | //! } |
1424 | | //! #[derive(Deserialize)] |
1425 | | //! struct Inner { |
1426 | | //! #[serde(rename = "@attr")] |
1427 | | //! attr: String, // String calls `deserialize_string` on `value` |
1428 | | //! // Used data: ^^^^^ |
1429 | | //! } |
1430 | | //! ``` |
1431 | | //! |
1432 | | //! Comments show which methods of a [`Deserializer`] are called by each struct's |
1433 | | //! `deserialize` method and which input they see. **Used data** shows what |
1434 | | //! content is actually used for deserializing. As you can see, the name of the inner |
1435 | | //! `<key>` tag is used both as a map key / outer struct field name and as part |
1436 | | //! of the inner struct (although the _value_ of the tag, i.e. `key`, is not used |
1437 | | //! by it). |
1438 | | //! |
1439 | | //! |
1440 | | //! |
1441 | | //! Enum Representations |
1442 | | //! ==================== |
1443 | | //! |
1444 | | //! `quick-xml` represents enums differently in normal fields, `$text` fields and |
1445 | | //! `$value` fields. A normal representation is compatible with serde's adjacent |
1446 | | //! and internal tags feature -- tags for adjacently and internally tagged enums |
1447 | | //! are serialized using [`Serializer::serialize_unit_variant`] and deserialized |
1448 | | //! using [`Deserializer::deserialize_enum`]. |
1449 | | //! |
1450 | | //! Use these simple rules to remember how an enum is represented in XML: |
1451 | | //! - In a `$value` field the representation is always the same as the top-level representation; |
1452 | | //! - In a `$text` field the representation is always the same as in a normal field, |
1453 | | //! but the surrounding tags with the field name are removed; |
1454 | | //! - In a normal field the representation always contains a tag with the field name. |
1455 | | //! |
1456 | | //! Normal enum variant |
1457 | | //! ------------------- |
1458 | | //! |
1459 | | //! To model an `xs:choice` XML construct use `$value` field. |
1460 | | //! To model a top-level `xs:choice` just use the enum type. |
1461 | | //! |
1462 | | //! |Kind |Top-level and in `$value` field |In normal field |In `$text` field | |
1463 | | //! |-------|-----------------------------------------|---------------------|---------------------| |
1464 | | //! |Unit |`<Unit/>` |`<field>Unit</field>`|`Unit` | |
1465 | | //! |Newtype|`<Newtype>42</Newtype>` |Err(Custom) [^0] |Err(Custom) [^0] | |
1466 | | //! |Tuple |`<Tuple>42</Tuple><Tuple>answer</Tuple>` |Err(Custom) [^0] |Err(Custom) [^0] | |
1467 | | //! |Struct |`<Struct><q>42</q><a>answer</a></Struct>`|Err(Custom) [^0] |Err(Custom) [^0] | |
1468 | | //! |
1469 | | //! `$text` enum variant |
1470 | | //! -------------------- |
1471 | | //! |
1472 | | //! |Kind |Top-level and in `$value` field |In normal field |In `$text` field | |
1473 | | //! |-------|-----------------------------------------|---------------------|---------------------| |
1474 | | //! |Unit |_(empty)_ |`<field/>` |_(empty)_ | |
1475 | | //! |Newtype|`42` |Err(Custom) [^0] [^1]|Err(Custom) [^0] [^2]| |
1476 | | //! |Tuple |`42 answer` |Err(Custom) [^0] [^3]|Err(Custom) [^0] [^4]| |
1477 | | //! |Struct |Err(Custom) [^0] |Err(Custom) [^0] |Err(Custom) [^0] | |
1478 | | //! |
1479 | | //! [^0]: Error is returned by the deserialized type. In case of derived implementation a `Custom` |
1480 | | //! error will be returned, but custom deserialize implementation can successfully deserialize |
1481 | | //! value from a string which will be passed to it. |
1482 | | //! |
1483 | | //! [^1]: If this were serialized as `<field>42</field>`, it would be ambiguous during deserialization, |
1484 | | //! because it would clash with the `Unit` representation in a normal field. |
1485 | | //! |
1486 | | //! [^2]: If this were serialized as `42`, it would be ambiguous during deserialization, |
1487 | | //! because it would clash with the `Unit` representation in a `$text` field. |
1488 | | //! |
1489 | | //! [^3]: If this were serialized as `<field>42 answer</field>`, it would be ambiguous during deserialization, |
1490 | | //! because it would clash with the `Unit` representation in a normal field. |
1491 | | //! |
1492 | | //! [^4]: If this were serialized as `42 answer`, it would be ambiguous during deserialization, |
1493 | | //! because it would clash with the `Unit` representation in a `$text` field. |
1494 | | //! |
1495 | | //! |
1496 | | //! |
1497 | | //! `$text` and `$value` special names |
1498 | | //! ================================== |
1499 | | //! |
1500 | | //! quick-xml supports two special names for fields -- `$text` and `$value`. |
1501 | | //! Although they may seem the same, there is a distinction. Two different |
1502 | | //! names are required mostly for serialization, because quick-xml should know |
1503 | | //! how you want to serialize certain constructs, which could be represented |
1504 | | //! through XML in multiple different ways. |
1505 | | //! |
1506 | | //! The only difference is in how complex types and sequences are serialized. |
1507 | | //! If you doubt which one you should select, begin with [`$value`](#value). |
1508 | | //! |
1509 | | //! If you have both `$text` and `$value` in your struct, then text events will be |
1510 | | //! mapped to the `$text` field: |
1511 | | //! |
1512 | | //! ``` |
1513 | | //! # use serde::Deserialize; |
1514 | | //! # use quick_xml::de::from_str; |
1515 | | //! #[derive(Deserialize, PartialEq, Debug)] |
1516 | | //! struct TextAndValue { |
1517 | | //! #[serde(rename = "$text")] |
1518 | | //! text: Option<String>, |
1519 | | //! |
1520 | | //! #[serde(rename = "$value")] |
1521 | | //! value: Option<String>, |
1522 | | //! } |
1523 | | //! |
1524 | | //! let object: TextAndValue = from_str("<AnyName>text <![CDATA[and CDATA]]></AnyName>").unwrap(); |
1525 | | //! assert_eq!(object, TextAndValue { |
1526 | | //! text: Some("text and CDATA".to_string()), |
1527 | | //! value: None, |
1528 | | //! }); |
1529 | | //! ``` |
1530 | | //! |
1531 | | //! ## `$text` |
1532 | | //! `$text` is used when you want to write your XML as text or CDATA content. |
1533 | | //! More formally, a field with that name represents a simple type definition with |
1534 | | //! `{variety} = atomic` or `{variety} = union` whose basic members are all atomic, |
1535 | | //! as described in the [specification]. |
1536 | | //! |
1537 | | //! As a result, not all types of such fields can be serialized. Serialization |
1538 | | //! is supported only for the following types: |
1539 | | //! - all primitive types (strings, numbers, booleans) |
1540 | | //! - unit variants of enumerations (serializes to a name of a variant) |
1541 | | //! - newtypes (delegates serialization to inner type) |
1542 | | //! - [`Option`] of above (`None` serializes to nothing) |
1543 | | //! - sequences (including tuples and tuple variants of enumerations) of above, |
1544 | | //! excluding `None` and empty string elements (because it will not be possible |
1545 | | //! to deserialize them back). The elements are separated by space(s) |
1546 | | //! - unit type `()` and unit structs (serializes to nothing) |
1547 | | //! |
1548 | | //! Complex types, such as structs and maps, are not supported in this field. |
1549 | | //! If you want them, you should use `$value`. |
1550 | | //! |
1551 | | //! Sequences are serialized to a space-delimited string, which is why only certain |
1552 | | //! types are allowed in this mode: |
1553 | | //! |
1554 | | //! ``` |
1555 | | //! # use serde::{Deserialize, Serialize}; |
1556 | | //! # use quick_xml::de::from_str; |
1557 | | //! # use quick_xml::se::to_string; |
1558 | | //! #[derive(Deserialize, Serialize, PartialEq, Debug)] |
1559 | | //! struct AnyName { |
1560 | | //! #[serde(rename = "$text")] |
1561 | | //! field: Vec<usize>, |
1562 | | //! } |
1563 | | //! |
1564 | | //! let obj = AnyName { field: vec![1, 2, 3] }; |
1565 | | //! let xml = to_string(&obj).unwrap(); |
1566 | | //! assert_eq!(xml, "<AnyName>1 2 3</AnyName>"); |
1567 | | //! |
1568 | | //! let object: AnyName = from_str(&xml).unwrap(); |
1569 | | //! assert_eq!(object, obj); |
1570 | | //! ``` |
1571 | | //! |
1572 | | //! ## `$value` |
1573 | | //! <div style="background:rgba(120,145,255,0.45);padding:0.75em;"> |
1574 | | //! |
1575 | | //! NOTE: the name `#content` would better explain the purpose of that field, |
1576 | | //! but `$value` is used for compatibility with other XML serde crates, which |
1577 | | //! use that name. This will allow you to switch XML crates more smoothly if required. |
1578 | | //! </div> |
1579 | | //! |
1580 | | //! The representation of primitive types in `$value` does not differ from their |
1581 | | //! representation in `$text` fields. The difference is how sequences are serialized |
1582 | | //! and deserialized. `$value` serializes each sequence item as a separate XML element. |
1583 | | //! How the name of the XML element is chosen depends on the field's type. For |
1584 | | //! `enum`s, the variant name is used. For `struct`s, the name of the `struct` |
1585 | | //! is used. |
1586 | | //! |
1587 | | //! During deserialization, if the `$value` field is an enum, then the element's |
1588 | | //! name is matched against the variant names. That's **not** the case with structs, however, since |
1589 | | //! `serde` does not expose type names of nested fields. This does mean that **any** |
1590 | | //! type could be deserialized into a `$value` struct-type field, so long as the |
1591 | | //! struct's fields have compatible types (or are captured as text by `String` |
1592 | | //! or similar-behaving types). This can be handy when using generic types in fields |
1593 | | //! where one knows in advance what to expect. If you do not know what to expect, |
1594 | | //! however, prefer an enum with all possible variants. |
1595 | | //! |
1596 | | //! Unit structs and unit type `()` serialize to nothing and can be deserialized |
1597 | | //! from any content. |
1598 | | //! |
1599 | | //! Serialization and deserialization of a `$value` field are performed as usual, except |
1600 | | //! that the name of the XML element is given by the serialized type instead of the |
1601 | | //! field. This makes it possible to serialize enumerated types, where the variant is encoded |
1602 | | //! as a tag name, and thus to represent an XSD `xs:choice` schema by a Rust `enum`. |
1603 | | //! |
1604 | | //! In the example below, `field` will be serialized as `<field>A</field>`, because elements |
1605 | | //! get their names from the field name and the variant name becomes the text content. Elements |
1606 | | //! `<A/>`, `<B/>` or `<C/>` cannot be deserialized into it, because `AnyName` looks only for `<field>`: |
1607 | | //! |
1608 | | //! ``` |
1609 | | //! # use serde::{Deserialize, Serialize}; |
1610 | | //! # use pretty_assertions::assert_eq; |
1611 | | //! # #[derive(PartialEq, Debug)] |
1612 | | //! #[derive(Deserialize, Serialize)] |
1613 | | //! enum Enum { A, B, C } |
1614 | | //! |
1615 | | //! # #[derive(PartialEq, Debug)] |
1616 | | //! #[derive(Deserialize, Serialize)] |
1617 | | //! struct AnyName { |
1618 | | //! // <field>A</field>, <field>B</field>, or <field>C</field> |
1619 | | //! field: Enum, |
1620 | | //! } |
1621 | | //! # assert_eq!( |
1622 | | //! # quick_xml::se::to_string(&AnyName { field: Enum::A }).unwrap(), |
1623 | | //! # "<AnyName><field>A</field></AnyName>", |
1624 | | //! # ); |
1625 | | //! # assert_eq!( |
1626 | | //! # AnyName { field: Enum::B }, |
1627 | | //! # quick_xml::de::from_str("<root><field>B</field></root>").unwrap(), |
1628 | | //! # ); |
1629 | | //! ``` |
1630 | | //! |
1631 | | //! If you rename the field to `$value`, then `field` will be serialized as `<A/>`, |
1632 | | //! `<B/>` or `<C/>`, depending on its content. It is also possible to |
1633 | | //! deserialize it from the same elements: |
1634 | | //! |
1635 | | //! ``` |
1636 | | //! # use serde::{Deserialize, Serialize}; |
1637 | | //! # use pretty_assertions::assert_eq; |
1638 | | //! # #[derive(Deserialize, Serialize, PartialEq, Debug)] |
1639 | | //! # enum Enum { A, B, C } |
1640 | | //! # |
1641 | | //! # #[derive(PartialEq, Debug)] |
1642 | | //! #[derive(Deserialize, Serialize)] |
1643 | | //! struct AnyName { |
1644 | | //! // <A/>, <B/> or <C/> |
1645 | | //! #[serde(rename = "$value")] |
1646 | | //! field: Enum, |
1647 | | //! } |
1648 | | //! # assert_eq!( |
1649 | | //! # quick_xml::se::to_string(&AnyName { field: Enum::A }).unwrap(), |
1650 | | //! # "<AnyName><A/></AnyName>", |
1651 | | //! # ); |
1652 | | //! # assert_eq!( |
1653 | | //! # AnyName { field: Enum::B }, |
1654 | | //! # quick_xml::de::from_str("<root><B/></root>").unwrap(), |
1655 | | //! # ); |
1656 | | //! ``` |
1657 | | //! |
1658 | | //! The next example demonstrates how generic types can be used in conjunction |
1659 | | //! with `$value`-named fields to allow the reuse of wrapping structs. A common |
1660 | | //! example use case for this feature is SOAP messages, which are commonly |
1661 | | //! found wrapped in `<soapenv:Envelope> ... </soapenv:Envelope>`. |
1662 | | //! |
1663 | | //! ```rust |
1664 | | //! # use pretty_assertions::assert_eq; |
1665 | | //! # use quick_xml::de::from_str; |
1666 | | //! # use quick_xml::se::to_string; |
1667 | | //! # use serde::{Deserialize, Serialize}; |
1668 | | //! # |
1669 | | //! #[derive(Deserialize, Serialize, PartialEq, Debug)] |
1670 | | //! struct Envelope<T> { |
1671 | | //! body: Body<T>, |
1672 | | //! } |
1673 | | //! |
1674 | | //! #[derive(Deserialize, Serialize, PartialEq, Debug)] |
1675 | | //! struct Body<T> { |
1676 | | //! #[serde(rename = "$value")] |
1677 | | //! inner: T, |
1678 | | //! } |
1679 | | //! |
1680 | | //! #[derive(Serialize, PartialEq, Debug)] |
1681 | | //! struct Example { |
1682 | | //! a: i32, |
1683 | | //! } |
1684 | | //! |
1685 | | //! assert_eq!( |
1686 | | //! to_string(&Envelope { body: Body { inner: Example { a: 42 } } }).unwrap(), |
1687 | | //! // Notice how `inner` is not present in the XML |
1688 | | //! "<Envelope><body><Example><a>42</a></Example></body></Envelope>", |
1689 | | //! ); |
1690 | | //! |
1691 | | //! #[derive(Deserialize, PartialEq, Debug)] |
1692 | | //! struct AnotherExample { |
1693 | | //! a: i32, |
1694 | | //! } |
1695 | | //! |
1696 | | //! assert_eq!( |
1697 | | //! // Notice that the tag name does nothing for a struct in a `$value` field |
1698 | | //! Envelope { body: Body { inner: AnotherExample { a: 42 } } }, |
1699 | | //! from_str("<Envelope><body><Example><a>42</a></Example></body></Envelope>").unwrap(), |
1700 | | //! ); |
1701 | | //! ``` |
1702 | | //! |
1703 | | //! ### Primitives and sequences of primitives |
1704 | | //! |
1705 | | //! Sequences are serialized to a list of elements. Note that types that do not |
1706 | | //! produce their own tag (i.e. primitives) will produce [`SeError::Unsupported`] |
1707 | | //! if the sequence contains more than one element, because such a sequence cannot be |
1708 | | //! deserialized back to the same value: |
1709 | | //! |
1710 | | //! ``` |
1711 | | //! # use serde::{Deserialize, Serialize}; |
1712 | | //! # use pretty_assertions::assert_eq; |
1713 | | //! # use quick_xml::de::from_str; |
1714 | | //! # use quick_xml::se::to_string; |
1715 | | //! #[derive(Deserialize, Serialize, PartialEq, Debug)] |
1716 | | //! struct AnyName { |
1717 | | //! #[serde(rename = "$value")] |
1718 | | //! field: Vec<usize>, |
1719 | | //! } |
1720 | | //! |
1721 | | //! let obj = AnyName { field: vec![1, 2, 3] }; |
1722 | | //! // If this object were serialized, it would be represented as "<AnyName>123</AnyName>" |
1723 | | //! to_string(&obj).unwrap_err(); |
1724 | | //! |
1725 | | //! let object: AnyName = from_str("<AnyName>123</AnyName>").unwrap(); |
1726 | | //! assert_eq!(object, AnyName { field: vec![123] }); |
1727 | | //! |
1728 | | //! // `1 2 3` is mapped to a single `usize` element |
1729 | | //! // It is impossible to deserialize list of primitives to such field |
1730 | | //! from_str::<AnyName>("<AnyName>1 2 3</AnyName>").unwrap_err(); |
1731 | | //! ``` |
1732 | | //! |
1733 | | //! A particular case of that example is a string `$value` field, which is probably |
1734 | | //! the most common use of that attribute: |
1735 | | //! |
1736 | | //! ``` |
1737 | | //! # use serde::{Deserialize, Serialize}; |
1738 | | //! # use pretty_assertions::assert_eq; |
1739 | | //! # use quick_xml::de::from_str; |
1740 | | //! # use quick_xml::se::to_string; |
1741 | | //! #[derive(Deserialize, Serialize, PartialEq, Debug)] |
1742 | | //! struct AnyName { |
1743 | | //! #[serde(rename = "$value")] |
1744 | | //! field: String, |
1745 | | //! } |
1746 | | //! |
1747 | | //! let obj = AnyName { field: "content".to_string() }; |
1748 | | //! let xml = to_string(&obj).unwrap(); |
1749 | | //! assert_eq!(xml, "<AnyName>content</AnyName>"); |
1750 | | //! ``` |
1751 | | //! |
1752 | | //! ### Structs and sequences of structs |
1753 | | //! |
1754 | | //! Note, that structures do not have a serializable name as well (name of the |
1755 | | //! type is never used), so it is impossible to serialize non-unit struct or |
1756 | | //! sequence of non-unit structs in `$value` field. (Sequences of) unit structs |
1757 | | //! are serialized as an empty string, because units themselves serialize |
1758 | | //! to nothing: |
1759 | | //! |
1760 | | //! ``` |
1761 | | //! # use serde::{Deserialize, Serialize}; |
1762 | | //! # use pretty_assertions::assert_eq; |
1763 | | //! # use quick_xml::de::from_str; |
1764 | | //! # use quick_xml::se::to_string; |
1765 | | //! #[derive(Deserialize, Serialize, PartialEq, Debug)] |
1766 | | //! struct Unit; |
1767 | | //! |
1768 | | //! #[derive(Deserialize, Serialize, PartialEq, Debug)] |
1769 | | //! struct AnyName { |
1770 | | //! // #[serde(default)] is required for deserialization of empty lists |
1771 | | //! // This is a general note, not related to $value |
1772 | | //! #[serde(rename = "$value", default)] |
1773 | | //! field: Vec<Unit>, |
1774 | | //! } |
1775 | | //! |
1776 | | //! let obj = AnyName { field: vec![Unit, Unit, Unit] }; |
1777 | | //! let xml = to_string(&obj).unwrap(); |
1778 | | //! assert_eq!(xml, "<AnyName/>"); |
1779 | | //! |
1780 | | //! let object: AnyName = from_str("<AnyName/>").unwrap(); |
1781 | | //! assert_eq!(object, AnyName { field: vec![] }); |
1782 | | //! |
1783 | | //! let object: AnyName = from_str("<AnyName></AnyName>").unwrap(); |
1784 | | //! assert_eq!(object, AnyName { field: vec![] }); |
1785 | | //! |
1786 | | //! let object: AnyName = from_str("<AnyName><A/><B/><C/></AnyName>").unwrap(); |
1787 | | //! assert_eq!(object, AnyName { field: vec![Unit, Unit, Unit] }); |
1788 | | //! ``` |
1789 | | //! |
1790 | | //! ### Enums and sequences of enums |
1791 | | //! |
1792 | | //! Enumerations use the variant name as the element name: |
1793 | | //! |
1794 | | //! ``` |
1795 | | //! # use serde::{Deserialize, Serialize}; |
1796 | | //! # use pretty_assertions::assert_eq; |
1797 | | //! # use quick_xml::de::from_str; |
1798 | | //! # use quick_xml::se::to_string; |
1799 | | //! #[derive(Deserialize, Serialize, PartialEq, Debug)] |
1800 | | //! struct AnyName { |
1801 | | //! #[serde(rename = "$value")] |
1802 | | //! field: Vec<Enum>, |
1803 | | //! } |
1804 | | //! |
1805 | | //! #[derive(Deserialize, Serialize, PartialEq, Debug)] |
1806 | | //! enum Enum { A, B, C } |
1807 | | //! |
1808 | | //! let obj = AnyName { field: vec![Enum::A, Enum::B, Enum::C] }; |
1809 | | //! let xml = to_string(&obj).unwrap(); |
1810 | | //! assert_eq!( |
1811 | | //! xml, |
1812 | | //! "<AnyName>\ |
1813 | | //! <A/>\ |
1814 | | //! <B/>\ |
1815 | | //! <C/>\ |
1816 | | //! </AnyName>" |
1817 | | //! ); |
1818 | | //! |
1819 | | //! let object: AnyName = from_str(&xml).unwrap(); |
1820 | | //! assert_eq!(object, obj); |
1821 | | //! ``` |
1822 | | //! |
1823 | | //! |
1824 | | //! |
1825 | | //! Frequently Used Patterns |
1826 | | //! ======================== |
1827 | | //! |
1828 | | //! Some XML constructs are used so frequently that it is worth documenting the recommended |
1829 | | //! way to represent them in Rust. The sections below describe them. |
1830 | | //! |
1831 | | //! `<element>` lists |
1832 | | //! ----------------- |
1833 | | //! Many XML formats wrap lists of elements in an additional container, |
1834 | | //! although this is not required by the XML rules: |
1835 | | //! |
1836 | | //! ```xml |
1837 | | //! <root> |
1838 | | //! <field1/> |
1839 | | //! <field2/> |
1840 | | //! <list><!-- Container --> |
1841 | | //! <element/> |
1842 | | //! <element/> |
1843 | | //! <element/> |
1844 | | //! </list> |
1845 | | //! <field3/> |
1846 | | //! </root> |
1847 | | //! ``` |
1848 | | //! In this case, it is tempting to describe this XML in this way: |
1849 | | //! ``` |
1850 | | //! /// Represents <element/> |
1851 | | //! type Element = (); |
1852 | | //! |
1853 | | //! /// Represents <root>...</root> |
1854 | | //! struct AnyName { |
1855 | | //! // Incorrect |
1856 | | //! list: Vec<Element>, |
1857 | | //! } |
1858 | | //! ``` |
1859 | | //! This will not work, because the `<list>` element can potentially have attributes |
1860 | | //! and other elements inside. You should define the struct for `<list>` |
1861 | | //! explicitly, as you would in the XSD for that XML: |
1862 | | //! ``` |
1863 | | //! /// Represents <element/> |
1864 | | //! type Element = (); |
1865 | | //! |
1866 | | //! /// Represents <root>...</root> |
1867 | | //! struct AnyName { |
1868 | | //! // Correct |
1869 | | //! list: List, |
1870 | | //! } |
1871 | | //! /// Represents <list>...</list> |
1872 | | //! struct List { |
1873 | | //! element: Vec<Element>, |
1874 | | //! } |
1875 | | //! ``` |
1876 | | //! |
1877 | | //! If you want to simplify your API, you could write a simple function for unwrapping |
1878 | | //! the inner list and apply it via [`deserialize_with`]: |
1879 | | //! |
1880 | | //! ``` |
1881 | | //! # use pretty_assertions::assert_eq; |
1882 | | //! use quick_xml::de::from_str; |
1883 | | //! use serde::{Deserialize, Deserializer}; |
1884 | | //! |
1885 | | //! /// Represents <element/> |
1886 | | //! type Element = (); |
1887 | | //! |
1888 | | //! /// Represents <root>...</root> |
1889 | | //! #[derive(Deserialize, Debug, PartialEq)] |
1890 | | //! struct AnyName { |
1891 | | //! #[serde(deserialize_with = "unwrap_list")] |
1892 | | //! list: Vec<Element>, |
1893 | | //! } |
1894 | | //! |
1895 | | //! fn unwrap_list<'de, D>(deserializer: D) -> Result<Vec<Element>, D::Error> |
1896 | | //! where |
1897 | | //! D: Deserializer<'de>, |
1898 | | //! { |
1899 | | //! /// Represents <list>...</list> |
1900 | | //! #[derive(Deserialize)] |
1901 | | //! struct List { |
1902 | | //! // default allows empty list |
1903 | | //! #[serde(default)] |
1904 | | //! element: Vec<Element>, |
1905 | | //! } |
1906 | | //! Ok(List::deserialize(deserializer)?.element) |
1907 | | //! } |
1908 | | //! |
1909 | | //! assert_eq!( |
1910 | | //! AnyName { list: vec![(), (), ()] }, |
1911 | | //! from_str(" |
1912 | | //! <root> |
1913 | | //! <list> |
1914 | | //! <element/> |
1915 | | //! <element/> |
1916 | | //! <element/> |
1917 | | //! </list> |
1918 | | //! </root> |
1919 | | //! ").unwrap(), |
1920 | | //! ); |
1921 | | //! ``` |
1922 | | //! |
1923 | | //! Instead of writing such functions manually, you could also try <https://lib.rs/crates/serde-query>. |
1924 | | //! |
1925 | | //! Overlapped (Out-of-Order) Elements |
1926 | | //! ---------------------------------- |
1927 | | //! If the tags of a list may be interleaved with |
1928 | | //! tags that do not belong to the list (this is a common case in XML |
1929 | | //! documents), like this: |
1930 | | //! ```xml |
1931 | | //! <any-name> |
1932 | | //! <item/> |
1933 | | //! <another-item/> |
1934 | | //! <item/> |
1935 | | //! <item/> |
1936 | | //! </any-name> |
1937 | | //! ``` |
1938 | | //! you should enable the [`overlapped-lists`] feature to make it possible |
1939 | | //! to deserialize this to: |
1940 | | //! ```no_run |
1941 | | //! # use serde::Deserialize; |
1942 | | //! #[derive(Deserialize)] |
1943 | | //! #[serde(rename_all = "kebab-case")] |
1944 | | //! struct AnyName { |
1945 | | //! item: Vec<()>, |
1946 | | //! another_item: (), |
1947 | | //! } |
1948 | | //! ``` |
1949 | | //! |
1950 | | //! |
1951 | | //! Internally Tagged Enums |
1952 | | //! ----------------------- |
1953 | | //! [Tagged enums] are currently not supported because of an issue in the Serde |
1954 | | //! design (see [serde#1183] and [quick-xml#586]) and missing optimizations in |
1955 | | //! Serde which could be useful for XML parsing ([serde#1495]). This can be worked |
1956 | | //! around by manually implementing deserialization with `#[serde(deserialize_with = "func")]` |
1957 | | //! or implementing [`Deserialize`], but this can get very tedious very fast for |
1958 | | //! files with large numbers of tagged enums. To help with this issue quick-xml |
1959 | | //! provides a macro [`impl_deserialize_for_internally_tagged_enum!`]. See the |
1960 | | //! macro documentation for details. |
1961 | | //! |
1962 | | //! |
1963 | | //! [`overlapped-lists`]: ../index.html#overlapped-lists |
1964 | | //! [specification]: https://www.w3.org/TR/xmlschema11-1/#Simple_Type_Definition |
1965 | | //! [`deserialize_with`]: https://serde.rs/field-attrs.html#deserialize_with |
1966 | | //! [`xsi:nil`]: https://www.w3.org/TR/xmlschema-1/#xsi_nil |
1967 | | //! [`Serializer::serialize_unit_variant`]: serde::Serializer::serialize_unit_variant |
1968 | | //! [`Deserializer::deserialize_enum`]: serde::Deserializer::deserialize_enum |
1969 | | //! [`SeError::Unsupported`]: crate::errors::serialize::SeError::Unsupported |
1970 | | //! [Tagged enums]: https://serde.rs/enum-representations.html#internally-tagged |
1971 | | //! [serde#1183]: https://github.com/serde-rs/serde/issues/1183 |
1972 | | //! [serde#1495]: https://github.com/serde-rs/serde/issues/1495 |
1973 | | //! [quick-xml#586]: https://github.com/tafia/quick-xml/issues/586 |
1974 | | //! [`impl_deserialize_for_internally_tagged_enum!`]: crate::impl_deserialize_for_internally_tagged_enum |
1975 | | |
1976 | | // Macros should be defined before the modules that use them |
1977 | | // Also, macros should be imported before using them |
1978 | | use serde::serde_if_integer128; |
1979 | | |
1980 | | macro_rules! forward_to_simple_type { |
1981 | | ($deserialize:ident, $($mut:tt)?) => { |
1982 | | #[inline] |
1983 | 0 | fn $deserialize<V>($($mut)? self, visitor: V) -> Result<V::Value, DeError> |
1984 | 0 | where |
1985 | 0 | V: Visitor<'de>, |
1986 | | { |
1987 | 0 | SimpleTypeDeserializer::from_text(self.read_string()?).$deserialize(visitor) |
1988 | 0 | } Unexecuted instantiation: <&mut quick_xml::de::Deserializer<_, _> as serde_core::de::Deserializer>::deserialize_i8::<_> Unexecuted instantiation: <&mut quick_xml::de::Deserializer<_, _> as serde_core::de::Deserializer>::deserialize_u8::<_> Unexecuted instantiation: <&mut quick_xml::de::Deserializer<_, _> as serde_core::de::Deserializer>::deserialize_f32::<_> Unexecuted instantiation: <&mut quick_xml::de::Deserializer<_, _> as serde_core::de::Deserializer>::deserialize_f64::<_> Unexecuted instantiation: <&mut quick_xml::de::Deserializer<_, _> as serde_core::de::Deserializer>::deserialize_i16::<_> Unexecuted instantiation: <&mut quick_xml::de::Deserializer<_, _> as serde_core::de::Deserializer>::deserialize_i32::<_> Unexecuted instantiation: <&mut quick_xml::de::Deserializer<_, _> as serde_core::de::Deserializer>::deserialize_i64::<_> Unexecuted instantiation: <&mut quick_xml::de::Deserializer<_, _> as serde_core::de::Deserializer>::deserialize_str::<_> Unexecuted instantiation: <&mut quick_xml::de::Deserializer<_, _> as serde_core::de::Deserializer>::deserialize_u16::<_> Unexecuted instantiation: <&mut quick_xml::de::Deserializer<_, _> as serde_core::de::Deserializer>::deserialize_u32::<_> Unexecuted instantiation: <&mut quick_xml::de::Deserializer<_, _> as serde_core::de::Deserializer>::deserialize_u64::<_> Unexecuted instantiation: <&mut quick_xml::de::Deserializer<_, _> as serde_core::de::Deserializer>::deserialize_bool::<_> Unexecuted instantiation: <&mut quick_xml::de::Deserializer<_, _> as serde_core::de::Deserializer>::deserialize_char::<_> Unexecuted instantiation: <&mut quick_xml::de::Deserializer<_, _> as serde_core::de::Deserializer>::deserialize_i128::<_> Unexecuted instantiation: <&mut quick_xml::de::Deserializer<_, _> as serde_core::de::Deserializer>::deserialize_u128::<_> Unexecuted instantiation: <&mut quick_xml::de::Deserializer<_, _> as serde_core::de::Deserializer>::deserialize_string::<_> Unexecuted instantiation: 
<quick_xml::de::map::MapValueDeserializer<_, _> as serde_core::de::Deserializer>::deserialize_i8::<_> Unexecuted instantiation: <quick_xml::de::map::MapValueDeserializer<_, _> as serde_core::de::Deserializer>::deserialize_u8::<_> Unexecuted instantiation: <quick_xml::de::map::MapValueDeserializer<_, _> as serde_core::de::Deserializer>::deserialize_f32::<_> Unexecuted instantiation: <quick_xml::de::map::MapValueDeserializer<_, _> as serde_core::de::Deserializer>::deserialize_f64::<_> Unexecuted instantiation: <quick_xml::de::map::MapValueDeserializer<_, _> as serde_core::de::Deserializer>::deserialize_i16::<_> Unexecuted instantiation: <quick_xml::de::map::MapValueDeserializer<_, _> as serde_core::de::Deserializer>::deserialize_i32::<_> Unexecuted instantiation: <quick_xml::de::map::MapValueDeserializer<_, _> as serde_core::de::Deserializer>::deserialize_i64::<_> Unexecuted instantiation: <quick_xml::de::map::MapValueDeserializer<_, _> as serde_core::de::Deserializer>::deserialize_str::<_> Unexecuted instantiation: <quick_xml::de::map::MapValueDeserializer<_, _> as serde_core::de::Deserializer>::deserialize_u16::<_> Unexecuted instantiation: <quick_xml::de::map::MapValueDeserializer<_, _> as serde_core::de::Deserializer>::deserialize_u32::<_> Unexecuted instantiation: <quick_xml::de::map::MapValueDeserializer<_, _> as serde_core::de::Deserializer>::deserialize_u64::<_> Unexecuted instantiation: <quick_xml::de::map::MapValueDeserializer<_, _> as serde_core::de::Deserializer>::deserialize_bool::<_> Unexecuted instantiation: <quick_xml::de::map::MapValueDeserializer<_, _> as serde_core::de::Deserializer>::deserialize_char::<_> Unexecuted instantiation: <quick_xml::de::map::MapValueDeserializer<_, _> as serde_core::de::Deserializer>::deserialize_i128::<_> Unexecuted instantiation: <quick_xml::de::map::MapValueDeserializer<_, _> as serde_core::de::Deserializer>::deserialize_u128::<_> Unexecuted instantiation: <quick_xml::de::map::MapValueDeserializer<_, _> as 
serde_core::de::Deserializer>::deserialize_string::<_> Unexecuted instantiation: <quick_xml::de::map::ElementDeserializer<_, _> as serde_core::de::Deserializer>::deserialize_i8::<_> Unexecuted instantiation: <quick_xml::de::map::ElementDeserializer<_, _> as serde_core::de::Deserializer>::deserialize_u8::<_> Unexecuted instantiation: <quick_xml::de::map::ElementDeserializer<_, _> as serde_core::de::Deserializer>::deserialize_f32::<_> Unexecuted instantiation: <quick_xml::de::map::ElementDeserializer<_, _> as serde_core::de::Deserializer>::deserialize_f64::<_> Unexecuted instantiation: <quick_xml::de::map::ElementDeserializer<_, _> as serde_core::de::Deserializer>::deserialize_i16::<_> Unexecuted instantiation: <quick_xml::de::map::ElementDeserializer<_, _> as serde_core::de::Deserializer>::deserialize_i32::<_> Unexecuted instantiation: <quick_xml::de::map::ElementDeserializer<_, _> as serde_core::de::Deserializer>::deserialize_i64::<_> Unexecuted instantiation: <quick_xml::de::map::ElementDeserializer<_, _> as serde_core::de::Deserializer>::deserialize_str::<_> Unexecuted instantiation: <quick_xml::de::map::ElementDeserializer<_, _> as serde_core::de::Deserializer>::deserialize_u16::<_> Unexecuted instantiation: <quick_xml::de::map::ElementDeserializer<_, _> as serde_core::de::Deserializer>::deserialize_u32::<_> Unexecuted instantiation: <quick_xml::de::map::ElementDeserializer<_, _> as serde_core::de::Deserializer>::deserialize_u64::<_> Unexecuted instantiation: <quick_xml::de::map::ElementDeserializer<_, _> as serde_core::de::Deserializer>::deserialize_bool::<_> Unexecuted instantiation: <quick_xml::de::map::ElementDeserializer<_, _> as serde_core::de::Deserializer>::deserialize_char::<_> Unexecuted instantiation: <quick_xml::de::map::ElementDeserializer<_, _> as serde_core::de::Deserializer>::deserialize_i128::<_> Unexecuted instantiation: <quick_xml::de::map::ElementDeserializer<_, _> as serde_core::de::Deserializer>::deserialize_u128::<_> Unexecuted 
instantiation: <quick_xml::de::map::ElementDeserializer<_, _> as serde_core::de::Deserializer>::deserialize_string::<_> Unexecuted instantiation: <quick_xml::de::text::TextDeserializer as serde_core::de::Deserializer>::deserialize_i8::<_> Unexecuted instantiation: <quick_xml::de::text::TextDeserializer as serde_core::de::Deserializer>::deserialize_u8::<_> Unexecuted instantiation: <quick_xml::de::text::TextDeserializer as serde_core::de::Deserializer>::deserialize_f32::<_> Unexecuted instantiation: <quick_xml::de::text::TextDeserializer as serde_core::de::Deserializer>::deserialize_f64::<_> Unexecuted instantiation: <quick_xml::de::text::TextDeserializer as serde_core::de::Deserializer>::deserialize_i16::<_> Unexecuted instantiation: <quick_xml::de::text::TextDeserializer as serde_core::de::Deserializer>::deserialize_i32::<_> Unexecuted instantiation: <quick_xml::de::text::TextDeserializer as serde_core::de::Deserializer>::deserialize_i64::<_> Unexecuted instantiation: <quick_xml::de::text::TextDeserializer as serde_core::de::Deserializer>::deserialize_str::<_> Unexecuted instantiation: <quick_xml::de::text::TextDeserializer as serde_core::de::Deserializer>::deserialize_u16::<_> Unexecuted instantiation: <quick_xml::de::text::TextDeserializer as serde_core::de::Deserializer>::deserialize_u32::<_> Unexecuted instantiation: <quick_xml::de::text::TextDeserializer as serde_core::de::Deserializer>::deserialize_u64::<_> Unexecuted instantiation: <quick_xml::de::text::TextDeserializer as serde_core::de::Deserializer>::deserialize_bool::<_> Unexecuted instantiation: <quick_xml::de::text::TextDeserializer as serde_core::de::Deserializer>::deserialize_char::<_> Unexecuted instantiation: <quick_xml::de::text::TextDeserializer as serde_core::de::Deserializer>::deserialize_i128::<_> Unexecuted instantiation: <quick_xml::de::text::TextDeserializer as serde_core::de::Deserializer>::deserialize_u128::<_> Unexecuted instantiation: <quick_xml::de::text::TextDeserializer as 
serde_core::de::Deserializer>::deserialize_string::<_> |
1989 | | }; |
1990 | | } |
1991 | | |
1992 | | /// Implement deserialization methods for scalar types, such as numbers, strings, |
1993 | | /// byte arrays, booleans and identifiers. |
1994 | | macro_rules! deserialize_primitives { |
1995 | | ($($mut:tt)?) => { |
1996 | | forward_to_simple_type!(deserialize_i8, $($mut)?); |
1997 | | forward_to_simple_type!(deserialize_i16, $($mut)?); |
1998 | | forward_to_simple_type!(deserialize_i32, $($mut)?); |
1999 | | forward_to_simple_type!(deserialize_i64, $($mut)?); |
2000 | | |
2001 | | forward_to_simple_type!(deserialize_u8, $($mut)?); |
2002 | | forward_to_simple_type!(deserialize_u16, $($mut)?); |
2003 | | forward_to_simple_type!(deserialize_u32, $($mut)?); |
2004 | | forward_to_simple_type!(deserialize_u64, $($mut)?); |
2005 | | |
2006 | | serde_if_integer128! { |
2007 | | forward_to_simple_type!(deserialize_i128, $($mut)?); |
2008 | | forward_to_simple_type!(deserialize_u128, $($mut)?); |
2009 | | } |
2010 | | |
2011 | | forward_to_simple_type!(deserialize_f32, $($mut)?); |
2012 | | forward_to_simple_type!(deserialize_f64, $($mut)?); |
2013 | | |
2014 | | forward_to_simple_type!(deserialize_bool, $($mut)?); |
2015 | | forward_to_simple_type!(deserialize_char, $($mut)?); |
2016 | | |
2017 | | forward_to_simple_type!(deserialize_str, $($mut)?); |
2018 | | forward_to_simple_type!(deserialize_string, $($mut)?); |
2019 | | |
2020 | | /// Forwards deserialization to the [`deserialize_any`](#method.deserialize_any). |
2021 | | #[inline] |
2022 | 0 | fn deserialize_bytes<V>(self, visitor: V) -> Result<V::Value, DeError> |
2023 | 0 | where |
2024 | 0 | V: Visitor<'de>, |
2025 | | { |
2026 | 0 | self.deserialize_any(visitor) |
2027 | 0 | } Unexecuted instantiation: <&mut quick_xml::de::Deserializer<_, _> as serde_core::de::Deserializer>::deserialize_bytes::<_> Unexecuted instantiation: <quick_xml::de::map::MapValueDeserializer<_, _> as serde_core::de::Deserializer>::deserialize_bytes::<_> Unexecuted instantiation: <quick_xml::de::map::ElementDeserializer<_, _> as serde_core::de::Deserializer>::deserialize_bytes::<_> Unexecuted instantiation: <quick_xml::de::text::TextDeserializer as serde_core::de::Deserializer>::deserialize_bytes::<_> |
2028 | | |
2029 | | /// Forwards deserialization to the [`deserialize_bytes`](#method.deserialize_bytes). |
2030 | | #[inline] |
2031 | 0 | fn deserialize_byte_buf<V>(self, visitor: V) -> Result<V::Value, DeError> |
2032 | 0 | where |
2033 | 0 | V: Visitor<'de>, |
2034 | | { |
2035 | 0 | self.deserialize_bytes(visitor) |
2036 | 0 | } Unexecuted instantiation: <&mut quick_xml::de::Deserializer<_, _> as serde_core::de::Deserializer>::deserialize_byte_buf::<_> Unexecuted instantiation: <quick_xml::de::map::MapValueDeserializer<_, _> as serde_core::de::Deserializer>::deserialize_byte_buf::<_> Unexecuted instantiation: <quick_xml::de::map::ElementDeserializer<_, _> as serde_core::de::Deserializer>::deserialize_byte_buf::<_> Unexecuted instantiation: <quick_xml::de::text::TextDeserializer as serde_core::de::Deserializer>::deserialize_byte_buf::<_> |
2037 | | |
        /// Named units are represented the same way as [unnamed units](#method.deserialize_unit).
        ///
        /// The struct name is ignored; the call is forwarded to `deserialize_unit`.
        #[inline]
        fn deserialize_unit_struct<V>(
            self,
            _name: &'static str,
            visitor: V,
        ) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_unit(visitor)
        }
2050 | | |
        /// Tuples are represented the same way as [sequences](#method.deserialize_seq).
        ///
        /// The length hint is ignored; the call is forwarded to `deserialize_seq`.
        #[inline]
        fn deserialize_tuple<V>(self, _len: usize, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_seq(visitor)
        }
2059 | | |
        /// Named tuples are represented the same way as [unnamed tuples](#method.deserialize_tuple).
        ///
        /// The struct name is ignored; `len` and `visitor` are passed on to
        /// `deserialize_tuple`.
        #[inline]
        fn deserialize_tuple_struct<V>(
            self,
            _name: &'static str,
            len: usize,
            visitor: V,
        ) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_tuple(len, visitor)
        }
2073 | | |
        /// Forwards deserialization to the [`deserialize_struct`](#method.deserialize_struct)
        /// with empty name and fields.
        ///
        /// A map is thus treated as a struct whose name is `""` and whose list of
        /// known fields is empty.
        #[inline]
        fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_struct("", &[], visitor)
        }
2083 | | |
        /// Identifiers are represented as [strings](#method.deserialize_str).
        ///
        /// The call is forwarded to `deserialize_str` without any extra processing.
        #[inline]
        fn deserialize_identifier<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_str(visitor)
        }
2092 | | |
        /// Forwards deserialization to the [`deserialize_unit`](#method.deserialize_unit).
        ///
        /// Ignored content is therefore handled exactly like a unit value.
        #[inline]
        fn deserialize_ignored_any<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_unit(visitor)
        }
2101 | | }; |
2102 | | } |
2103 | | |
2104 | | mod attributes; |
2105 | | mod key; |
2106 | | mod map; |
2107 | | mod resolver; |
2108 | | mod simple_type; |
2109 | | mod text; |
2110 | | mod var; |
2111 | | |
2112 | | pub use self::attributes::AttributesDeserializer; |
2113 | | pub use self::resolver::{EntityResolver, PredefinedEntityResolver}; |
2114 | | pub use self::simple_type::SimpleTypeDeserializer; |
2115 | | pub use crate::errors::serialize::DeError; |
2116 | | |
2117 | | use crate::{ |
2118 | | de::map::ElementMapAccess, |
2119 | | encoding::Decoder, |
2120 | | errors::Error, |
2121 | | escape::{parse_number, EscapeError}, |
2122 | | events::{BytesCData, BytesEnd, BytesRef, BytesStart, BytesText, Event}, |
2123 | | name::QName, |
2124 | | reader::NsReader, |
2125 | | }; |
2126 | | use serde::de::{ |
2127 | | self, Deserialize, DeserializeOwned, DeserializeSeed, IntoDeserializer, SeqAccess, Visitor, |
2128 | | }; |
2129 | | use std::borrow::Cow; |
2130 | | #[cfg(feature = "overlapped-lists")] |
2131 | | use std::collections::VecDeque; |
2132 | | use std::io::BufRead; |
2133 | | use std::mem::replace; |
2134 | | #[cfg(feature = "overlapped-lists")] |
2135 | | use std::num::NonZeroUsize; |
2136 | | use std::ops::{Deref, Range}; |
2137 | | |
/// The special name `$text`: data represented by a text node or a CDATA node.
/// XML markup is not expected.
pub(crate) const TEXT_KEY: &str = "$text";
/// The special name `$value`: data represented by any XML markup inside.
pub(crate) const VALUE_KEY: &str = "$value";
2142 | | |
/// Returns `true` if the character is **not** a whitespace, i.e. it is none of
/// blank, carriage return, new line or tab.
///
/// The inverted form is convenient as a predicate for `str::find` when looking
/// for the first or the last meaningful character of a text.
#[inline]
const fn is_non_whitespace(ch: char) -> bool {
    !matches!(ch, ' ' | '\r' | '\n' | '\t')
}

/// Decoded and concatenated content of consequent [`Text`] and [`CData`]
/// events. _Consequent_ means that events should follow each other or be
/// delimited only by (any count of) [`Comment`] or [`PI`] events.
///
/// Internally text is stored in `Cow<str>`. Cloning of text is cheap while it
/// is borrowed and makes copies of data when it is owned.
///
/// [`Text`]: Event::Text
/// [`CData`]: Event::CData
/// [`Comment`]: Event::Comment
/// [`PI`]: Event::PI
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Text<'a> {
    /// Untrimmed text after concatenating content of all
    /// [`Text`] and [`CData`] events
    ///
    /// [`Text`]: Event::Text
    /// [`CData`]: Event::CData
    text: Cow<'a, str>,
    /// A byte range into `text` which contains data after trimming.
    /// Both ends always lie on character boundaries.
    content: Range<usize>,
}

impl<'a> Text<'a> {
    /// Wraps `text` and pre-computes the byte range that remains after removing
    /// leading and trailing whitespace. An empty or all-whitespace text gets
    /// the empty range `0..0`.
    fn new(text: Cow<'a, str>) -> Self {
        let start = text.find(is_non_whitespace).unwrap_or(0);
        // The end of the range must be placed AFTER the last non-whitespace
        // character. That character may occupy several bytes in UTF-8, so its
        // start index must be advanced by its encoded length, not by 1;
        // otherwise the range would end in the middle of the character and
        // slicing in `trimmed()` would panic.
        let end = text
            .char_indices()
            .rfind(|&(_, ch)| is_non_whitespace(ch))
            .map_or(0, |(i, ch)| i + ch.len_utf8());

        let content = if start >= end { 0..0 } else { start..end };

        Self { text, content }
    }

    /// Returns text without leading and trailing whitespaces as [defined] by XML specification.
    ///
    /// If the text is stored as an owned string, the trimmed part is copied into
    /// a new `String`; borrowed text is returned as a sub-slice without allocation.
    ///
    /// If you want to only check if text contains only whitespaces, use [`is_blank`](Self::is_blank),
    /// which will not allocate.
    ///
    /// # Example
    ///
    /// ```
    /// # use quick_xml::de::Text;
    /// # use pretty_assertions::assert_eq;
    /// #
    /// let text = Text::from("");
    /// assert_eq!(text.trimmed(), "");
    ///
    /// let text = Text::from(" \r\n\t ");
    /// assert_eq!(text.trimmed(), "");
    ///
    /// let text = Text::from("  some useful text  ");
    /// assert_eq!(text.trimmed(), "some useful text");
    /// ```
    ///
    /// [defined]: https://www.w3.org/TR/xml11/#NT-S
    pub fn trimmed(&self) -> Cow<'a, str> {
        match self.text {
            Cow::Borrowed(text) => Cow::Borrowed(&text[self.content.clone()]),
            Cow::Owned(ref text) => Cow::Owned(text[self.content.clone()].to_string()),
        }
    }

    /// Returns `true` if text is empty or contains only whitespaces as [defined] by XML specification.
    ///
    /// # Example
    ///
    /// ```
    /// # use quick_xml::de::Text;
    /// # use pretty_assertions::assert_eq;
    /// #
    /// let text = Text::from("");
    /// assert_eq!(text.is_blank(), true);
    ///
    /// let text = Text::from(" \r\n\t ");
    /// assert_eq!(text.is_blank(), true);
    ///
    /// let text = Text::from("  some useful text  ");
    /// assert_eq!(text.is_blank(), false);
    /// ```
    ///
    /// [defined]: https://www.w3.org/TR/xml11/#NT-S
    pub fn is_blank(&self) -> bool {
        self.content.is_empty()
    }
}

/// Gives access to the whole, untrimmed text.
impl<'a> Deref for Text<'a> {
    type Target = str;

    #[inline]
    fn deref(&self) -> &Self::Target {
        self.text.deref()
    }
}

/// Creates a `Text` that borrows from the given string slice.
impl<'a> From<&'a str> for Text<'a> {
    #[inline]
    fn from(text: &'a str) -> Self {
        Self::new(Cow::Borrowed(text))
    }
}

/// Creates a `Text` that owns the given string.
impl<'a> From<String> for Text<'a> {
    #[inline]
    fn from(text: String) -> Self {
        Self::new(Cow::Owned(text))
    }
}

/// Creates a `Text` from either a borrowed or an owned string.
impl<'a> From<Cow<'a, str>> for Text<'a> {
    #[inline]
    fn from(text: Cow<'a, str>) -> Self {
        Self::new(text)
    }
}
2264 | | |
2265 | | //////////////////////////////////////////////////////////////////////////////////////////////////// |
2266 | | |
/// Simplified event which contains only those variants that are used by the deserializer.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum DeEvent<'a> {
    /// Start tag (with attributes) `<tag attr="value">`.
    Start(BytesStart<'a>),
    /// End tag `</tag>`.
    End(BytesEnd<'a>),
    /// Decoded and concatenated content of consequent [`Text`] and [`CData`]
    /// events. _Consequent_ means that events should follow each other or be
    /// delimited only by (any count of) [`Comment`] or [`PI`] events.
    ///
    /// [`Text`]: Event::Text
    /// [`CData`]: Event::CData
    /// [`Comment`]: Event::Comment
    /// [`PI`]: Event::PI
    Text(Text<'a>),
    /// End of XML document.
    Eof,
}
2286 | | |
2287 | | //////////////////////////////////////////////////////////////////////////////////////////////////// |
2288 | | |
/// Simplified event which contains only those variants that are used by the deserializer,
/// but with [`Text`] events not yet fully processed.
///
/// [`Text`] events should be trimmed if they are not surrounded by other
/// [`Text`] or [`CData`] events. This event contains an intermediate state of a [`Text`]
/// event, where it is trimmed from the start, but not from the end. To trim
/// the trailing spaces we should look ahead by one deserializer event (i. e. skip all
/// comments and processing instructions).
///
/// [`Text`]: Event::Text
/// [`CData`]: Event::CData
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum PayloadEvent<'a> {
    /// Start tag (with attributes) `<tag attr="value">`.
    Start(BytesStart<'a>),
    /// End tag `</tag>`.
    End(BytesEnd<'a>),
    /// Escaped character data between tags.
    Text(BytesText<'a>),
    /// Unescaped character data stored in `<![CDATA[...]]>`.
    CData(BytesCData<'a>),
    /// Document type definition data (DTD) stored in `<!DOCTYPE ...>`.
    DocType(BytesText<'a>),
    /// Reference `&ref;` in the textual data.
    GeneralRef(BytesRef<'a>),
    /// End of XML document.
    Eof,
}
2317 | | |
2318 | | impl<'a> PayloadEvent<'a> { |
2319 | | /// Ensures that all data is owned to extend the object's lifetime if necessary. |
2320 | | #[inline] |
2321 | 0 | fn into_owned(self) -> PayloadEvent<'static> { |
2322 | 0 | match self { |
2323 | 0 | PayloadEvent::Start(e) => PayloadEvent::Start(e.into_owned()), |
2324 | 0 | PayloadEvent::End(e) => PayloadEvent::End(e.into_owned()), |
2325 | 0 | PayloadEvent::Text(e) => PayloadEvent::Text(e.into_owned()), |
2326 | 0 | PayloadEvent::CData(e) => PayloadEvent::CData(e.into_owned()), |
2327 | 0 | PayloadEvent::DocType(e) => PayloadEvent::DocType(e.into_owned()), |
2328 | 0 | PayloadEvent::GeneralRef(e) => PayloadEvent::GeneralRef(e.into_owned()), |
2329 | 0 | PayloadEvent::Eof => PayloadEvent::Eof, |
2330 | | } |
2331 | 0 | } |
2332 | | } |
2333 | | |
/// An intermediate reader that consumes [`PayloadEvent`]s and produces final [`DeEvent`]s.
/// [`PayloadEvent::Text`] events that are followed by any event except
/// [`PayloadEvent::Text`] or [`PayloadEvent::CData`] are trimmed from the end.
struct XmlReader<'i, R: XmlRead<'i>, E: EntityResolver = PredefinedEntityResolver> {
    /// A source of low-level XML events
    reader: R,
    /// Intermediate event that could be returned by the next call to `next()`.
    /// If that is the `Text` event then leading spaces are already trimmed, but
    /// trailing spaces are not. Before the event is returned, trimming of
    /// the spaces could be necessary
    lookahead: Result<PayloadEvent<'i>, DeError>,

    /// Used to resolve unknown entities that would otherwise cause the parser
    /// to return an [`EscapeError::UnrecognizedEntity`] error.
    ///
    /// [`EscapeError::UnrecognizedEntity`]: crate::escape::EscapeError::UnrecognizedEntity
    entity_resolver: E,
}
2352 | | |
2353 | | impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> { |
2354 | 0 | fn new(mut reader: R, entity_resolver: E) -> Self { |
2355 | | // Lookahead by one event immediately, so we do not need to check in the |
2356 | | // loop if we need lookahead or not |
2357 | 0 | let lookahead = reader.next(); |
2358 | | |
2359 | 0 | Self { |
2360 | 0 | reader, |
2361 | 0 | lookahead, |
2362 | 0 | entity_resolver, |
2363 | 0 | } |
2364 | 0 | } |
2365 | | |
2366 | | /// Returns `true` if all events was consumed |
2367 | 0 | const fn is_empty(&self) -> bool { |
2368 | 0 | matches!(self.lookahead, Ok(PayloadEvent::Eof)) |
2369 | 0 | } |
2370 | | |
2371 | | /// Read next event and put it in lookahead, return the current lookahead |
2372 | | #[inline(always)] |
2373 | 0 | fn next_impl(&mut self) -> Result<PayloadEvent<'i>, DeError> { |
2374 | 0 | replace(&mut self.lookahead, self.reader.next()) |
2375 | 0 | } |
2376 | | |
2377 | | /// Returns `true` when next event is not a text event in any form. |
2378 | | #[inline(always)] |
2379 | 0 | const fn current_event_is_last_text(&self) -> bool { |
2380 | | // If next event is a text or CDATA, we should not trim trailing spaces |
2381 | 0 | !matches!( |
2382 | 0 | self.lookahead, |
2383 | | Ok(PayloadEvent::Text(_)) | Ok(PayloadEvent::CData(_) | PayloadEvent::GeneralRef(_)) |
2384 | | ) |
2385 | 0 | } |
2386 | | |
2387 | | /// Read all consequent [`Text`] and [`CData`] events until non-text event |
2388 | | /// occurs. Content of all events would be appended to `result` and returned |
2389 | | /// as [`DeEvent::Text`]. |
2390 | | /// |
2391 | | /// [`Text`]: PayloadEvent::Text |
2392 | | /// [`CData`]: PayloadEvent::CData |
2393 | 0 | fn drain_text(&mut self, mut result: Cow<'i, str>) -> Result<DeEvent<'i>, DeError> { |
2394 | | loop { |
2395 | 0 | if self.current_event_is_last_text() { |
2396 | 0 | break; |
2397 | 0 | } |
2398 | | |
2399 | 0 | match self.next_impl()? { |
2400 | 0 | PayloadEvent::Text(e) => result.to_mut().push_str(&e.xml_content()?), |
2401 | 0 | PayloadEvent::CData(e) => result.to_mut().push_str(&e.xml_content()?), |
2402 | 0 | PayloadEvent::GeneralRef(e) => self.resolve_reference(result.to_mut(), e)?, |
2403 | | |
2404 | | // SAFETY: current_event_is_last_text checks that event is Text, CData or GeneralRef |
2405 | 0 | _ => unreachable!("Only `Text`, `CData` or `GeneralRef` events can come here"), |
2406 | | } |
2407 | | } |
2408 | 0 | Ok(DeEvent::Text(Text::new(result))) |
2409 | 0 | } |
2410 | | |
2411 | | /// Return an input-borrowing event. |
2412 | 0 | fn next(&mut self) -> Result<DeEvent<'i>, DeError> { |
2413 | | loop { |
2414 | 0 | return match self.next_impl()? { |
2415 | 0 | PayloadEvent::Start(e) => Ok(DeEvent::Start(e)), |
2416 | 0 | PayloadEvent::End(e) => Ok(DeEvent::End(e)), |
2417 | 0 | PayloadEvent::Text(e) => self.drain_text(e.xml_content()?), |
2418 | 0 | PayloadEvent::CData(e) => self.drain_text(e.xml_content()?), |
2419 | 0 | PayloadEvent::DocType(e) => { |
2420 | 0 | self.entity_resolver |
2421 | 0 | .capture(e) |
2422 | 0 | .map_err(|err| DeError::Custom(format!("cannot parse DTD: {}", err)))?; |
2423 | 0 | continue; |
2424 | | } |
2425 | 0 | PayloadEvent::GeneralRef(e) => { |
2426 | 0 | let mut text = String::new(); |
2427 | 0 | self.resolve_reference(&mut text, e)?; |
2428 | 0 | self.drain_text(text.into()) |
2429 | | } |
2430 | 0 | PayloadEvent::Eof => Ok(DeEvent::Eof), |
2431 | | }; |
2432 | | } |
2433 | 0 | } |
2434 | | |
2435 | 0 | fn resolve_reference(&mut self, result: &mut String, event: BytesRef) -> Result<(), DeError> { |
2436 | 0 | let len = event.len(); |
2437 | 0 | let reference = self.decoder().decode(&event)?; |
2438 | | |
2439 | 0 | if let Some(num) = reference.strip_prefix('#') { |
2440 | 0 | let codepoint = parse_number(num).map_err(EscapeError::InvalidCharRef)?; |
2441 | 0 | result.push_str(codepoint.encode_utf8(&mut [0u8; 4])); |
2442 | 0 | return Ok(()); |
2443 | 0 | } |
2444 | 0 | if let Some(value) = self.entity_resolver.resolve(reference.as_ref()) { |
2445 | 0 | result.push_str(value); |
2446 | 0 | return Ok(()); |
2447 | 0 | } |
2448 | 0 | Err(EscapeError::UnrecognizedEntity(0..len, reference.to_string()).into()) |
2449 | 0 | } |
2450 | | |
2451 | | #[inline] |
2452 | 0 | fn read_to_end(&mut self, name: QName) -> Result<(), DeError> { |
2453 | 0 | match self.lookahead { |
2454 | | // We pre-read event with the same name that is required to be skipped. |
2455 | | // First call of `read_to_end` will end out pre-read event, the second |
2456 | | // will consume other events |
2457 | 0 | Ok(PayloadEvent::Start(ref e)) if e.name() == name => { |
2458 | 0 | let result1 = self.reader.read_to_end(name); |
2459 | 0 | let result2 = self.reader.read_to_end(name); |
2460 | | |
2461 | | // In case of error `next_impl` returns `Eof` |
2462 | 0 | let _ = self.next_impl(); |
2463 | 0 | result1?; |
2464 | 0 | result2?; |
2465 | | } |
2466 | | // We pre-read event with the same name that is required to be skipped. |
2467 | | // Because this is end event, we already consume the whole tree, so |
2468 | | // nothing to do, just update lookahead |
2469 | 0 | Ok(PayloadEvent::End(ref e)) if e.name() == name => { |
2470 | 0 | let _ = self.next_impl(); |
2471 | 0 | } |
2472 | | Ok(_) => { |
2473 | 0 | let result = self.reader.read_to_end(name); |
2474 | | |
2475 | | // In case of error `next_impl` returns `Eof` |
2476 | 0 | let _ = self.next_impl(); |
2477 | 0 | result?; |
2478 | | } |
2479 | | // Read next lookahead event, unpack error from the current lookahead |
2480 | | Err(_) => { |
2481 | 0 | self.next_impl()?; |
2482 | | } |
2483 | | } |
2484 | 0 | Ok(()) |
2485 | 0 | } |
2486 | | |
2487 | | #[inline] |
2488 | 0 | fn decoder(&self) -> Decoder { |
2489 | 0 | self.reader.decoder() |
2490 | 0 | } |
2491 | | } |
2492 | | |
2493 | | //////////////////////////////////////////////////////////////////////////////////////////////////// |
2494 | | |
2495 | | /// Deserialize an instance of type `T` from a string of XML text. |
2496 | 0 | pub fn from_str<'de, T>(s: &'de str) -> Result<T, DeError> |
2497 | 0 | where |
2498 | 0 | T: Deserialize<'de>, |
2499 | | { |
2500 | 0 | let mut de = Deserializer::from_str(s); |
2501 | 0 | T::deserialize(&mut de) |
2502 | 0 | } |
2503 | | |
2504 | | /// Deserialize from a reader. This method will do internal copies of data |
2505 | | /// read from `reader`. If you want have a `&str` input and want to borrow |
2506 | | /// as much as possible, use [`from_str`]. |
2507 | 0 | pub fn from_reader<R, T>(reader: R) -> Result<T, DeError> |
2508 | 0 | where |
2509 | 0 | R: BufRead, |
2510 | 0 | T: DeserializeOwned, |
2511 | | { |
2512 | 0 | let mut de = Deserializer::from_reader(reader); |
2513 | 0 | T::deserialize(&mut de) |
2514 | 0 | } |
2515 | | |
2516 | | //////////////////////////////////////////////////////////////////////////////////////////////////// |
2517 | | |
/// A structure that deserializes XML into Rust values.
pub struct Deserializer<'de, R, E: EntityResolver = PredefinedEntityResolver>
where
    R: XmlRead<'de>,
{
    /// An XML reader that streams events into this deserializer
    reader: XmlReader<'de, R, E>,

    /// When deserializing sequences sometimes we have to skip unwanted events.
    /// Those events should be stored and then replayed. This is a replay buffer
    /// that streams events while not empty. When it is exhausted, events will be
    /// requested from [`Self::reader`].
    #[cfg(feature = "overlapped-lists")]
    read: VecDeque<DeEvent<'de>>,
    /// When deserializing sequences sometimes we have to skip events, because XML
    /// is tolerant to the order of elements and even if in the XSD the order is strictly
    /// specified (using `xs:sequence`) most XML parsers allow order violations.
    /// That means that elements forming a sequence could be overlapped with
    /// other elements that are not related to that sequence.
    ///
    /// In order to support this, the deserializer will scan events and skip unwanted
    /// events, storing them here. After a call to [`Self::start_replay()`] all events
    /// are moved from this to [`Self::read`].
    #[cfg(feature = "overlapped-lists")]
    write: VecDeque<DeEvent<'de>>,
    /// Maximum number of events that can be skipped when processing sequences
    /// that occur out-of-order. This field is used to prevent potential
    /// denial-of-service (DoS) attacks which could cause infinite memory
    /// consumption when parsing a very large amount of XML into a sequence field.
    #[cfg(feature = "overlapped-lists")]
    limit: Option<NonZeroUsize>,

    /// The event that was peeked but not consumed yet. Takes the role of the
    /// replay buffer when the `overlapped-lists` feature is disabled.
    #[cfg(not(feature = "overlapped-lists"))]
    peek: Option<DeEvent<'de>>,

    /// Buffer to store attribute name as a field name exposed to serde consumers
    key_buf: String,
}
2556 | | |
2557 | | impl<'de, R, E> Deserializer<'de, R, E> |
2558 | | where |
2559 | | R: XmlRead<'de>, |
2560 | | E: EntityResolver, |
2561 | | { |
2562 | | /// Create an XML deserializer from one of the possible quick_xml input sources. |
2563 | | /// |
2564 | | /// Typically it is more convenient to use one of these methods instead: |
2565 | | /// |
2566 | | /// - [`Deserializer::from_str`] |
2567 | | /// - [`Deserializer::from_reader`] |
2568 | 0 | fn new(reader: R, entity_resolver: E) -> Self { |
2569 | 0 | Self { |
2570 | 0 | reader: XmlReader::new(reader, entity_resolver), |
2571 | 0 |
|
2572 | 0 | #[cfg(feature = "overlapped-lists")] |
2573 | 0 | read: VecDeque::new(), |
2574 | 0 | #[cfg(feature = "overlapped-lists")] |
2575 | 0 | write: VecDeque::new(), |
2576 | 0 | #[cfg(feature = "overlapped-lists")] |
2577 | 0 | limit: None, |
2578 | 0 |
|
2579 | 0 | #[cfg(not(feature = "overlapped-lists"))] |
2580 | 0 | peek: None, |
2581 | 0 |
|
2582 | 0 | key_buf: String::new(), |
2583 | 0 | } |
2584 | 0 | } |
2585 | | |
2586 | | /// Returns `true` if all events was consumed. |
2587 | 0 | pub fn is_empty(&self) -> bool { |
2588 | | #[cfg(feature = "overlapped-lists")] |
2589 | 0 | let event = self.read.front(); |
2590 | | |
2591 | | #[cfg(not(feature = "overlapped-lists"))] |
2592 | | let event = self.peek.as_ref(); |
2593 | | |
2594 | 0 | match event { |
2595 | 0 | None | Some(DeEvent::Eof) => self.reader.is_empty(), |
2596 | 0 | _ => false, |
2597 | | } |
2598 | 0 | } |
2599 | | |
    /// Returns a shared reference to the underlying XML reader.
    ///
    /// This can be used, for example, to get the position of an error after
    /// a failed deserialization:
    ///
    /// ```
    /// # use pretty_assertions::assert_eq;
    /// use serde::Deserialize;
    /// use quick_xml::de::Deserializer;
    /// use quick_xml::NsReader;
    ///
    /// #[derive(Deserialize)]
    /// struct SomeStruct {
    ///     field1: String,
    ///     field2: String,
    /// }
    ///
    /// // Try to deserialize from broken XML
    /// let mut de = Deserializer::from_str(
    ///     "<SomeStruct><field1><field2></SomeStruct>"
    ///     // 0                         ^= 28        ^= 41
    /// );
    ///
    /// let err = SomeStruct::deserialize(&mut de);
    /// assert!(err.is_err());
    ///
    /// let reader: &NsReader<_> = de.get_ref().get_ref();
    ///
    /// assert_eq!(reader.error_position(), 28);
    /// assert_eq!(reader.buffer_position(), 41);
    /// ```
    pub const fn get_ref(&self) -> &R {
        &self.reader.reader
    }
2631 | | |
    /// Sets the maximum number of events that can be skipped during deserialization
    /// of sequences. `None` means that there is no limit.
    ///
    /// If `<element>` contains more nested elements, `$text` or CDATA nodes
    /// than specified, then [`DeError::TooManyEvents`] will be returned during
    /// deserialization of a sequence field (any type that uses [`deserialize_seq`]
    /// for the deserialization, for example, `Vec<T>`).
    ///
    /// This method can be used to prevent a [DoS] attack and infinite memory
    /// consumption when parsing a very large XML to a sequence field.
    ///
    /// It is strongly recommended to set the limit to some value when you parse data
    /// from untrusted sources. You should choose a value that your typical XMLs
    /// can have _between_ different elements that correspond to the same sequence.
    ///
    /// Returns `self` to allow chaining of calls.
    ///
    /// # Examples
    ///
    /// Let's imagine that we deserialize such a structure:
    /// ```
    /// struct List {
    ///   item: Vec<()>,
    /// }
    /// ```
    ///
    /// The XML that we try to parse looks like this:
    /// ```xml
    /// <any-name>
    ///   <item/>
    ///   <!-- Bufferization starts at this point -->
    ///   <another-item>
    ///     <some-element>with text</some-element>
    ///     <yet-another-element/>
    ///   </another-item>
    ///   <!-- Buffer will be emptied at this point; 7 events were buffered -->
    ///   <item/>
    ///   <!-- There is nothing to buffer, because elements follows each other -->
    ///   <item/>
    /// </any-name>
    /// ```
    ///
    /// There, when we deserialize the `item` field, we need to buffer 7 events,
    /// before we can deserialize the second `<item/>`:
    ///
    /// - `<another-item>`
    /// - `<some-element>`
    /// - `$text(with text)`
    /// - `</some-element>`
    /// - `<yet-another-element/>` (virtual start event)
    /// - `<yet-another-element/>` (virtual end event)
    /// - `</another-item>`
    ///
    /// Note that `<yet-another-element/>` is internally represented as 2 events:
    /// one for the start tag and one for the end tag. In the future this can be
    /// eliminated, but for now we use the [auto-expanding feature] of a reader,
    /// because this simplifies the deserializer code.
    ///
    /// [`deserialize_seq`]: serde::Deserializer::deserialize_seq
    /// [DoS]: https://en.wikipedia.org/wiki/Denial-of-service_attack
    /// [auto-expanding feature]: crate::reader::Config::expand_empty_elements
    #[cfg(feature = "overlapped-lists")]
    pub fn event_buffer_size(&mut self, limit: Option<NonZeroUsize>) -> &mut Self {
        self.limit = limit;
        self
    }
2696 | | |
2697 | | #[cfg(feature = "overlapped-lists")] |
2698 | 0 | fn peek(&mut self) -> Result<&DeEvent<'de>, DeError> { |
2699 | 0 | if self.read.is_empty() { |
2700 | 0 | self.read.push_front(self.reader.next()?); |
2701 | 0 | } |
2702 | 0 | if let Some(event) = self.read.front() { |
2703 | 0 | return Ok(event); |
2704 | 0 | } |
2705 | | // SAFETY: `self.read` was filled in the code above. |
2706 | | // NOTE: Can be replaced with `unsafe { std::hint::unreachable_unchecked() }` |
2707 | | // if unsafe code will be allowed |
2708 | 0 | unreachable!() |
2709 | 0 | } |
    /// Returns a reference to the next event without consuming it.
    ///
    /// If no event is stored in `self.peek` yet, one event is read from the
    /// reader and stored there first.
    #[cfg(not(feature = "overlapped-lists"))]
    fn peek(&mut self) -> Result<&DeEvent<'de>, DeError> {
        match &mut self.peek {
            Some(event) => Ok(event),
            // `Option::insert` stores the freshly read event and returns a reference to it
            empty_peek @ None => Ok(empty_peek.insert(self.reader.next()?)),
        }
    }
2717 | | |
    /// Returns the event that is already buffered, without reading anything
    /// from the reader.
    ///
    /// # Panics
    ///
    /// Panics if no event is buffered, i. e. the front of `self.read` (with the
    /// `overlapped-lists` feature) or `self.peek` (without it) is empty.
    /// [`Self::peek()`] should be called before to fill the buffer.
    #[inline]
    fn last_peeked(&self) -> &DeEvent<'de> {
        #[cfg(feature = "overlapped-lists")]
        {
            self.read
                .front()
                .expect("`Deserializer::peek()` should be called")
        }
        #[cfg(not(feature = "overlapped-lists"))]
        {
            self.peek
                .as_ref()
                .expect("`Deserializer::peek()` should be called")
        }
    }
2733 | | |
2734 | 0 | fn next(&mut self) -> Result<DeEvent<'de>, DeError> { |
2735 | | // Replay skipped or peeked events |
2736 | | #[cfg(feature = "overlapped-lists")] |
2737 | 0 | if let Some(event) = self.read.pop_front() { |
2738 | 0 | return Ok(event); |
2739 | 0 | } |
2740 | | #[cfg(not(feature = "overlapped-lists"))] |
2741 | | if let Some(e) = self.peek.take() { |
2742 | | return Ok(e); |
2743 | | } |
2744 | 0 | self.reader.next() |
2745 | 0 | } |
2746 | | |
2747 | 0 | fn skip_whitespaces(&mut self) -> Result<(), DeError> { |
2748 | | loop { |
2749 | 0 | match self.peek()? { |
2750 | 0 | DeEvent::Text(e) if e.is_blank() => { |
2751 | 0 | self.next()?; |
2752 | | } |
2753 | 0 | _ => break, |
2754 | | } |
2755 | | } |
2756 | 0 | Ok(()) |
2757 | 0 | } |
2758 | | |
2759 | | /// Returns the mark after which all events, skipped by [`Self::skip()`] call, |
2760 | | /// should be replayed after calling [`Self::start_replay()`]. |
2761 | | #[cfg(feature = "overlapped-lists")] |
2762 | | #[inline] |
2763 | | #[must_use = "returned checkpoint should be used in `start_replay`"] |
2764 | 0 | fn skip_checkpoint(&self) -> usize { |
2765 | 0 | self.write.len() |
2766 | 0 | } |
2767 | | |
2768 | | /// Extracts an XML tree of events from the input and stores them in the skipped events |
2769 | | /// buffer from which they can be retrieved later. You MUST call |
2770 | | /// [`Self::start_replay()`] after calling this to give access to the skipped |
2771 | | /// events and release internal buffers. |
2772 | | #[cfg(feature = "overlapped-lists")] |
2773 | 0 | fn skip(&mut self) -> Result<(), DeError> { |
2774 | 0 | let event = self.next()?; |
2775 | 0 | self.skip_event(event)?; |
2776 | | // Skip all subtree, if we skip a start event |
2777 | 0 | if let Some(DeEvent::Start(e)) = self.write.back() { |
2778 | 0 | let end = e.name().as_ref().to_owned(); |
2779 | 0 | let mut depth = 0; |
2780 | | loop { |
2781 | 0 | let event = self.next()?; |
2782 | 0 | match event { |
2783 | 0 | DeEvent::Start(ref e) if e.name().as_ref() == end => { |
2784 | 0 | self.skip_event(event)?; |
2785 | 0 | depth += 1; |
2786 | | } |
2787 | 0 | DeEvent::End(ref e) if e.name().as_ref() == end => { |
2788 | 0 | self.skip_event(event)?; |
2789 | 0 | if depth == 0 { |
2790 | 0 | break; |
2791 | 0 | } |
2792 | 0 | depth -= 1; |
2793 | | } |
2794 | | DeEvent::Eof => { |
2795 | 0 | self.skip_event(event)?; |
2796 | 0 | break; |
2797 | | } |
2798 | 0 | _ => self.skip_event(event)?, |
2799 | | } |
2800 | | } |
2801 | 0 | } |
2802 | 0 | Ok(()) |
2803 | 0 | } |
2804 | | |
2805 | | #[cfg(feature = "overlapped-lists")] |
2806 | | #[inline] |
2807 | 0 | fn skip_event(&mut self, event: DeEvent<'de>) -> Result<(), DeError> { |
2808 | 0 | if let Some(max) = self.limit { |
2809 | 0 | if self.write.len() >= max.get() { |
2810 | 0 | return Err(DeError::TooManyEvents(max)); |
2811 | 0 | } |
2812 | 0 | } |
2813 | 0 | self.write.push_back(event); |
2814 | 0 | Ok(()) |
2815 | 0 | } |
2816 | | |
2817 | | /// Moves buffered events, skipped after given `checkpoint` from [`Self::write`] |
2818 | | /// skip buffer to [`Self::read`] buffer. |
2819 | | /// |
2820 | | /// After calling this method, [`Self::peek()`] and [`Self::next()`] start |
2821 | | /// returning events that were skipped previously by calling [`Self::skip()`], |
2822 | | /// and only when all those events have been consumed does the deserializer start |
2823 | | /// to drain events from the underlying reader. |
2824 | | /// |
2825 | | /// This method MUST be called if [`Self::skip()`] was called any number of times |
2826 | | /// after [`Self::new()`] or `start_replay()`, or you'll lose events. |
2827 | | #[cfg(feature = "overlapped-lists")] |
2828 | 0 | fn start_replay(&mut self, checkpoint: usize) { |
2829 | 0 | if checkpoint == 0 { |
2830 | 0 | self.write.append(&mut self.read); |
2831 | 0 | std::mem::swap(&mut self.read, &mut self.write); |
2832 | 0 | } else { |
2833 | 0 | let mut read = self.write.split_off(checkpoint); |
2834 | 0 | read.append(&mut self.read); |
2835 | 0 | self.read = read; |
2836 | 0 | } |
2837 | 0 | } |
2838 | | |
2839 | | #[inline] |
2840 | 0 | fn read_string(&mut self) -> Result<Cow<'de, str>, DeError> { |
2841 | 0 | self.read_string_impl(true) |
2842 | 0 | } |
2843 | | |
2844 | | /// Consumes consecutive [`Text`] and [`CData`] (both are referred to below as _text_) |
2845 | | /// events and merges them into one string. If there are no such events, returns |
2846 | | /// an empty string. |
2847 | | /// |
2848 | | /// If `allow_start` is `false`, then only text events are consumed, for other |
2849 | | /// events an error is returned (see table below). |
2850 | | /// |
2851 | | /// If `allow_start` is `true`, then two or three events are expected: |
2852 | | /// - [`DeEvent::Start`]; |
2853 | | /// - _(optional)_ [`DeEvent::Text`] whose content is returned; |
2854 | | /// - [`DeEvent::End`]. If the text event is missing, an empty string is returned. |
2855 | | /// |
2856 | | /// Corresponding events are consumed. |
2857 | | /// |
2858 | | /// # Handling events |
2859 | | /// |
2860 | | /// The table below shows how events are handled by this method: |
2861 | | /// |
2862 | | /// |Event |XML |Handling |
2863 | | /// |------------------|---------------------------|---------------------------------------- |
2864 | | /// |[`DeEvent::Start`]|`<tag>...</tag>` |if `allow_start == true`, result determined by the second table, otherwise emits [`UnexpectedStart("tag")`](DeError::UnexpectedStart) |
2865 | | /// |[`DeEvent::End`] |`</any-tag>` |This is impossible situation, the method will panic if it happens |
2866 | | /// |[`DeEvent::Text`] |`text content` or `<![CDATA[cdata content]]>` (probably mixed)|Returns event content unchanged |
2867 | | /// |[`DeEvent::Eof`] | |Emits [`UnexpectedEof`](DeError::UnexpectedEof) |
2868 | | /// |
2869 | | /// Second event, consumed if [`DeEvent::Start`] was received and `allow_start == true`: |
2870 | | /// |
2871 | | /// |Event |XML |Handling |
2872 | | /// |------------------|---------------------------|---------------------------------------------------------------------------------- |
2873 | | /// |[`DeEvent::Start`]|`<any-tag>...</any-tag>` |Emits [`UnexpectedStart("any-tag")`](DeError::UnexpectedStart) |
2874 | | /// |[`DeEvent::End`] |`</tag>` |Returns an empty slice. The reader guarantees that the tag will match the opening one |
2875 | | /// |[`DeEvent::Text`] |`text content` or `<![CDATA[cdata content]]>` (probably mixed)|Returns event content unchanged, expects the `</tag>` after that |
2876 | | /// |[`DeEvent::Eof`] | |Emits [`InvalidXml(IllFormed(MissingEndTag))`](DeError::InvalidXml) |
2877 | | /// |
2878 | | /// [`Text`]: Event::Text |
2879 | | /// [`CData`]: Event::CData |
2880 | 0 | fn read_string_impl(&mut self, allow_start: bool) -> Result<Cow<'de, str>, DeError> { |
2881 | 0 | match self.next()? { |
2882 | | // Reached by doc tests only: this file, lines 979 and 996 |
2883 | 0 | DeEvent::Text(e) => Ok(e.text), |
2884 | | // allow one nested level |
2885 | | // Reached by trivial::{...}::{field, field_nested, field_tag_after, field_tag_before, nested, tag_after, tag_before, wrapped} |
2886 | 0 | DeEvent::Start(e) if allow_start => self.read_text(e.name()), |
2887 | | // TODO: not reached by any tests |
2888 | 0 | DeEvent::Start(e) => Err(DeError::UnexpectedStart(e.name().as_ref().to_owned())), |
2889 | | // SAFETY: The reader guarantees that we don't have unmatched tags |
2890 | | // If we are here, then our deserializer has a bug |
2891 | 0 | DeEvent::End(e) => unreachable!("{:?}", e), |
2892 | | // Reached by trivial::{empty_doc, only_comment} |
2893 | 0 | DeEvent::Eof => Err(DeError::UnexpectedEof), |
2894 | | } |
2895 | 0 | } |
2896 | | /// Consumes one [`DeEvent::Text`] event and ensures that it is followed by the |
2897 | | /// [`DeEvent::End`] event. |
2898 | | /// |
2899 | | /// # Parameters |
2900 | | /// - `name`: name of a tag opened before reading text. The corresponding end tag |
2901 | | /// should be present in the input just after the text |
2902 | 0 | fn read_text(&mut self, name: QName) -> Result<Cow<'de, str>, DeError> { |
2903 | 0 | match self.next()? { |
2904 | 0 | DeEvent::Text(e) => match self.next()? { |
2905 | | // The matching tag name is guaranteed by the reader |
2906 | | // Reached by trivial::{...}::{field, wrapped} |
2907 | 0 | DeEvent::End(_) => Ok(e.text), |
2908 | | // SAFETY: Cannot be two consecutive Text events, they would be merged into one |
2909 | 0 | DeEvent::Text(_) => unreachable!(), |
2910 | | // Reached by trivial::{...}::{field_tag_after, tag_after} |
2911 | 0 | DeEvent::Start(e) => Err(DeError::UnexpectedStart(e.name().as_ref().to_owned())), |
2912 | | // Reached by struct_::non_closed::elements_child |
2913 | 0 | DeEvent::Eof => Err(Error::missed_end(name, self.reader.decoder()).into()), |
2914 | | }, |
2915 | | // We can get End event in case of `<tag></tag>` or `<tag/>` input |
2916 | | // Return empty text in that case |
2917 | | // The matching tag name is guaranteed by the reader |
2918 | | // Reached by {...}::xs_list::empty |
2919 | 0 | DeEvent::End(_) => Ok("".into()), |
2920 | | // Reached by trivial::{...}::{field_nested, field_tag_before, nested, tag_before} |
2921 | 0 | DeEvent::Start(s) => Err(DeError::UnexpectedStart(s.name().as_ref().to_owned())), |
2922 | | // Reached by struct_::non_closed::elements_child |
2923 | 0 | DeEvent::Eof => Err(Error::missed_end(name, self.reader.decoder()).into()), |
2924 | | } |
2925 | 0 | } |
2926 | | |
2927 | | /// Drops all events up to and including the matching end event with |
2928 | | /// [name](BytesEnd::name()) `name`. This method should be called after [`Self::next()`] |
2929 | | #[cfg(feature = "overlapped-lists")] |
2930 | 0 | fn read_to_end(&mut self, name: QName) -> Result<(), DeError> { |
2931 | 0 | let mut depth = 0; |
2932 | | loop { |
2933 | 0 | match self.read.pop_front() { |
2934 | 0 | Some(DeEvent::Start(e)) if e.name() == name => { |
2935 | 0 | depth += 1; |
2936 | 0 | } |
2937 | 0 | Some(DeEvent::End(e)) if e.name() == name => { |
2938 | 0 | if depth == 0 { |
2939 | 0 | break; |
2940 | 0 | } |
2941 | 0 | depth -= 1; |
2942 | | } |
2943 | | |
2944 | | // Drop all other skipped events |
2945 | 0 | Some(_) => continue, |
2946 | | |
2947 | | // If we do not have skipped events, use effective reading that will |
2948 | | // not allocate memory for events |
2949 | | None => { |
2950 | | // We should close all opened tags, because we could buffer |
2951 | | // Start events, but not the corresponding End events. So we |
2952 | | // keep reading events until we exit all nested tags. |
2953 | | // `read_to_end()` will return an error if an Eof was encountered |
2954 | | // prematurely (in case of malformed XML). |
2955 | | // |
2956 | | // <tag><tag></tag></tag> |
2957 | | // ^^^^^^^^^^ - buffered in `self.read`, when `self.read_to_end()` is called, depth = 2 |
2958 | | // ^^^^^^ - read by the first call of `self.reader.read_to_end()` |
2959 | | // ^^^^^^ - read by the second call of `self.reader.read_to_end()` |
2960 | | loop { |
2961 | 0 | self.reader.read_to_end(name)?; |
2962 | 0 | if depth == 0 { |
2963 | 0 | break; |
2964 | 0 | } |
2965 | 0 | depth -= 1; |
2966 | | } |
2967 | 0 | break; |
2968 | | } |
2969 | | } |
2970 | | } |
2971 | 0 | Ok(()) |
2972 | 0 | } |
2973 | | #[cfg(not(feature = "overlapped-lists"))] |
2974 | | fn read_to_end(&mut self, name: QName) -> Result<(), DeError> { |
2975 | | // First one might be in self.peek |
2976 | | match self.next()? { |
2977 | | DeEvent::Start(e) => self.reader.read_to_end(e.name())?, |
2978 | | DeEvent::End(e) if e.name() == name => return Ok(()), |
2979 | | _ => (), |
2980 | | } |
2981 | | self.reader.read_to_end(name) |
2982 | | } |
2983 | | |
2984 | 0 | fn skip_next_tree(&mut self) -> Result<(), DeError> { |
2985 | 0 | let DeEvent::Start(start) = self.next()? else { |
2986 | 0 | unreachable!("Only call this if the next event is a start event") |
2987 | | }; |
2988 | 0 | let name = start.name(); |
2989 | 0 | self.read_to_end(name) |
2990 | 0 | } |
2991 | | |
2992 | | /// Method for testing the Deserializer implementation. Checks that all events were consumed during |
2993 | | /// deserialization. Panics if the next event is not [`DeEvent::Eof`]. |
2994 | | #[doc(hidden)] |
2995 | | #[track_caller] |
2996 | 0 | pub fn check_eof_reached(&mut self) { |
2997 | | // Deserializer may not consume trailing spaces, that is normal |
2998 | 0 | self.skip_whitespaces().expect("cannot skip whitespaces"); |
2999 | 0 | let event = self.peek().expect("cannot peek event"); |
3000 | 0 | assert_eq!( |
3001 | | *event, |
3002 | | DeEvent::Eof, |
3003 | 0 | "the whole XML document should be consumed, expected `Eof`", |
3004 | | ); |
3005 | 0 | } |
3006 | | } |
3007 | | |
3008 | | impl<'de> Deserializer<'de, SliceReader<'de>> { |
3009 | | /// Create a new deserializer that will borrow data from the specified string. |
3010 | | /// |
3011 | | /// Deserializer created with this method will not resolve custom entities. |
3012 | | #[allow(clippy::should_implement_trait)] |
3013 | 0 | pub fn from_str(source: &'de str) -> Self { |
3014 | 0 | Self::from_str_with_resolver(source, PredefinedEntityResolver) |
3015 | 0 | } |
3016 | | |
3017 | | /// Create a new deserializer that will borrow data from the specified preconfigured |
3018 | | /// reader. |
3019 | | /// |
3020 | | /// Deserializer created with this method will not resolve custom entities. |
3021 | | /// |
3022 | | /// Note, that config option [`Config::expand_empty_elements`] will be set to `true`. |
3023 | | /// |
3024 | | /// # Example |
3025 | | /// |
3026 | | /// ``` |
3027 | | /// # use pretty_assertions::assert_eq; |
3028 | | /// # use quick_xml::de::Deserializer; |
3029 | | /// # use quick_xml::NsReader; |
3030 | | /// # use serde::Deserialize; |
3031 | | /// # |
3032 | | /// #[derive(Deserialize, PartialEq, Debug)] |
3033 | | /// struct Object<'a> { |
3034 | | /// tag: &'a str, |
3035 | | /// } |
3036 | | /// |
3037 | | /// let mut reader = NsReader::from_str("<xml><tag> test </tag></xml>"); |
3038 | | /// |
3039 | | /// let mut de = Deserializer::borrowing(reader.clone()); |
3040 | | /// let obj = Object::deserialize(&mut de).unwrap(); |
3041 | | /// assert_eq!(obj, Object { tag: " test " }); |
3042 | | /// |
3043 | | /// reader.config_mut().trim_text(true); |
3044 | | /// |
3045 | | /// let mut de = Deserializer::borrowing(reader); |
3046 | | /// let obj = Object::deserialize(&mut de).unwrap(); |
3047 | | /// assert_eq!(obj, Object { tag: "test" }); |
3048 | | /// ``` |
3049 | | /// |
3050 | | /// [`Config::expand_empty_elements`]: crate::reader::Config::expand_empty_elements |
3051 | | #[inline] |
3052 | 0 | pub fn borrowing(reader: NsReader<&'de [u8]>) -> Self { |
3053 | 0 | Self::borrowing_with_resolver(reader, PredefinedEntityResolver) |
3054 | 0 | } |
3055 | | } |
3056 | | |
3057 | | impl<'de, E> Deserializer<'de, SliceReader<'de>, E> |
3058 | | where |
3059 | | E: EntityResolver, |
3060 | | { |
3061 | | /// Create a new deserializer that will borrow data from the specified string |
3062 | | /// and use the specified entity resolver. |
3063 | 0 | pub fn from_str_with_resolver(source: &'de str, entity_resolver: E) -> Self { |
3064 | 0 | Self::borrowing_with_resolver(NsReader::from_str(source), entity_resolver) |
3065 | 0 | } |
3066 | | |
3067 | | /// Create a new deserializer that will borrow data from the specified preconfigured |
3068 | | /// reader and use the specified entity resolver. |
3069 | | /// |
3070 | | /// Note, that config option [`Config::expand_empty_elements`] will be set to `true`. |
3071 | | /// |
3072 | | /// [`Config::expand_empty_elements`]: crate::reader::Config::expand_empty_elements |
3073 | 0 | pub fn borrowing_with_resolver(mut reader: NsReader<&'de [u8]>, entity_resolver: E) -> Self { |
3074 | 0 | let config = reader.config_mut(); |
3075 | 0 | config.expand_empty_elements = true; |
3076 | | |
3077 | 0 | Self::new(SliceReader { reader }, entity_resolver) |
3078 | 0 | } |
3079 | | } |
3080 | | |
3081 | | impl<'de, R> Deserializer<'de, IoReader<R>> |
3082 | | where |
3083 | | R: BufRead, |
3084 | | { |
3085 | | /// Create a new deserializer that will copy data from the specified reader |
3086 | | /// into internal buffer. |
3087 | | /// |
3088 | | /// If you already have a string use [`Self::from_str`] instead, because it |
3089 | | /// will borrow instead of copy. If you have `&[u8]` which is known to represent |
3090 | | /// UTF-8, you can decode it first before using [`from_str`]. |
3091 | | /// |
3092 | | /// Deserializer created with this method will not resolve custom entities. |
3093 | 0 | pub fn from_reader(reader: R) -> Self { |
3094 | 0 | Self::with_resolver(reader, PredefinedEntityResolver) |
3095 | 0 | } |
3096 | | |
3097 | | /// Create a new deserializer that will copy data from the specified preconfigured |
3098 | | /// reader into internal buffer. |
3099 | | /// |
3100 | | /// Deserializer created with this method will not resolve custom entities. |
3101 | | /// |
3102 | | /// Note, that config option [`Config::expand_empty_elements`] will be set to `true`. |
3103 | | /// |
3104 | | /// # Example |
3105 | | /// |
3106 | | /// ``` |
3107 | | /// # use pretty_assertions::assert_eq; |
3108 | | /// # use quick_xml::de::Deserializer; |
3109 | | /// # use quick_xml::NsReader; |
3110 | | /// # use serde::Deserialize; |
3111 | | /// # |
3112 | | /// #[derive(Deserialize, PartialEq, Debug)] |
3113 | | /// struct Object { |
3114 | | /// tag: String, |
3115 | | /// } |
3116 | | /// |
3117 | | /// let mut reader = NsReader::from_str("<xml><tag> test </tag></xml>"); |
3118 | | /// |
3119 | | /// let mut de = Deserializer::buffering(reader.clone()); |
3120 | | /// let obj = Object::deserialize(&mut de).unwrap(); |
3121 | | /// assert_eq!(obj, Object { tag: " test ".to_string() }); |
3122 | | /// |
3123 | | /// reader.config_mut().trim_text(true); |
3124 | | /// |
3125 | | /// let mut de = Deserializer::buffering(reader); |
3126 | | /// let obj = Object::deserialize(&mut de).unwrap(); |
3127 | | /// assert_eq!(obj, Object { tag: "test".to_string() }); |
3128 | | /// ``` |
3129 | | /// |
3130 | | /// [`Config::expand_empty_elements`]: crate::reader::Config::expand_empty_elements |
3131 | | #[inline] |
3132 | 0 | pub fn buffering(reader: NsReader<R>) -> Self { |
3133 | 0 | Self::buffering_with_resolver(reader, PredefinedEntityResolver) |
3134 | 0 | } |
3135 | | } |
3136 | | |
3137 | | impl<'de, R, E> Deserializer<'de, IoReader<R>, E> |
3138 | | where |
3139 | | R: BufRead, |
3140 | | E: EntityResolver, |
3141 | | { |
3142 | | /// Create a new deserializer that will copy data from the specified reader |
3143 | | /// into internal buffer and use the specified entity resolver. |
3144 | | /// |
3145 | | /// If you already have a string use [`Self::from_str`] instead, because it |
3146 | | /// will borrow instead of copy. If you have `&[u8]` which is known to represent |
3147 | | /// UTF-8, you can decode it first before using [`from_str`]. |
3148 | 0 | pub fn with_resolver(reader: R, entity_resolver: E) -> Self { |
3149 | 0 | let mut reader = NsReader::from_reader(reader); |
3150 | 0 | let config = reader.config_mut(); |
3151 | 0 | config.expand_empty_elements = true; |
3152 | | |
3153 | 0 | Self::new( |
3154 | 0 | IoReader { |
3155 | 0 | reader, |
3156 | 0 | buf: Vec::new(), |
3157 | 0 | }, |
3158 | 0 | entity_resolver, |
3159 | | ) |
3160 | 0 | } |
3161 | | |
3162 | | /// Create new deserializer that will copy data from the specified preconfigured reader |
3163 | | /// into internal buffer and use the specified entity resolver. |
3164 | | /// |
3165 | | /// Note, that config option [`Config::expand_empty_elements`] will be set to `true`. |
3166 | | /// |
3167 | | /// [`Config::expand_empty_elements`]: crate::reader::Config::expand_empty_elements |
3168 | 0 | pub fn buffering_with_resolver(mut reader: NsReader<R>, entity_resolver: E) -> Self { |
3169 | 0 | let config = reader.config_mut(); |
3170 | 0 | config.expand_empty_elements = true; |
3171 | | |
3172 | 0 | Self::new( |
3173 | 0 | IoReader { |
3174 | 0 | reader, |
3175 | 0 | buf: Vec::new(), |
3176 | 0 | }, |
3177 | 0 | entity_resolver, |
3178 | | ) |
3179 | 0 | } |
3180 | | } |
3181 | | |
3182 | | impl<'de, R, E> de::Deserializer<'de> for &mut Deserializer<'de, R, E> |
3183 | | where |
3184 | | R: XmlRead<'de>, |
3185 | | E: EntityResolver, |
3186 | | { |
3187 | | type Error = DeError; |
3188 | | |
3189 | | deserialize_primitives!(); |
3190 | | |
3191 | 0 | fn deserialize_struct<V>( |
3192 | 0 | self, |
3193 | 0 | _name: &'static str, |
3194 | 0 | fields: &'static [&'static str], |
3195 | 0 | visitor: V, |
3196 | 0 | ) -> Result<V::Value, DeError> |
3197 | 0 | where |
3198 | 0 | V: Visitor<'de>, |
3199 | | { |
3200 | | // When document is pretty-printed there could be whitespaces before the root element |
3201 | 0 | self.skip_whitespaces()?; |
3202 | 0 | match self.next()? { |
3203 | 0 | DeEvent::Start(e) => visitor.visit_map(ElementMapAccess::new(self, e, fields)), |
3204 | | // SAFETY: The reader guarantees that we don't have unmatched tags |
3205 | | // If we are here, then our deserializer has a bug |
3206 | 0 | DeEvent::End(e) => unreachable!("{:?}", e), |
3207 | | // Deserializer methods are only hints, if deserializer could not satisfy |
3208 | | // request, it should return the data that it has. It is responsibility |
3209 | | // of a Visitor to return an error if it does not understand the data |
3210 | 0 | DeEvent::Text(e) => match e.text { |
3211 | 0 | Cow::Borrowed(s) => visitor.visit_borrowed_str(s), |
3212 | 0 | Cow::Owned(s) => visitor.visit_string(s), |
3213 | | }, |
3214 | 0 | DeEvent::Eof => Err(DeError::UnexpectedEof), |
3215 | | } |
3216 | 0 | } |
3217 | | |
3218 | | /// Unit represented in XML as a `xs:element` or text/CDATA content. |
3219 | | /// Any content inside `xs:element` is ignored and skipped. |
3220 | | /// |
3221 | | /// Produces unit struct from any of following inputs: |
3222 | | /// - any `<tag ...>...</tag>` |
3223 | | /// - any `<tag .../>` |
3224 | | /// - any consecutive text / CDATA content (can consist of several parts |
3225 | | /// delimited by comments and processing instructions) |
3226 | | /// |
3227 | | /// # Events handling |
3228 | | /// |
3229 | | /// |Event |XML |Handling |
3230 | | /// |------------------|---------------------------|------------------------------------------- |
3231 | | /// |[`DeEvent::Start`]|`<tag>...</tag>` |Calls `visitor.visit_unit()`, consumes all events up to and including corresponding `End` event |
3232 | | /// |[`DeEvent::End`] |`</tag>` |This is impossible situation, the method will panic if it happens |
3233 | | /// |[`DeEvent::Text`] |`text content` or `<![CDATA[cdata content]]>` (probably mixed)|Calls `visitor.visit_unit()`. The content is ignored |
3234 | | /// |[`DeEvent::Eof`] | |Emits [`UnexpectedEof`](DeError::UnexpectedEof) |
3235 | 0 | fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value, DeError> |
3236 | 0 | where |
3237 | 0 | V: Visitor<'de>, |
3238 | | { |
3239 | 0 | match self.next()? { |
3240 | 0 | DeEvent::Start(s) => { |
3241 | 0 | self.read_to_end(s.name())?; |
3242 | 0 | visitor.visit_unit() |
3243 | | } |
3244 | 0 | DeEvent::Text(_) => visitor.visit_unit(), |
3245 | | // SAFETY: The reader guarantees that we don't have unmatched tags |
3246 | | // If we are here, then our deserializer has a bug |
3247 | 0 | DeEvent::End(e) => unreachable!("{:?}", e), |
3248 | 0 | DeEvent::Eof => Err(DeError::UnexpectedEof), |
3249 | | } |
3250 | 0 | } |
3251 | | |
3252 | | /// Forwards deserialization of the inner type. Always calls [`Visitor::visit_newtype_struct`] |
3253 | | /// with the same deserializer. |
3254 | 0 | fn deserialize_newtype_struct<V>( |
3255 | 0 | self, |
3256 | 0 | _name: &'static str, |
3257 | 0 | visitor: V, |
3258 | 0 | ) -> Result<V::Value, DeError> |
3259 | 0 | where |
3260 | 0 | V: Visitor<'de>, |
3261 | | { |
3262 | 0 | visitor.visit_newtype_struct(self) |
3263 | 0 | } |
3264 | | |
3265 | 0 | fn deserialize_enum<V>( |
3266 | 0 | self, |
3267 | 0 | _name: &'static str, |
3268 | 0 | _variants: &'static [&'static str], |
3269 | 0 | visitor: V, |
3270 | 0 | ) -> Result<V::Value, DeError> |
3271 | 0 | where |
3272 | 0 | V: Visitor<'de>, |
3273 | | { |
3274 | | // When document is pretty-printed there could be whitespaces before the root element |
3275 | | // which represents the enum variant |
3276 | | // Checked by `top_level::list_of_enum` test in serde-de-seq |
3277 | 0 | self.skip_whitespaces()?; |
3278 | 0 | visitor.visit_enum(var::EnumAccess::new(self)) |
3279 | 0 | } |
3280 | | |
3281 | 0 | fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value, DeError> |
3282 | 0 | where |
3283 | 0 | V: Visitor<'de>, |
3284 | | { |
3285 | 0 | visitor.visit_seq(self) |
3286 | 0 | } |
3287 | | |
3288 | 0 | fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, DeError> |
3289 | 0 | where |
3290 | 0 | V: Visitor<'de>, |
3291 | | { |
3292 | | // We cannot use result of `peek()` directly because of borrow checker |
3293 | 0 | let _ = self.peek()?; |
3294 | 0 | match self.last_peeked() { |
3295 | 0 | DeEvent::Text(t) if t.is_empty() => visitor.visit_none(), |
3296 | 0 | DeEvent::Eof => visitor.visit_none(), |
3297 | | // if the `xsi:nil` attribute is set to true we got a none value |
3298 | 0 | DeEvent::Start(start) if self.reader.reader.has_nil_attr(start) => { |
3299 | 0 | self.skip_next_tree()?; |
3300 | 0 | visitor.visit_none() |
3301 | | } |
3302 | 0 | _ => visitor.visit_some(self), |
3303 | | } |
3304 | 0 | } |
3305 | | |
3306 | 0 | fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, DeError> |
3307 | 0 | where |
3308 | 0 | V: Visitor<'de>, |
3309 | | { |
3310 | 0 | match self.peek()? { |
3311 | 0 | DeEvent::Text(_) => self.deserialize_str(visitor), |
3312 | 0 | _ => self.deserialize_map(visitor), |
3313 | | } |
3314 | 0 | } |
3315 | | } |
3316 | | |
3317 | | /// An accessor to sequence elements forming a value for top-level sequence of XML |
3318 | | /// elements. |
3319 | | /// |
3320 | | /// Technically, multiple top-level elements violate the XML rule of only one top-level |
3321 | | /// element, but we treat this as several concatenated XML documents. |
3322 | | impl<'de, R, E> SeqAccess<'de> for &mut Deserializer<'de, R, E> |
3323 | | where |
3324 | | R: XmlRead<'de>, |
3325 | | E: EntityResolver, |
3326 | | { |
3327 | | type Error = DeError; |
3328 | | |
3329 | 0 | fn next_element_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>, Self::Error> |
3330 | 0 | where |
3331 | 0 | T: DeserializeSeed<'de>, |
3332 | | { |
3333 | | // When document is pretty-printed there could be whitespaces before, between |
3334 | | // and after root elements. We cannot defer decision if we need to skip spaces |
3335 | | // or not: if we have a sequence of type that does not accept blank text, it |
3336 | | // will need to return something and it can return only error. For example, |
3337 | | // it can be enum without `$text` variant |
3338 | | // Checked by `top_level::list_of_enum` test in serde-de-seq |
3339 | 0 | self.skip_whitespaces()?; |
3340 | 0 | match self.peek()? { |
3341 | 0 | DeEvent::Eof => Ok(None), |
3342 | | |
3343 | | // Start(tag), End(tag), Text |
3344 | 0 | _ => seed.deserialize(&mut **self).map(Some), |
3345 | | } |
3346 | 0 | } |
3347 | | } |
3348 | | |
3349 | | impl<'de, R, E> IntoDeserializer<'de, DeError> for &mut Deserializer<'de, R, E> |
3350 | | where |
3351 | | R: XmlRead<'de>, |
3352 | | E: EntityResolver, |
3353 | | { |
3354 | | type Deserializer = Self; |
3355 | | |
3356 | | #[inline] |
3357 | 0 | fn into_deserializer(self) -> Self { |
3358 | 0 | self |
3359 | 0 | } |
3360 | | } |
3361 | | |
3362 | | //////////////////////////////////////////////////////////////////////////////////////////////////// |
3363 | | |
3364 | | /// Converts raw reader's event into a payload event. |
3365 | | /// Returns `None`, if event should be skipped. |
3366 | | #[inline(always)] |
3367 | 0 | fn skip_uninterested<'a>(event: Event<'a>) -> Option<PayloadEvent<'a>> { |
3368 | 0 | let event = match event { |
3369 | 0 | Event::DocType(e) => PayloadEvent::DocType(e), |
3370 | 0 | Event::Start(e) => PayloadEvent::Start(e), |
3371 | 0 | Event::End(e) => PayloadEvent::End(e), |
3372 | 0 | Event::Eof => PayloadEvent::Eof, |
3373 | | |
3374 | | // Do not trim next text event after Text, CDATA or reference event |
3375 | 0 | Event::CData(e) => PayloadEvent::CData(e), |
3376 | 0 | Event::Text(e) => PayloadEvent::Text(e), |
3377 | 0 | Event::GeneralRef(e) => PayloadEvent::GeneralRef(e), |
3378 | | |
3379 | 0 | _ => return None, |
3380 | | }; |
3381 | 0 | Some(event) |
3382 | 0 | } |
3383 | | |
3384 | | //////////////////////////////////////////////////////////////////////////////////////////////////// |
3385 | | |
3386 | | /// Trait used by the deserializer for iterating over input. This is manually |
3387 | | /// "specialized" for iterating over `&[u8]`. |
3388 | | /// |
3389 | | /// You do not need to implement this trait, it is needed to abstract from |
3390 | | /// [borrowing](SliceReader) and [copying](IoReader) data sources and reuse code in |
3391 | | /// deserializer |
3392 | | pub trait XmlRead<'i> { |
3393 | | /// Return an input-borrowing event. |
3394 | | fn next(&mut self) -> Result<PayloadEvent<'i>, DeError>; |
3395 | | |
3396 | | /// Skips until end element is found. Unlike `next()` it will not allocate |
3397 | | /// when it cannot satisfy the lifetime. |
3398 | | fn read_to_end(&mut self, name: QName) -> Result<(), DeError>; |
3399 | | |
3400 | | /// A copy of the reader's decoder used to decode strings. |
3401 | | fn decoder(&self) -> Decoder; |
3402 | | |
3403 | | /// Checks if the `start` tag has a [`xsi:nil`] attribute. This method ignores |
3404 | | /// any errors in attributes. |
3405 | | /// |
3406 | | /// [`xsi:nil`]: https://www.w3.org/TR/xmlschema-1/#xsi_nil |
3407 | | fn has_nil_attr(&self, start: &BytesStart) -> bool; |
3408 | | } |
3409 | | |
3410 | | /// XML input source that reads from a std::io input stream. |
3411 | | /// |
3412 | | /// You cannot create it, it is created automatically when you call |
3413 | | /// [`Deserializer::from_reader`] |
3414 | | pub struct IoReader<R: BufRead> { |
3415 | | reader: NsReader<R>, |
3416 | | buf: Vec<u8>, |
3417 | | } |
3418 | | |
3419 | | impl<R: BufRead> IoReader<R> { |
3420 | | /// Returns the underlying XML reader. |
3421 | | /// |
3422 | | /// ``` |
3423 | | /// # use pretty_assertions::assert_eq; |
3424 | | /// use serde::Deserialize; |
3425 | | /// use std::io::Cursor; |
3426 | | /// use quick_xml::de::Deserializer; |
3427 | | /// use quick_xml::NsReader; |
3428 | | /// |
3429 | | /// #[derive(Deserialize)] |
3430 | | /// struct SomeStruct { |
3431 | | /// field1: String, |
3432 | | /// field2: String, |
3433 | | /// } |
3434 | | /// |
3435 | | /// // Try to deserialize from broken XML |
3436 | | /// let mut de = Deserializer::from_reader(Cursor::new( |
3437 | | /// "<SomeStruct><field1><field2></SomeStruct>" |
3438 | | /// // 0 ^= 28 ^= 41 |
3439 | | /// )); |
3440 | | /// |
3441 | | /// let err = SomeStruct::deserialize(&mut de); |
3442 | | /// assert!(err.is_err()); |
3443 | | /// |
3444 | | /// let reader: &NsReader<Cursor<&str>> = de.get_ref().get_ref(); |
3445 | | /// |
3446 | | /// assert_eq!(reader.error_position(), 28); |
3447 | | /// assert_eq!(reader.buffer_position(), 41); |
3448 | | /// ``` |
3449 | 0 | pub const fn get_ref(&self) -> &NsReader<R> { |
3450 | 0 | &self.reader |
3451 | 0 | } |
3452 | | } |
3453 | | |
3454 | | impl<'i, R: BufRead> XmlRead<'i> for IoReader<R> { |
3455 | 0 | fn next(&mut self) -> Result<PayloadEvent<'static>, DeError> { |
3456 | | loop { |
3457 | 0 | self.buf.clear(); |
3458 | | |
3459 | 0 | let event = self.reader.read_event_into(&mut self.buf)?; |
3460 | 0 | if let Some(event) = skip_uninterested(event) { |
3461 | 0 | return Ok(event.into_owned()); |
3462 | 0 | } |
3463 | | } |
3464 | 0 | } |
3465 | | |
3466 | 0 | fn read_to_end(&mut self, name: QName) -> Result<(), DeError> { |
3467 | 0 | match self.reader.read_to_end_into(name, &mut self.buf) { |
3468 | 0 | Err(e) => Err(e.into()), |
3469 | 0 | Ok(_) => Ok(()), |
3470 | | } |
3471 | 0 | } |
3472 | | |
3473 | 0 | fn decoder(&self) -> Decoder { |
3474 | 0 | self.reader.decoder() |
3475 | 0 | } |
3476 | | |
3477 | 0 | fn has_nil_attr(&self, start: &BytesStart) -> bool { |
3478 | 0 | start.attributes().has_nil(&self.reader) |
3479 | 0 | } |
3480 | | } |
3481 | | |
/// XML input source that reads from a slice of bytes and can borrow from it.
///
/// You cannot create it, it is created automatically when you call
/// [`Deserializer::from_str`].
pub struct SliceReader<'de> {
    /// Underlying XML reader over the borrowed input bytes
    reader: NsReader<&'de [u8]>,
}
3489 | | |
3490 | | impl<'de> SliceReader<'de> { |
3491 | | /// Returns the underlying XML reader. |
3492 | | /// |
3493 | | /// ``` |
3494 | | /// # use pretty_assertions::assert_eq; |
3495 | | /// use serde::Deserialize; |
3496 | | /// use quick_xml::de::Deserializer; |
3497 | | /// use quick_xml::NsReader; |
3498 | | /// |
3499 | | /// #[derive(Deserialize)] |
3500 | | /// struct SomeStruct { |
3501 | | /// field1: String, |
3502 | | /// field2: String, |
3503 | | /// } |
3504 | | /// |
3505 | | /// // Try to deserialize from broken XML |
3506 | | /// let mut de = Deserializer::from_str( |
3507 | | /// "<SomeStruct><field1><field2></SomeStruct>" |
3508 | | /// // 0 ^= 28 ^= 41 |
3509 | | /// ); |
3510 | | /// |
3511 | | /// let err = SomeStruct::deserialize(&mut de); |
3512 | | /// assert!(err.is_err()); |
3513 | | /// |
3514 | | /// let reader: &NsReader<&[u8]> = de.get_ref().get_ref(); |
3515 | | /// |
3516 | | /// assert_eq!(reader.error_position(), 28); |
3517 | | /// assert_eq!(reader.buffer_position(), 41); |
3518 | | /// ``` |
3519 | 0 | pub const fn get_ref(&self) -> &NsReader<&'de [u8]> { |
3520 | 0 | &self.reader |
3521 | 0 | } |
3522 | | } |
3523 | | |
3524 | | impl<'de> XmlRead<'de> for SliceReader<'de> { |
3525 | 0 | fn next(&mut self) -> Result<PayloadEvent<'de>, DeError> { |
3526 | | loop { |
3527 | 0 | let event = self.reader.read_event()?; |
3528 | 0 | if let Some(event) = skip_uninterested(event) { |
3529 | 0 | return Ok(event); |
3530 | 0 | } |
3531 | | } |
3532 | 0 | } |
3533 | | |
3534 | 0 | fn read_to_end(&mut self, name: QName) -> Result<(), DeError> { |
3535 | 0 | match self.reader.read_to_end(name) { |
3536 | 0 | Err(e) => Err(e.into()), |
3537 | 0 | Ok(_) => Ok(()), |
3538 | | } |
3539 | 0 | } |
3540 | | |
3541 | 0 | fn decoder(&self) -> Decoder { |
3542 | 0 | self.reader.decoder() |
3543 | 0 | } |
3544 | | |
3545 | 0 | fn has_nil_attr(&self, start: &BytesStart) -> bool { |
3546 | 0 | start.attributes().has_nil(&self.reader) |
3547 | 0 | } |
3548 | | } |
3549 | | |
3550 | | #[cfg(test)] |
3551 | | mod tests { |
3552 | | use super::*; |
3553 | | use crate::errors::IllFormedError; |
3554 | | use pretty_assertions::assert_eq; |
3555 | | |
3556 | | fn make_de<'de>(source: &'de str) -> Deserializer<'de, SliceReader<'de>> { |
3557 | | dbg!(source); |
3558 | | Deserializer::from_str(source) |
3559 | | } |
3560 | | |
3561 | | #[cfg(feature = "overlapped-lists")] |
3562 | | mod skip { |
3563 | | use super::*; |
3564 | | use crate::de::DeEvent::*; |
3565 | | use crate::events::BytesEnd; |
3566 | | use pretty_assertions::assert_eq; |
3567 | | |
/// Checks that `peek()` and `next()` behave correctly after `skip()`
#[test]
fn read_and_peek() {
    let mut de = make_de(concat!(
        "<root>",
        "<inner>",
        "text",
        "<inner/>",
        "</inner>",
        "<next/>",
        "<target/>",
        "</root>",
    ));

    // All events of the first <inner> tree, in document order
    let inner_tree = vec![
        Start(BytesStart::new("inner")),
        Text("text".into()),
        Start(BytesStart::new("inner")),
        End(BytesEnd::new("inner")),
        End(BytesEnd::new("inner")),
    ];

    // Initial conditions - both buffers are empty
    assert_eq!(de.read, vec![]);
    assert_eq!(de.write, vec![]);

    assert_eq!(de.next().unwrap(), Start(BytesStart::new("root")));
    assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("inner")));

    // start_replay() should begin the replay from this point
    let checkpoint = de.skip_checkpoint();
    assert_eq!(checkpoint, 0);

    // Skipping moves the whole first <inner> tree into the write buffer
    de.skip().unwrap();
    assert_eq!(de.read, vec![]);
    assert_eq!(de.write, inner_tree);

    // Consume <next/>. After that the unconsumed XML looks like:
    //
    //   <inner>
    //     text
    //     <inner/>
    //   </inner>
    //   <target/>
    // </root>
    assert_eq!(de.next().unwrap(), Start(BytesStart::new("next")));
    assert_eq!(de.next().unwrap(), End(BytesEnd::new("next")));

    // Writing is finished. The skipped events
    //
    //   <inner>
    //     text
    //     <inner/>
    //   </inner>
    //
    // become readable again, and these events are streamed after them:
    //
    //   <target/>
    // </root>
    de.start_replay(checkpoint);
    assert_eq!(de.read, inner_tree);
    assert_eq!(de.write, vec![]);
    assert_eq!(de.next().unwrap(), Start(BytesStart::new("inner")));

    // start_replay() should begin the replay from this point
    let checkpoint = de.skip_checkpoint();
    assert_eq!(checkpoint, 0);

    // Skip the `$text` node; the nested <inner/> is consumed after that
    de.skip().unwrap();
    let after_text = vec![
        Start(BytesStart::new("inner")),
        End(BytesEnd::new("inner")),
        End(BytesEnd::new("inner")),
    ];
    let skipped_text = vec![Text("text".into())];
    assert_eq!(de.read, after_text);
    assert_eq!(de.write, skipped_text);

    assert_eq!(de.next().unwrap(), Start(BytesStart::new("inner")));
    assert_eq!(de.next().unwrap(), End(BytesEnd::new("inner")));

    // Writing is finished. The skipped events
    //
    //     text
    //   </inner>
    //
    // become readable again, and these events are streamed after them:
    //
    //   <target/>
    // </root>
    de.start_replay(checkpoint);
    let replayed = vec![Text("text".into()), End(BytesEnd::new("inner"))];
    assert_eq!(de.read, replayed);
    assert_eq!(de.write, vec![]);

    // First the replayed events, then the rest of the document
    let remaining = [
        Text("text".into()),
        End(BytesEnd::new("inner")),
        Start(BytesStart::new("target")),
        End(BytesEnd::new("target")),
        End(BytesEnd::new("root")),
        Eof,
    ];
    for expected in remaining {
        assert_eq!(de.next().unwrap(), expected);
    }
}
3698 | | |
/// Checks that `read_to_end()` behaves correctly after `skip()`
#[test]
fn read_to_end() {
    let mut de = make_de(concat!(
        "<root>",
        "<skip>",
        "text",
        "<skip/>",
        "</skip>",
        "<target>",
        "<target/>",
        "</target>",
        "</root>",
    ));

    // All events of the <skip> tree, in document order
    let skip_tree = vec![
        Start(BytesStart::new("skip")),
        Text("text".into()),
        Start(BytesStart::new("skip")),
        End(BytesEnd::new("skip")),
        End(BytesEnd::new("skip")),
    ];

    // Initial conditions - both buffers are empty
    assert_eq!(de.read, vec![]);
    assert_eq!(de.write, vec![]);

    assert_eq!(de.next().unwrap(), Start(BytesStart::new("root")));

    // start_replay() should begin the replay from this point
    let checkpoint = de.skip_checkpoint();
    assert_eq!(checkpoint, 0);

    // Skipping moves the whole <skip> tree into the write buffer
    de.skip().unwrap();
    assert_eq!(de.read, vec![]);
    assert_eq!(de.write, skip_tree);

    // Drop all events of the <target> tree. After that the unconsumed XML
    // looks like:
    //
    //   <skip>
    //     text
    //     <skip/>
    //   </skip>
    // </root>
    assert_eq!(de.next().unwrap(), Start(BytesStart::new("target")));
    de.read_to_end(QName(b"target")).unwrap();
    // The buffers are exactly as they were before <target> was dropped
    assert_eq!(de.read, vec![]);
    assert_eq!(de.write, skip_tree);

    // Writing is finished. The skipped events
    //
    //   <skip>
    //     text
    //     <skip/>
    //   </skip>
    //
    // become readable again, and this event is streamed after them:
    //
    // </root>
    de.start_replay(checkpoint);
    assert_eq!(de.read, skip_tree);
    assert_eq!(de.write, vec![]);

    assert_eq!(de.next().unwrap(), Start(BytesStart::new("skip")));
    de.read_to_end(QName(b"skip")).unwrap();

    assert_eq!(de.next().unwrap(), End(BytesEnd::new("root")));
    assert_eq!(de.next().unwrap(), Eof);
}
3790 | | |
/// Checks that a replay replays only a part of the buffered events
/// Test for https://github.com/tafia/quick-xml/issues/435
#[test]
fn partial_replay() {
    let mut de = make_de(concat!(
        "<root>",
        "<skipped-1/>",
        "<skipped-2/>",
        "<inner>",
        "<skipped-3/>",
        "<skipped-4/>",
        "<target-2/>",
        "</inner>",
        "<target-1/>",
        "</root>",
    ));

    // Events skipped after checkpoint 1
    let outer_skipped = vec![
        Start(BytesStart::new("skipped-1")),
        End(BytesEnd::new("skipped-1")),
        Start(BytesStart::new("skipped-2")),
        End(BytesEnd::new("skipped-2")),
    ];
    // Events skipped after checkpoint 1 followed by those skipped after checkpoint 2
    let all_skipped = vec![
        // checkpoint 1
        Start(BytesStart::new("skipped-1")),
        End(BytesEnd::new("skipped-1")),
        Start(BytesStart::new("skipped-2")),
        End(BytesEnd::new("skipped-2")),
        // checkpoint 2
        Start(BytesStart::new("skipped-3")),
        End(BytesEnd::new("skipped-3")),
        Start(BytesStart::new("skipped-4")),
        End(BytesEnd::new("skipped-4")),
    ];

    // Initial conditions - both buffers are empty
    assert_eq!(de.read, vec![]);
    assert_eq!(de.write, vec![]);

    assert_eq!(de.next().unwrap(), Start(BytesStart::new("root")));

    // start_replay() should start the replay from this point
    let checkpoint1 = de.skip_checkpoint();
    assert_eq!(checkpoint1, 0);

    // Skip the first and the second <skipped-N/> elements
    de.skip().unwrap(); // skipped-1
    de.skip().unwrap(); // skipped-2
    assert_eq!(de.read, vec![]);
    assert_eq!(de.write, outer_skipped);

    ////////////////////////////////////////////////////////////////////////////////////////

    assert_eq!(de.next().unwrap(), Start(BytesStart::new("inner")));
    assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("skipped-3")));
    // The peeked event waits in the read buffer, the write buffer is untouched
    let peeked = vec![Start(BytesStart::new("skipped-3"))];
    assert_eq!(de.read, peeked);
    assert_eq!(de.write, outer_skipped);

    // start_replay() should start the replay from this point
    let checkpoint2 = de.skip_checkpoint();
    assert_eq!(checkpoint2, 4);

    // Skip the third and the fourth <skipped-N/> elements
    de.skip().unwrap(); // skipped-3
    de.skip().unwrap(); // skipped-4
    assert_eq!(de.read, vec![]);
    assert_eq!(de.write, all_skipped);

    assert_eq!(de.next().unwrap(), Start(BytesStart::new("target-2")));
    assert_eq!(de.next().unwrap(), End(BytesEnd::new("target-2")));
    assert_eq!(de.peek().unwrap(), &End(BytesEnd::new("inner")));
    let peeked = vec![End(BytesEnd::new("inner"))];
    assert_eq!(de.read, peeked);
    assert_eq!(de.write, all_skipped);

    // Start replaying events from checkpoint 2: only the events written after
    // it are moved in front of the already peeked event
    de.start_replay(checkpoint2);
    let replay_from_2 = vec![
        Start(BytesStart::new("skipped-3")),
        End(BytesEnd::new("skipped-3")),
        Start(BytesStart::new("skipped-4")),
        End(BytesEnd::new("skipped-4")),
        End(BytesEnd::new("inner")),
    ];
    assert_eq!(de.read, replay_from_2);
    assert_eq!(de.write, outer_skipped);

    // Replayed events
    for name in ["skipped-3", "skipped-4"] {
        assert_eq!(de.next().unwrap(), Start(BytesStart::new(name)));
        assert_eq!(de.next().unwrap(), End(BytesEnd::new(name)));
    }

    assert_eq!(de.next().unwrap(), End(BytesEnd::new("inner")));
    assert_eq!(de.read, vec![]);
    assert_eq!(de.write, outer_skipped);

    ////////////////////////////////////////////////////////////////////////////////////////

    // New events
    assert_eq!(de.next().unwrap(), Start(BytesStart::new("target-1")));
    assert_eq!(de.next().unwrap(), End(BytesEnd::new("target-1")));

    assert_eq!(de.read, vec![]);
    assert_eq!(de.write, outer_skipped);

    // Start replaying events from checkpoint 1
    de.start_replay(checkpoint1);
    assert_eq!(de.read, outer_skipped);
    assert_eq!(de.write, vec![]);

    // Replayed events
    for name in ["skipped-1", "skipped-2"] {
        assert_eq!(de.next().unwrap(), Start(BytesStart::new(name)));
        assert_eq!(de.next().unwrap(), End(BytesEnd::new(name)));
    }

    assert_eq!(de.read, vec![]);
    assert_eq!(de.write, vec![]);

    // New events
    assert_eq!(de.next().unwrap(), End(BytesEnd::new("root")));
    assert_eq!(de.next().unwrap(), Eof);
}
3989 | | |
/// Checks that the limit of the event buffer size is respected
#[test]
fn limit() {
    use serde::Deserialize;

    #[derive(Debug, Deserialize)]
    #[allow(unused)]
    struct List {
        item: Vec<()>,
    }

    let mut de = make_de(concat!(
        "<any-name>",
        "<item/>",
        "<another-item>",
        "<some-element>with text</some-element>",
        "<yet-another-element/>",
        "</another-item>",
        "<item/>",
        "<item/>",
        "</any-name>",
    ));
    de.event_buffer_size(NonZeroUsize::new(3));

    // The only accepted outcome is the error that reports the configured limit
    let outcome = List::deserialize(&mut de);
    match outcome {
        Err(DeError::TooManyEvents(count)) => assert_eq!(count.get(), 3),
        e => panic!("Expected `Err(TooManyEvents(3))`, but got `{:?}`", e),
    }
}
4021 | | |
/// Without handling of `Eof` in `skip` this test failed with memory allocation
#[test]
fn invalid_xml() {
    use crate::de::DeEvent::*;

    let mut de = make_de("<root>");

    // Cache all events: <root> is never closed, so the end of input is
    // cached right after its start tag
    let checkpoint = de.skip_checkpoint();
    de.skip().unwrap();
    de.start_replay(checkpoint);

    let cached = vec![Start(BytesStart::new("root")), Eof];
    assert_eq!(de.read, cached);
}
4035 | | } |
4036 | | |
4037 | | mod read_to_end { |
4038 | | use super::*; |
4039 | | use crate::de::DeEvent::*; |
4040 | | use pretty_assertions::assert_eq; |
4041 | | |
/// Checks `read_to_end()` on a document that contains nested tags with the
/// same name, CDATA and a self-closed tag
#[test]
fn complex() {
    // The whitespace between the elements is written out explicitly, so the
    // `Text` events asserted below can be checked against the input directly.
    // NOTE(review): the listing this was taken from collapses runs of
    // whitespace, so the original indentation inside the literal is unknown;
    // one space is used to agree with the `"\n "` expectations - confirm
    // against the original file.
    let mut de = make_de(concat!(
        "\n ",
        "<root>",
        "\n ",
        r#"<tag a="1"><tag>text</tag>content</tag>"#,
        "\n ",
        r#"<tag a="2"><![CDATA[cdata content]]></tag>"#,
        "\n ",
        "<self-closed/>",
        "\n ",
        "</root>",
        "\n ",
    ));

    assert_eq!(de.next().unwrap(), Text("\n ".into()));
    assert_eq!(de.next().unwrap(), Start(BytesStart::new("root")));

    // The first <tag> is dropped as a whole, including the nested <tag>
    assert_eq!(de.next().unwrap(), Text("\n ".into()));
    let first_tag = Start(BytesStart::from_content(r#"tag a="1""#, 3));
    assert_eq!(de.next().unwrap(), first_tag);
    de.read_to_end(QName(b"tag")).unwrap();

    // The second <tag> is read event by event
    assert_eq!(de.next().unwrap(), Text("\n ".into()));
    let second_tag = Start(BytesStart::from_content(r#"tag a="2""#, 3));
    assert_eq!(de.next().unwrap(), second_tag);
    assert_eq!(de.next().unwrap(), Text("cdata content".into()));
    assert_eq!(de.next().unwrap(), End(BytesEnd::new("tag")));

    // A self-closed tag can be dropped right after its start event
    assert_eq!(de.next().unwrap(), Text("\n ".into()));
    assert_eq!(de.next().unwrap(), Start(BytesStart::new("self-closed")));
    de.read_to_end(QName(b"self-closed")).unwrap();

    let tail = [
        Text("\n ".into()),
        End(BytesEnd::new("root")),
        Text("\n ".into()),
        Eof,
    ];
    for expected in tail {
        assert_eq!(de.next().unwrap(), expected);
    }
}
4081 | | |
/// The outer `<tag>` is never closed: `read_to_end()` must report the
/// missing end tag, and the next event must be `Eof`
#[test]
fn invalid_xml1() {
    let mut de = make_de("<tag><tag></tag>");

    assert_eq!(de.next().unwrap(), Start(BytesStart::new("tag")));
    assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("tag")));

    let cause = match de.read_to_end(QName(b"tag")) {
        Err(DeError::InvalidXml(Error::IllFormed(cause))) => cause,
        x => panic!(
            "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
            x
        ),
    };
    assert_eq!(cause, IllFormedError::MissingEndTag("tag".into()));
    assert_eq!(de.next().unwrap(), Eof);
}
4100 | | |
/// Same as `invalid_xml1`, but an empty CDATA section is peeked before
/// `read_to_end()` is called
#[test]
fn invalid_xml2() {
    let mut de = make_de("<tag><![CDATA[]]><tag></tag>");

    assert_eq!(de.next().unwrap(), Start(BytesStart::new("tag")));
    assert_eq!(de.peek().unwrap(), &Text("".into()));

    let cause = match de.read_to_end(QName(b"tag")) {
        Err(DeError::InvalidXml(Error::IllFormed(cause))) => cause,
        x => panic!(
            "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
            x
        ),
    };
    assert_eq!(cause, IllFormedError::MissingEndTag("tag".into()));
    assert_eq!(de.next().unwrap(), Eof);
}
4119 | | } |
4120 | | |
/// Checks that `IoReader` and `SliceReader` produce the same sequence of
/// events for the same input
#[test]
fn borrowing_reader_parity() {
    let s = r#"
        <item name="hello" source="world.rs">Some text</item>
        <item2/>
        <item3 value="world" />
    "#;

    let mut io_reader = IoReader {
        reader: NsReader::from_reader(s.as_bytes()),
        buf: Vec::new(),
    };
    let mut slice_reader = SliceReader {
        reader: NsReader::from_str(s),
    };

    loop {
        let from_io = io_reader.next().unwrap();
        let from_slice = slice_reader.next().unwrap();

        // Two `Eof` events compare equal as well, so the comparison can be
        // done before the end of input is checked
        assert_eq!(from_io, from_slice);

        if matches!(from_io, PayloadEvent::Eof) {
            break;
        }
    }
}
4148 | | |
/// Checks the exact sequence of events that `SliceReader` produces when
/// empty elements are expanded into a start and an end event
#[test]
fn borrowing_reader_events() {
    use crate::de::PayloadEvent::*;

    // The whitespace between the elements is written out explicitly, so the
    // `Text` events expected below can be checked against the input directly.
    // NOTE(review): the listing this was taken from collapses runs of
    // whitespace, so the original indentation inside the literal is unknown;
    // one space is used to agree with the `"\n "` expectations - confirm
    // against the original file.
    let s = concat!(
        "\n ",
        r#"<item name="hello" source="world.rs">Some text</item>"#,
        "\n ",
        "<item2></item2>",
        "\n ",
        "<item3/>",
        "\n ",
        r#"<item4 value="world" />"#,
        "\n ",
    );

    let mut reader = SliceReader {
        reader: NsReader::from_str(s),
    };
    reader.reader.config_mut().expand_empty_elements = true;

    // Collect everything up to, but not including, the end of input
    let events: Vec<_> = std::iter::from_fn(|| match reader.next().unwrap() {
        Eof => None,
        event => Some(event),
    })
    .collect();

    let expected = vec![
        Text(BytesText::from_escaped("\n ")),
        Start(BytesStart::from_content(
            r#"item name="hello" source="world.rs""#,
            4,
        )),
        Text(BytesText::from_escaped("Some text")),
        End(BytesEnd::new("item")),
        Text(BytesText::from_escaped("\n ")),
        Start(BytesStart::from_content("item2", 5)),
        End(BytesEnd::new("item2")),
        Text(BytesText::from_escaped("\n ")),
        Start(BytesStart::from_content("item3", 5)),
        End(BytesEnd::new("item3")),
        Text(BytesText::from_escaped("\n ")),
        Start(BytesStart::from_content(r#"item4 value="world" "#, 5)),
        End(BytesEnd::new("item4")),
        Text(BytesText::from_escaped("\n ")),
    ];
    assert_eq!(events, expected)
}
4200 | | |
/// Ensures that [`Deserializer::read_string()`] can never get an `End` event,
/// because the parser reports an error early
#[test]
fn read_string() {
    // An end tag without a start tag is rejected
    match from_str::<String>(r#"</root>"#) {
        Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
            assert_eq!(cause, IllFormedError::UnmatchedEndTag("root".into()));
        }
        x => panic!(
            "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
            x
        ),
    }

    // An empty element deserializes into an empty string
    let s: String = from_str(r#"<root></root>"#).unwrap();
    assert_eq!(s, "");

    // An end tag with a different name than the open tag is rejected
    match from_str::<String>(r#"<root></other>"#) {
        Err(DeError::InvalidXml(Error::IllFormed(cause))) => assert_eq!(
            cause,
            IllFormedError::MismatchedEndTag {
                expected: "root".into(),
                found: "other".into(),
            }
        ),
        // The message now has balanced parentheses, like the one above
        x => panic!(
            "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
            x
        ),
    }
}
4229 | | |
4230 | | /// Tests for https://github.com/tafia/quick-xml/issues/474. |
4231 | | /// |
4232 | | /// That tests ensures that comments and processed instructions is ignored |
4233 | | /// and can split one logical string in pieces. |
4234 | | mod merge_text { |
4235 | | use super::*; |
4236 | | use pretty_assertions::assert_eq; |
4237 | | |
4238 | | #[test] |
4239 | | fn text() { |
4240 | | let mut de = make_de("text"); |
4241 | | assert_eq!(de.next().unwrap(), DeEvent::Text("text".into())); |
4242 | | } |
4243 | | |
4244 | | #[test] |
4245 | | fn cdata() { |
4246 | | let mut de = make_de("<![CDATA[cdata]]>"); |
4247 | | assert_eq!(de.next().unwrap(), DeEvent::Text("cdata".into())); |
4248 | | } |
4249 | | |
4250 | | #[test] |
4251 | | fn text_and_cdata() { |
4252 | | let mut de = make_de("text and <![CDATA[cdata]]>"); |
4253 | | assert_eq!(de.next().unwrap(), DeEvent::Text("text and cdata".into())); |
4254 | | } |
4255 | | |
4256 | | #[test] |
4257 | | fn text_and_empty_cdata() { |
4258 | | let mut de = make_de("text and <![CDATA[]]>"); |
4259 | | assert_eq!(de.next().unwrap(), DeEvent::Text("text and ".into())); |
4260 | | } |
4261 | | |
4262 | | #[test] |
4263 | | fn cdata_and_text() { |
4264 | | let mut de = make_de("<![CDATA[cdata]]> and text"); |
4265 | | assert_eq!(de.next().unwrap(), DeEvent::Text("cdata and text".into())); |
4266 | | } |
4267 | | |
4268 | | #[test] |
4269 | | fn empty_cdata_and_text() { |
4270 | | let mut de = make_de("<![CDATA[]]> and text"); |
4271 | | assert_eq!(de.next().unwrap(), DeEvent::Text(" and text".into())); |
4272 | | } |
4273 | | |
4274 | | #[test] |
4275 | | fn cdata_and_cdata() { |
4276 | | let mut de = make_de( |
4277 | | "\ |
4278 | | <![CDATA[cdata]]]]>\ |
4279 | | <![CDATA[>cdata]]>\ |
4280 | | ", |
4281 | | ); |
4282 | | assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into())); |
4283 | | } |
4284 | | |
4285 | | mod comment_between { |
4286 | | use super::*; |
4287 | | use pretty_assertions::assert_eq; |
4288 | | |
4289 | | #[test] |
4290 | | fn text() { |
4291 | | let mut de = make_de( |
4292 | | "\ |
4293 | | text \ |
4294 | | <!--comment 1--><!--comment 2--> \ |
4295 | | text\ |
4296 | | ", |
4297 | | ); |
4298 | | assert_eq!(de.next().unwrap(), DeEvent::Text("text text".into())); |
4299 | | } |
4300 | | |
4301 | | #[test] |
4302 | | fn cdata() { |
4303 | | let mut de = make_de( |
4304 | | "\ |
4305 | | <![CDATA[cdata]]]]>\ |
4306 | | <!--comment 1--><!--comment 2-->\ |
4307 | | <![CDATA[>cdata]]>\ |
4308 | | ", |
4309 | | ); |
4310 | | assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into())); |
4311 | | } |
4312 | | |
4313 | | #[test] |
4314 | | fn text_and_cdata() { |
4315 | | let mut de = make_de( |
4316 | | "\ |
4317 | | text \ |
4318 | | <!--comment 1--><!--comment 2-->\ |
4319 | | <![CDATA[ cdata]]>\ |
4320 | | ", |
4321 | | ); |
4322 | | assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata".into())); |
4323 | | } |
4324 | | |
4325 | | #[test] |
4326 | | fn text_and_empty_cdata() { |
4327 | | let mut de = make_de( |
4328 | | "\ |
4329 | | text \ |
4330 | | <!--comment 1--><!--comment 2-->\ |
4331 | | <![CDATA[]]>\ |
4332 | | ", |
4333 | | ); |
4334 | | assert_eq!(de.next().unwrap(), DeEvent::Text("text ".into())); |
4335 | | } |
4336 | | |
4337 | | #[test] |
4338 | | fn cdata_and_text() { |
4339 | | let mut de = make_de( |
4340 | | "\ |
4341 | | <![CDATA[cdata ]]>\ |
4342 | | <!--comment 1--><!--comment 2--> \ |
4343 | | text \ |
4344 | | ", |
4345 | | ); |
4346 | | assert_eq!(de.next().unwrap(), DeEvent::Text("cdata text ".into())); |
4347 | | } |
4348 | | |
4349 | | #[test] |
4350 | | fn empty_cdata_and_text() { |
4351 | | let mut de = make_de( |
4352 | | "\ |
4353 | | <![CDATA[]]>\ |
4354 | | <!--comment 1--><!--comment 2--> \ |
4355 | | text \ |
4356 | | ", |
4357 | | ); |
4358 | | assert_eq!(de.next().unwrap(), DeEvent::Text(" text ".into())); |
4359 | | } |
4360 | | |
4361 | | #[test] |
4362 | | fn cdata_and_cdata() { |
4363 | | let mut de = make_de( |
4364 | | "\ |
4365 | | <![CDATA[cdata]]]>\ |
4366 | | <!--comment 1--><!--comment 2-->\ |
4367 | | <![CDATA[]>cdata]]>\ |
4368 | | ", |
4369 | | ); |
4370 | | assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into())); |
4371 | | } |
4372 | | } |
4373 | | |
4374 | | mod pi_between { |
4375 | | use super::*; |
4376 | | use pretty_assertions::assert_eq; |
4377 | | |
4378 | | #[test] |
4379 | | fn text() { |
4380 | | let mut de = make_de( |
4381 | | "\ |
4382 | | text \ |
4383 | | <?pi 1?><?pi 2?> \ |
4384 | | text\ |
4385 | | ", |
4386 | | ); |
4387 | | assert_eq!(de.next().unwrap(), DeEvent::Text("text text".into())); |
4388 | | } |
4389 | | |
4390 | | #[test] |
4391 | | fn cdata() { |
4392 | | let mut de = make_de( |
4393 | | "\ |
4394 | | <![CDATA[cdata]]]]>\ |
4395 | | <?pi 1?><?pi 2?>\ |
4396 | | <![CDATA[>cdata]]>\ |
4397 | | ", |
4398 | | ); |
4399 | | assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into())); |
4400 | | } |
4401 | | |
4402 | | #[test] |
4403 | | fn text_and_cdata() { |
4404 | | let mut de = make_de( |
4405 | | "\ |
4406 | | text \ |
4407 | | <?pi 1?><?pi 2?>\ |
4408 | | <![CDATA[ cdata]]>\ |
4409 | | ", |
4410 | | ); |
4411 | | assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata".into())); |
4412 | | } |
4413 | | |
4414 | | #[test] |
4415 | | fn text_and_empty_cdata() { |
4416 | | let mut de = make_de( |
4417 | | "\ |
4418 | | text \ |
4419 | | <?pi 1?><?pi 2?>\ |
4420 | | <![CDATA[]]>\ |
4421 | | ", |
4422 | | ); |
4423 | | assert_eq!(de.next().unwrap(), DeEvent::Text("text ".into())); |
4424 | | } |
4425 | | |
4426 | | #[test] |
4427 | | fn cdata_and_text() { |
4428 | | let mut de = make_de( |
4429 | | "\ |
4430 | | <![CDATA[cdata ]]>\ |
4431 | | <?pi 1?><?pi 2?> \ |
4432 | | text \ |
4433 | | ", |
4434 | | ); |
4435 | | assert_eq!(de.next().unwrap(), DeEvent::Text("cdata text ".into())); |
4436 | | } |
4437 | | |
4438 | | #[test] |
4439 | | fn empty_cdata_and_text() { |
4440 | | let mut de = make_de( |
4441 | | "\ |
4442 | | <![CDATA[]]>\ |
4443 | | <?pi 1?><?pi 2?> \ |
4444 | | text \ |
4445 | | ", |
4446 | | ); |
4447 | | assert_eq!(de.next().unwrap(), DeEvent::Text(" text ".into())); |
4448 | | } |
4449 | | |
/// Same as `cdata`, but the `]]>` sequence is split at another position:
/// the first section ends with `]` and the second one starts with `]>`.
#[test]
fn cdata_and_cdata() {
    let xml = concat!(
        "<![CDATA[cdata]]]>",
        "<?pi 1?><?pi 2?>",
        "<![CDATA[]>cdata]]>",
    );
    let mut de = make_de(xml);

    let expected = DeEvent::Text("cdata]]>cdata".into());
    assert_eq!(de.next().unwrap(), expected);
}
4461 | | } |
4462 | | } |
4463 | | |
4464 | | /// Tests for https://github.com/tafia/quick-xml/issues/474. |
4465 | | /// |
4466 | | /// These tests ensure that any combination of payload data is processed |
4467 | | /// as expected. |
4468 | | mod triples { |
4469 | | use super::*; |
4470 | | use pretty_assertions::assert_eq; |
4471 | | |
4472 | | mod start { |
4473 | | use super::*; |
4474 | | |
4475 | | /// <tag1><tag2>... |
4476 | | // The same name is intentional |
4477 | | #[allow(clippy::module_inception)] |
4478 | | mod start { |
4479 | | use super::*; |
4480 | | use pretty_assertions::assert_eq; |
4481 | | |
4482 | | #[test] |
4483 | | fn start() { |
4484 | | let mut de = make_de("<tag1><tag2><tag3>"); |
4485 | | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1"))); |
4486 | | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2"))); |
4487 | | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag3"))); |
4488 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4489 | | } |
4490 | | |
4491 | | /// Not matching end tag will result to error |
4492 | | #[test] |
4493 | | fn end() { |
4494 | | let mut de = make_de("<tag1><tag2></tag2>"); |
4495 | | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1"))); |
4496 | | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2"))); |
4497 | | assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag2"))); |
4498 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4499 | | } |
4500 | | |
4501 | | #[test] |
4502 | | fn text() { |
4503 | | let mut de = make_de("<tag1><tag2> text "); |
4504 | | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1"))); |
4505 | | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2"))); |
4506 | | assert_eq!(de.next().unwrap(), DeEvent::Text(" text ".into())); |
4507 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4508 | | } |
4509 | | |
4510 | | #[test] |
4511 | | fn cdata() { |
4512 | | let mut de = make_de("<tag1><tag2><![CDATA[ cdata ]]>"); |
4513 | | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1"))); |
4514 | | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2"))); |
4515 | | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into())); |
4516 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4517 | | } |
4518 | | |
4519 | | #[test] |
4520 | | fn eof() { |
4521 | | let mut de = make_de("<tag1><tag2>"); |
4522 | | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1"))); |
4523 | | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2"))); |
4524 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4525 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4526 | | } |
4527 | | } |
4528 | | |
4529 | | /// <tag></tag>... |
4530 | | mod end { |
4531 | | use super::*; |
4532 | | use pretty_assertions::assert_eq; |
4533 | | |
4534 | | #[test] |
4535 | | fn start() { |
4536 | | let mut de = make_de("<tag></tag><tag2>"); |
4537 | | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); |
4538 | | assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag"))); |
4539 | | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2"))); |
4540 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4541 | | } |
4542 | | |
4543 | | #[test] |
4544 | | fn end() { |
4545 | | let mut de = make_de("<tag></tag></tag2>"); |
4546 | | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); |
4547 | | assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag"))); |
4548 | | match de.next() { |
4549 | | Err(DeError::InvalidXml(Error::IllFormed(cause))) => { |
4550 | | assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag2".into())); |
4551 | | } |
4552 | | x => panic!( |
4553 | | "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`", |
4554 | | x |
4555 | | ), |
4556 | | } |
4557 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4558 | | } |
4559 | | |
4560 | | #[test] |
4561 | | fn text() { |
4562 | | let mut de = make_de("<tag></tag> text "); |
4563 | | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); |
4564 | | assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag"))); |
4565 | | assert_eq!(de.next().unwrap(), DeEvent::Text(" text ".into())); |
4566 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4567 | | } |
4568 | | |
4569 | | #[test] |
4570 | | fn cdata() { |
4571 | | let mut de = make_de("<tag></tag><![CDATA[ cdata ]]>"); |
4572 | | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); |
4573 | | assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag"))); |
4574 | | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into())); |
4575 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4576 | | } |
4577 | | |
4578 | | #[test] |
4579 | | fn eof() { |
4580 | | let mut de = make_de("<tag></tag>"); |
4581 | | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); |
4582 | | assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag"))); |
4583 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4584 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4585 | | } |
4586 | | } |
4587 | | |
4588 | | /// <tag> text ... |
4589 | | mod text { |
4590 | | use super::*; |
4591 | | use pretty_assertions::assert_eq; |
4592 | | |
4593 | | #[test] |
4594 | | fn start() { |
4595 | | let mut de = make_de("<tag> text <tag2>"); |
4596 | | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); |
4597 | | assert_eq!(de.next().unwrap(), DeEvent::Text(" text ".into())); |
4598 | | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2"))); |
4599 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4600 | | } |
4601 | | |
4602 | | #[test] |
4603 | | fn end() { |
4604 | | let mut de = make_de("<tag> text </tag>"); |
4605 | | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); |
4606 | | assert_eq!(de.next().unwrap(), DeEvent::Text(" text ".into())); |
4607 | | assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag"))); |
4608 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4609 | | } |
4610 | | |
4611 | | // start::text::text has no difference from start::text |
4612 | | |
4613 | | #[test] |
4614 | | fn cdata() { |
4615 | | let mut de = make_de("<tag> text <![CDATA[ cdata ]]>"); |
4616 | | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); |
4617 | | assert_eq!(de.next().unwrap(), DeEvent::Text(" text cdata ".into())); |
4618 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4619 | | } |
4620 | | |
4621 | | #[test] |
4622 | | fn eof() { |
4623 | | let mut de = make_de("<tag> text "); |
4624 | | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); |
4625 | | assert_eq!(de.next().unwrap(), DeEvent::Text(" text ".into())); |
4626 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4627 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4628 | | } |
4629 | | } |
4630 | | |
4631 | | /// <tag><![CDATA[ cdata ]]>... |
4632 | | mod cdata { |
4633 | | use super::*; |
4634 | | use pretty_assertions::assert_eq; |
4635 | | |
4636 | | #[test] |
4637 | | fn start() { |
4638 | | let mut de = make_de("<tag><![CDATA[ cdata ]]><tag2>"); |
4639 | | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); |
4640 | | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into())); |
4641 | | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2"))); |
4642 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4643 | | } |
4644 | | |
4645 | | #[test] |
4646 | | fn end() { |
4647 | | let mut de = make_de("<tag><![CDATA[ cdata ]]></tag>"); |
4648 | | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); |
4649 | | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into())); |
4650 | | assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag"))); |
4651 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4652 | | } |
4653 | | |
4654 | | #[test] |
4655 | | fn text() { |
4656 | | let mut de = make_de("<tag><![CDATA[ cdata ]]> text "); |
4657 | | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); |
4658 | | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata text ".into())); |
4659 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4660 | | } |
4661 | | |
4662 | | #[test] |
4663 | | fn cdata() { |
4664 | | let mut de = make_de("<tag><![CDATA[ cdata ]]><![CDATA[ cdata2 ]]>"); |
4665 | | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); |
4666 | | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata cdata2 ".into())); |
4667 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4668 | | } |
4669 | | |
4670 | | #[test] |
4671 | | fn eof() { |
4672 | | let mut de = make_de("<tag><![CDATA[ cdata ]]>"); |
4673 | | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); |
4674 | | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into())); |
4675 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4676 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4677 | | } |
4678 | | } |
4679 | | } |
4680 | | |
/// Input that begins with an end tag always produces an error, because no
/// element is open yet. The call after the error yields `Eof`.
#[test]
fn end() {
    let mut de = make_de("</tag>");

    match de.next() {
        Err(DeError::InvalidXml(Error::IllFormed(reason))) => {
            assert_eq!(reason, IllFormedError::UnmatchedEndTag("tag".into()));
        }
        unexpected => panic!(
            "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
            unexpected
        ),
    }
    assert_eq!(de.next().unwrap(), DeEvent::Eof);
}
4696 | | |
4697 | | mod text { |
4698 | | use super::*; |
4699 | | use pretty_assertions::assert_eq; |
4700 | | |
4701 | | mod start { |
4702 | | use super::*; |
4703 | | use pretty_assertions::assert_eq; |
4704 | | |
4705 | | #[test] |
4706 | | fn start() { |
4707 | | let mut de = make_de(" text <tag1><tag2>"); |
4708 | | assert_eq!(de.next().unwrap(), DeEvent::Text(" text ".into())); |
4709 | | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1"))); |
4710 | | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2"))); |
4711 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4712 | | } |
4713 | | |
4714 | | /// Not matching end tag will result in error |
4715 | | #[test] |
4716 | | fn end() { |
4717 | | let mut de = make_de(" text <tag></tag>"); |
4718 | | assert_eq!(de.next().unwrap(), DeEvent::Text(" text ".into())); |
4719 | | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); |
4720 | | assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag"))); |
4721 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4722 | | } |
4723 | | |
4724 | | #[test] |
4725 | | fn text() { |
4726 | | let mut de = make_de(" text <tag> text2 "); |
4727 | | assert_eq!(de.next().unwrap(), DeEvent::Text(" text ".into())); |
4728 | | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); |
4729 | | assert_eq!(de.next().unwrap(), DeEvent::Text(" text2 ".into())); |
4730 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4731 | | } |
4732 | | |
4733 | | #[test] |
4734 | | fn cdata() { |
4735 | | let mut de = make_de(" text <tag><![CDATA[ cdata ]]>"); |
4736 | | assert_eq!(de.next().unwrap(), DeEvent::Text(" text ".into())); |
4737 | | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); |
4738 | | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into())); |
4739 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4740 | | } |
4741 | | |
4742 | | #[test] |
4743 | | fn eof() { |
4744 | | let mut de = make_de(" text <tag>"); |
4745 | | assert_eq!(de.next().unwrap(), DeEvent::Text(" text ".into())); |
4746 | | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); |
4747 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4748 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4749 | | } |
4750 | | } |
4751 | | |
/// An end tag without a corresponding start tag always produces an error,
/// even when text precedes it. The call after the error yields `Eof`.
#[test]
fn end() {
    let mut de = make_de(" text </tag>");
    let leading_text = DeEvent::Text(" text ".into());
    assert_eq!(de.next().unwrap(), leading_text);

    match de.next() {
        Err(DeError::InvalidXml(Error::IllFormed(reason))) => {
            assert_eq!(reason, IllFormedError::UnmatchedEndTag("tag".into()));
        }
        unexpected => panic!(
            "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
            unexpected
        ),
    }
    assert_eq!(de.next().unwrap(), DeEvent::Eof);
}
4768 | | |
4769 | | // text::text::something is equivalent to text::something |
4770 | | |
4771 | | mod cdata { |
4772 | | use super::*; |
4773 | | use pretty_assertions::assert_eq; |
4774 | | |
4775 | | #[test] |
4776 | | fn start() { |
4777 | | let mut de = make_de(" text <![CDATA[ cdata ]]><tag>"); |
4778 | | assert_eq!(de.next().unwrap(), DeEvent::Text(" text cdata ".into())); |
4779 | | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); |
4780 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4781 | | } |
4782 | | |
4783 | | #[test] |
4784 | | fn end() { |
4785 | | let mut de = make_de(" text <![CDATA[ cdata ]]></tag>"); |
4786 | | assert_eq!(de.next().unwrap(), DeEvent::Text(" text cdata ".into())); |
4787 | | match de.next() { |
4788 | | Err(DeError::InvalidXml(Error::IllFormed(cause))) => { |
4789 | | assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into())); |
4790 | | } |
4791 | | x => panic!( |
4792 | | "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`", |
4793 | | x |
4794 | | ), |
4795 | | } |
4796 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4797 | | } |
4798 | | |
4799 | | #[test] |
4800 | | fn text() { |
4801 | | let mut de = make_de(" text <![CDATA[ cdata ]]> text2 "); |
4802 | | assert_eq!( |
4803 | | de.next().unwrap(), |
4804 | | DeEvent::Text(" text cdata text2 ".into()) |
4805 | | ); |
4806 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4807 | | } |
4808 | | |
4809 | | #[test] |
4810 | | fn cdata() { |
4811 | | let mut de = make_de(" text <![CDATA[ cdata ]]><![CDATA[ cdata2 ]]>"); |
4812 | | assert_eq!( |
4813 | | de.next().unwrap(), |
4814 | | DeEvent::Text(" text cdata cdata2 ".into()) |
4815 | | ); |
4816 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4817 | | } |
4818 | | |
4819 | | #[test] |
4820 | | fn eof() { |
4821 | | let mut de = make_de(" text <![CDATA[ cdata ]]>"); |
4822 | | assert_eq!(de.next().unwrap(), DeEvent::Text(" text cdata ".into())); |
4823 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4824 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4825 | | } |
4826 | | } |
4827 | | } |
4828 | | |
4829 | | mod cdata { |
4830 | | use super::*; |
4831 | | use pretty_assertions::assert_eq; |
4832 | | |
4833 | | mod start { |
4834 | | use super::*; |
4835 | | use pretty_assertions::assert_eq; |
4836 | | |
4837 | | #[test] |
4838 | | fn start() { |
4839 | | let mut de = make_de("<![CDATA[ cdata ]]><tag1><tag2>"); |
4840 | | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into())); |
4841 | | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1"))); |
4842 | | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2"))); |
4843 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4844 | | } |
4845 | | |
4846 | | /// Not matching end tag will result in error |
4847 | | #[test] |
4848 | | fn end() { |
4849 | | let mut de = make_de("<![CDATA[ cdata ]]><tag></tag>"); |
4850 | | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into())); |
4851 | | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); |
4852 | | assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag"))); |
4853 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4854 | | } |
4855 | | |
4856 | | #[test] |
4857 | | fn text() { |
4858 | | let mut de = make_de("<![CDATA[ cdata ]]><tag> text "); |
4859 | | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into())); |
4860 | | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); |
4861 | | assert_eq!(de.next().unwrap(), DeEvent::Text(" text ".into())); |
4862 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4863 | | } |
4864 | | |
4865 | | #[test] |
4866 | | fn cdata() { |
4867 | | let mut de = make_de("<![CDATA[ cdata ]]><tag><![CDATA[ cdata2 ]]>"); |
4868 | | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into())); |
4869 | | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); |
4870 | | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata2 ".into())); |
4871 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4872 | | } |
4873 | | |
4874 | | #[test] |
4875 | | fn eof() { |
4876 | | let mut de = make_de("<![CDATA[ cdata ]]><tag>"); |
4877 | | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into())); |
4878 | | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); |
4879 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4880 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4881 | | } |
4882 | | } |
4883 | | |
/// An end tag without a corresponding start tag always produces an error,
/// even when CDATA precedes it. The call after the error yields `Eof`.
#[test]
fn end() {
    let mut de = make_de("<![CDATA[ cdata ]]></tag>");
    let leading_cdata = DeEvent::Text(" cdata ".into());
    assert_eq!(de.next().unwrap(), leading_cdata);

    match de.next() {
        Err(DeError::InvalidXml(Error::IllFormed(reason))) => {
            assert_eq!(reason, IllFormedError::UnmatchedEndTag("tag".into()));
        }
        unexpected => panic!(
            "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
            unexpected
        ),
    }
    assert_eq!(de.next().unwrap(), DeEvent::Eof);
}
4900 | | |
4901 | | mod text { |
4902 | | use super::*; |
4903 | | use pretty_assertions::assert_eq; |
4904 | | |
4905 | | #[test] |
4906 | | fn start() { |
4907 | | let mut de = make_de("<![CDATA[ cdata ]]> text <tag>"); |
4908 | | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata text ".into())); |
4909 | | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); |
4910 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4911 | | } |
4912 | | |
4913 | | #[test] |
4914 | | fn end() { |
4915 | | let mut de = make_de("<![CDATA[ cdata ]]> text </tag>"); |
4916 | | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata text ".into())); |
4917 | | match de.next() { |
4918 | | Err(DeError::InvalidXml(Error::IllFormed(cause))) => { |
4919 | | assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into())); |
4920 | | } |
4921 | | x => panic!( |
4922 | | "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`", |
4923 | | x |
4924 | | ), |
4925 | | } |
4926 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4927 | | } |
4928 | | |
4929 | | // cdata::text::text is equivalent to cdata::text |
4930 | | |
4931 | | #[test] |
4932 | | fn cdata() { |
4933 | | let mut de = make_de("<![CDATA[ cdata ]]> text <![CDATA[ cdata2 ]]>"); |
4934 | | assert_eq!( |
4935 | | de.next().unwrap(), |
4936 | | DeEvent::Text(" cdata text cdata2 ".into()) |
4937 | | ); |
4938 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4939 | | } |
4940 | | |
4941 | | #[test] |
4942 | | fn eof() { |
4943 | | let mut de = make_de("<![CDATA[ cdata ]]> text "); |
4944 | | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata text ".into())); |
4945 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4946 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4947 | | } |
4948 | | } |
4949 | | |
4950 | | // The same name is intentional |
4951 | | #[allow(clippy::module_inception)] |
4952 | | mod cdata { |
4953 | | use super::*; |
4954 | | use pretty_assertions::assert_eq; |
4955 | | |
4956 | | #[test] |
4957 | | fn start() { |
4958 | | let mut de = make_de("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]><tag>"); |
4959 | | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata cdata2 ".into())); |
4960 | | assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); |
4961 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4962 | | } |
4963 | | |
4964 | | #[test] |
4965 | | fn end() { |
4966 | | let mut de = make_de("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]></tag>"); |
4967 | | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata cdata2 ".into())); |
4968 | | match de.next() { |
4969 | | Err(DeError::InvalidXml(Error::IllFormed(cause))) => { |
4970 | | assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into())); |
4971 | | } |
4972 | | x => panic!( |
4973 | | "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`", |
4974 | | x |
4975 | | ), |
4976 | | } |
4977 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4978 | | } |
4979 | | |
4980 | | #[test] |
4981 | | fn text() { |
4982 | | let mut de = make_de("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]> text "); |
4983 | | assert_eq!( |
4984 | | de.next().unwrap(), |
4985 | | DeEvent::Text(" cdata cdata2 text ".into()) |
4986 | | ); |
4987 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4988 | | } |
4989 | | |
4990 | | #[test] |
4991 | | fn cdata() { |
4992 | | let mut de = |
4993 | | make_de("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]><![CDATA[ cdata3 ]]>"); |
4994 | | assert_eq!( |
4995 | | de.next().unwrap(), |
4996 | | DeEvent::Text(" cdata cdata2 cdata3 ".into()) |
4997 | | ); |
4998 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
4999 | | } |
5000 | | |
5001 | | #[test] |
5002 | | fn eof() { |
5003 | | let mut de = make_de("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]>"); |
5004 | | assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata cdata2 ".into())); |
5005 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
5006 | | assert_eq!(de.next().unwrap(), DeEvent::Eof); |
5007 | | } |
5008 | | } |
5009 | | } |
5010 | | } |
5011 | | } |