Coverage Report

Created: 2025-06-16 06:50

/rust/registry/src/index.crates.io-6f17d22bba15001f/quick-xml-0.29.0/src/events/mod.rs
Line
Count
Source (jump to first uncovered line)
1
//! Defines zero-copy XML events used throughout this library.
2
//!
3
//! An XML event often represents part of an XML element.
4
//! They occur both during reading and writing and are
5
//! usually used with the stream-oriented API.
6
//!
7
//! For example, the XML element
8
//! ```xml
9
//! <name attr="value">Inner text</name>
10
//! ```
11
//! consists of the three events `Start`, `Text` and `End`.
12
//! They can also represent other parts in an XML document like the
13
//! XML declaration. Each Event usually contains further information,
14
//! like the tag name, the attribute or the inner text.
15
//!
16
//! See [`Event`] for a list of all possible events.
17
//!
18
//! # Reading
19
//! When reading an XML stream, the events are emitted by [`Reader::read_event`]
20
//! and [`Reader::read_event_into`]. You must listen
21
//! for the different types of events you are interested in.
22
//!
23
//! See [`Reader`] for further information.
24
//!
25
//! # Writing
26
//! When writing the XML document, you must create the XML element
27
//! by constructing the events it consists of and pass them to the writer
28
//! sequentially.
29
//!
30
//! See [`Writer`] for further information.
31
//!
32
//! [`Reader::read_event`]: crate::reader::Reader::read_event
33
//! [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
34
//! [`Reader`]: crate::reader::Reader
35
//! [`Writer`]: crate::writer::Writer
36
//! [`Event`]: crate::events::Event
37
38
pub mod attributes;
39
40
#[cfg(feature = "encoding")]
41
use encoding_rs::Encoding;
42
use std::borrow::Cow;
43
use std::fmt::{self, Debug, Formatter};
44
use std::ops::Deref;
45
use std::str::from_utf8;
46
47
use crate::encoding::Decoder;
48
use crate::errors::{Error, Result};
49
use crate::escape::{escape, partial_escape, unescape_with};
50
use crate::name::{LocalName, QName};
51
use crate::reader::is_whitespace;
52
use crate::utils::write_cow_string;
53
#[cfg(feature = "serialize")]
54
use crate::utils::CowRef;
55
use attributes::{Attribute, Attributes};
56
use std::mem::replace;
57
58
/// Opening tag data (`Event::Start`), with optional attributes.
59
///
60
/// `<name attr="value">`.
61
///
62
/// The name can be accessed using the [`name`] or [`local_name`] methods.
63
/// An iterator over the attributes is returned by the [`attributes`] method.
64
///
65
/// [`name`]: Self::name
66
/// [`local_name`]: Self::local_name
67
/// [`attributes`]: Self::attributes
68
#[derive(Clone, Eq, PartialEq)]
69
pub struct BytesStart<'a> {
70
    /// content of the element, before any utf8 conversion
71
    pub(crate) buf: Cow<'a, [u8]>,
72
    /// end of the element name, the name starts at the start of `buf`
73
    pub(crate) name_len: usize,
74
}
75
76
impl<'a> BytesStart<'a> {
77
    /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
78
    #[inline]
79
38.6M
    pub(crate) fn wrap(content: &'a [u8], name_len: usize) -> Self {
80
38.6M
        BytesStart {
81
38.6M
            buf: Cow::Borrowed(content),
82
38.6M
            name_len,
83
38.6M
        }
84
38.6M
    }
85
86
    /// Creates a new `BytesStart` from the given name.
87
    ///
88
    /// # Warning
89
    ///
90
    /// `name` must be a valid name.
91
    #[inline]
92
0
    pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
93
0
        let buf = str_cow_to_bytes(name);
94
0
        BytesStart {
95
0
            name_len: buf.len(),
96
0
            buf,
97
0
        }
98
0
    }
99
100
    /// Creates a new `BytesStart` from the given content (name + attributes).
101
    ///
102
    /// # Warning
103
    ///
104
    /// `&content[..name_len]` must be a valid name, and the remainder of `content`
105
    /// must be correctly-formed attributes. Neither is checked; it is possible
106
    /// to generate invalid XML if `content` or `name_len` are incorrect.
107
    #[inline]
108
0
    pub fn from_content<C: Into<Cow<'a, str>>>(content: C, name_len: usize) -> Self {
109
0
        BytesStart {
110
0
            buf: str_cow_to_bytes(content),
111
0
            name_len,
112
0
        }
113
0
    }
114
115
    /// Converts the event into an owned event.
116
0
    pub fn into_owned(self) -> BytesStart<'static> {
117
0
        BytesStart {
118
0
            buf: Cow::Owned(self.buf.into_owned()),
119
0
            name_len: self.name_len,
120
0
        }
121
0
    }
122
123
    /// Converts the event into an owned event without taking ownership of Event
124
0
    pub fn to_owned(&self) -> BytesStart<'static> {
125
0
        BytesStart {
126
0
            buf: Cow::Owned(self.buf.clone().into_owned()),
127
0
            name_len: self.name_len,
128
0
        }
129
0
    }
130
131
    /// Converts the event into a borrowed event. Most useful when paired with [`to_end`].
132
    ///
133
    /// # Example
134
    ///
135
    /// ```
136
    /// use quick_xml::events::{BytesStart, Event};
137
    /// # use quick_xml::writer::Writer;
138
    /// # use quick_xml::Error;
139
    ///
140
    /// struct SomeStruct<'a> {
141
    ///     attrs: BytesStart<'a>,
142
    ///     // ...
143
    /// }
144
    /// # impl<'a> SomeStruct<'a> {
145
    /// # fn example(&self) -> Result<(), Error> {
146
    /// # let mut writer = Writer::new(Vec::new());
147
    ///
148
    /// writer.write_event(Event::Start(self.attrs.borrow()))?;
149
    /// // ...
150
    /// writer.write_event(Event::End(self.attrs.to_end()))?;
151
    /// # Ok(())
152
    /// # }}
153
    /// ```
154
    ///
155
    /// [`to_end`]: Self::to_end
156
0
    pub fn borrow(&self) -> BytesStart {
157
0
        BytesStart {
158
0
            buf: Cow::Borrowed(&self.buf),
159
0
            name_len: self.name_len,
160
0
        }
161
0
    }
162
163
    /// Creates new paired close tag
164
28.8M
    pub fn to_end(&self) -> BytesEnd {
165
28.8M
        BytesEnd::wrap(self.name().into_inner().into())
166
28.8M
    }
167
168
    /// Gets the undecoded raw tag name, as present in the input stream.
169
    #[inline]
170
67.4M
    pub fn name(&self) -> QName {
171
67.4M
        QName(&self.buf[..self.name_len])
172
67.4M
    }
<quick_xml::events::BytesStart>::name
Line
Count
Source
170
38.5M
    pub fn name(&self) -> QName {
171
38.5M
        QName(&self.buf[..self.name_len])
172
38.5M
    }
<quick_xml::events::BytesStart>::name
Line
Count
Source
170
28.8M
    pub fn name(&self) -> QName {
171
28.8M
        QName(&self.buf[..self.name_len])
172
28.8M
    }
173
174
    /// Gets the undecoded raw local tag name (excluding namespace) as present
175
    /// in the input stream.
176
    ///
177
    /// All content up to and including the first `:` character is removed from the tag name.
178
    #[inline]
179
0
    pub fn local_name(&self) -> LocalName {
180
0
        self.name().into()
181
0
    }
182
183
    /// Edit the name of the BytesStart in-place
184
    ///
185
    /// # Warning
186
    ///
187
    /// `name` must be a valid name.
188
0
    pub fn set_name(&mut self, name: &[u8]) -> &mut BytesStart<'a> {
189
0
        let bytes = self.buf.to_mut();
190
0
        bytes.splice(..self.name_len, name.iter().cloned());
191
0
        self.name_len = name.len();
192
0
        self
193
0
    }
194
195
    /// Gets the undecoded raw tag name, as present in the input stream, which
196
    /// is borrowed either to the input, or to the event.
197
    ///
198
    /// # Lifetimes
199
    ///
200
    /// - `'a`: Lifetime of the input data from which this event is borrowed
201
    /// - `'e`: Lifetime of the concrete event instance
202
    // TODO: We should make this a part of the public API, but with a safe wrapper for a name
203
    #[cfg(feature = "serialize")]
204
    pub(crate) fn raw_name<'e>(&'e self) -> CowRef<'a, 'e, [u8]> {
205
        match self.buf {
206
            Cow::Borrowed(b) => CowRef::Input(&b[..self.name_len]),
207
            Cow::Owned(ref o) => CowRef::Slice(&o[..self.name_len]),
208
        }
209
    }
210
}
211
212
/// Attribute-related methods
213
impl<'a> BytesStart<'a> {
214
    /// Consumes `self` and yield a new `BytesStart` with additional attributes from an iterator.
215
    ///
216
    /// The yielded items must be convertible to [`Attribute`] using `Into`.
217
0
    pub fn with_attributes<'b, I>(mut self, attributes: I) -> Self
218
0
    where
219
0
        I: IntoIterator,
220
0
        I::Item: Into<Attribute<'b>>,
221
0
    {
222
0
        self.extend_attributes(attributes);
223
0
        self
224
0
    }
225
226
    /// Add additional attributes to this tag using an iterator.
227
    ///
228
    /// The yielded items must be convertible to [`Attribute`] using `Into`.
229
0
    pub fn extend_attributes<'b, I>(&mut self, attributes: I) -> &mut BytesStart<'a>
230
0
    where
231
0
        I: IntoIterator,
232
0
        I::Item: Into<Attribute<'b>>,
233
0
    {
234
0
        for attr in attributes {
235
0
            self.push_attribute(attr);
236
0
        }
237
0
        self
238
0
    }
239
240
    /// Adds an attribute to this element.
241
0
    pub fn push_attribute<'b, A>(&mut self, attr: A)
242
0
    where
243
0
        A: Into<Attribute<'b>>,
244
0
    {
245
0
        let a = attr.into();
246
0
        let bytes = self.buf.to_mut();
247
0
        bytes.push(b' ');
248
0
        bytes.extend_from_slice(a.key.as_ref());
249
0
        bytes.extend_from_slice(b"=\"");
250
0
        bytes.extend_from_slice(a.value.as_ref());
251
0
        bytes.push(b'"');
252
0
    }
253
254
    /// Remove all attributes from the BytesStart
255
0
    pub fn clear_attributes(&mut self) -> &mut BytesStart<'a> {
256
0
        self.buf.to_mut().truncate(self.name_len);
257
0
        self
258
0
    }
259
260
    /// Returns an iterator over the attributes of this tag.
261
2.32M
    pub fn attributes(&self) -> Attributes {
262
2.32M
        Attributes::wrap(&self.buf, self.name_len, false)
263
2.32M
    }
264
265
    /// Returns an iterator over the HTML-like attributes of this tag (no mandatory quotes or `=`).
266
0
    pub fn html_attributes(&self) -> Attributes {
267
0
        Attributes::wrap(&self.buf, self.name_len, true)
268
0
    }
269
270
    /// Gets the undecoded raw string with the attributes of this tag as a `&[u8]`,
271
    /// including the whitespace after the tag name if there is any.
272
    #[inline]
273
0
    pub fn attributes_raw(&self) -> &[u8] {
274
0
        &self.buf[self.name_len..]
275
0
    }
276
277
    /// Try to get an attribute
278
2.32M
    pub fn try_get_attribute<N: AsRef<[u8]> + Sized>(
279
2.32M
        &'a self,
280
2.32M
        attr_name: N,
281
2.32M
    ) -> Result<Option<Attribute<'a>>> {
282
3.46M
        for a in self.attributes().with_checks(false) {
283
3.46M
            let a = a?;
284
3.46M
            if a.key.as_ref() == attr_name.as_ref() {
285
1.51M
                return Ok(Some(a));
286
1.94M
            }
287
        }
288
804k
        Ok(None)
289
2.32M
    }
290
}
291
292
impl<'a> Debug for BytesStart<'a> {
293
0
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
294
0
        write!(f, "BytesStart {{ buf: ")?;
295
0
        write_cow_string(f, &self.buf)?;
296
0
        write!(f, ", name_len: {} }}", self.name_len)
297
0
    }
298
}
299
300
impl<'a> Deref for BytesStart<'a> {
301
    type Target = [u8];
302
303
0
    fn deref(&self) -> &[u8] {
304
0
        &self.buf
305
0
    }
306
}
307
308
#[cfg(feature = "arbitrary")]
309
impl<'a> arbitrary::Arbitrary<'a> for BytesStart<'a> {
310
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
311
        let s = <&str>::arbitrary(u)?;
312
        if s.is_empty() || !s.chars().all(char::is_alphanumeric) {
313
            return Err(arbitrary::Error::IncorrectFormat);
314
        }
315
        let mut result = Self::new(s);
316
        result.extend_attributes(Vec::<(&str, &str)>::arbitrary(u)?.into_iter());
317
        Ok(result)
318
    }
319
320
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
321
        return <&str as arbitrary::Arbitrary>::size_hint(depth);
322
    }
323
}
324
////////////////////////////////////////////////////////////////////////////////////////////////////
325
326
/// An XML declaration (`Event::Decl`).
327
///
328
/// [W3C XML 1.1 Prolog and Document Type Declaration](http://w3.org/TR/xml11/#sec-prolog-dtd)
329
#[derive(Clone, Debug, Eq, PartialEq)]
330
pub struct BytesDecl<'a> {
331
    content: BytesStart<'a>,
332
}
333
334
impl<'a> BytesDecl<'a> {
335
    /// Constructs a new `BytesDecl` from the (mandatory) _version_ (should be `1.0` or `1.1`),
336
    /// the optional _encoding_ (e.g., `UTF-8`) and the optional _standalone_ (`yes` or `no`)
337
    /// attribute.
338
    ///
339
    /// Does not escape any of its inputs. Always uses double quotes to wrap the attribute values.
340
    /// The caller is responsible for escaping attribute values. Shouldn't usually be relevant since
341
    /// the double quote character is not allowed in any of the attribute values.
342
0
    pub fn new(
343
0
        version: &str,
344
0
        encoding: Option<&str>,
345
0
        standalone: Option<&str>,
346
0
    ) -> BytesDecl<'static> {
347
        // Compute length of the buffer based on supplied attributes
348
        // ' encoding=""'   => 12
349
0
        let encoding_attr_len = if let Some(xs) = encoding {
350
0
            12 + xs.len()
351
        } else {
352
0
            0
353
        };
354
        // ' standalone=""' => 14
355
0
        let standalone_attr_len = if let Some(xs) = standalone {
356
0
            14 + xs.len()
357
        } else {
358
0
            0
359
        };
360
        // 'xml version=""' => 14
361
0
        let mut buf = String::with_capacity(14 + encoding_attr_len + standalone_attr_len);
362
0
363
0
        buf.push_str("xml version=\"");
364
0
        buf.push_str(version);
365
366
0
        if let Some(encoding_val) = encoding {
367
0
            buf.push_str("\" encoding=\"");
368
0
            buf.push_str(encoding_val);
369
0
        }
370
371
0
        if let Some(standalone_val) = standalone {
372
0
            buf.push_str("\" standalone=\"");
373
0
            buf.push_str(standalone_val);
374
0
        }
375
0
        buf.push('"');
376
0
377
0
        BytesDecl {
378
0
            content: BytesStart::from_content(buf, 3),
379
0
        }
380
0
    }
381
382
    /// Creates a `BytesDecl` from a `BytesStart`
383
13.1k
    pub fn from_start(start: BytesStart<'a>) -> Self {
384
13.1k
        Self { content: start }
385
13.1k
    }
386
387
    /// Gets xml version, excluding quotes (`'` or `"`).
388
    ///
389
    /// According to the [grammar], the version *must* be the first thing in the declaration.
390
    /// This method tries to extract the first thing in the declaration and return it.
391
    /// In case of multiple attributes, the value of the first one is returned.
392
    ///
393
    /// If the version is missing from the declaration, or the first thing is not a version,
394
    /// [`Error::XmlDeclWithoutVersion`] will be returned.
395
    ///
396
    /// # Examples
397
    ///
398
    /// ```
399
    /// use quick_xml::Error;
400
    /// use quick_xml::events::{BytesDecl, BytesStart};
401
    ///
402
    /// // <?xml version='1.1'?>
403
    /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
404
    /// assert_eq!(decl.version().unwrap(), b"1.1".as_ref());
405
    ///
406
    /// // <?xml version='1.0' version='1.1'?>
407
    /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.0' version='1.1'", 0));
408
    /// assert_eq!(decl.version().unwrap(), b"1.0".as_ref());
409
    ///
410
    /// // <?xml encoding='utf-8'?>
411
    /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0));
412
    /// match decl.version() {
413
    ///     Err(Error::XmlDeclWithoutVersion(Some(key))) => assert_eq!(key, "encoding"),
414
    ///     _ => assert!(false),
415
    /// }
416
    ///
417
    /// // <?xml encoding='utf-8' version='1.1'?>
418
    /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8' version='1.1'", 0));
419
    /// match decl.version() {
420
    ///     Err(Error::XmlDeclWithoutVersion(Some(key))) => assert_eq!(key, "encoding"),
421
    ///     _ => assert!(false),
422
    /// }
423
    ///
424
    /// // <?xml?>
425
    /// let decl = BytesDecl::from_start(BytesStart::from_content("", 0));
426
    /// match decl.version() {
427
    ///     Err(Error::XmlDeclWithoutVersion(None)) => {},
428
    ///     _ => assert!(false),
429
    /// }
430
    /// ```
431
    ///
432
    /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
433
0
    pub fn version(&self) -> Result<Cow<[u8]>> {
434
0
        // The version *must* be the first thing in the declaration.
435
0
        match self.content.attributes().with_checks(false).next() {
436
0
            Some(Ok(a)) if a.key.as_ref() == b"version" => Ok(a.value),
437
            // first attribute was not "version"
438
0
            Some(Ok(a)) => {
439
0
                let found = from_utf8(a.key.as_ref())?.to_string();
440
0
                Err(Error::XmlDeclWithoutVersion(Some(found)))
441
            }
442
            // error parsing attributes
443
0
            Some(Err(e)) => Err(e.into()),
444
            // no attributes
445
0
            None => Err(Error::XmlDeclWithoutVersion(None)),
446
        }
447
0
    }
448
449
    /// Gets xml encoding, excluding quotes (`'` or `"`).
450
    ///
451
    /// Although according to the [grammar] encoding must appear before `"standalone"`
452
    /// and after `"version"`, this method does not check that. The first occurrence
453
    /// of the attribute will be returned even if there are several. Also, method does
454
    /// not restrict the symbols that can form the encoding, so the returned encoding
455
    /// name may not correspond to the grammar.
456
    ///
457
    /// # Examples
458
    ///
459
    /// ```
460
    /// use std::borrow::Cow;
461
    /// use quick_xml::Error;
462
    /// use quick_xml::events::{BytesDecl, BytesStart};
463
    ///
464
    /// // <?xml version='1.1'?>
465
    /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
466
    /// assert!(decl.encoding().is_none());
467
    ///
468
    /// // <?xml encoding='utf-8'?>
469
    /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0));
470
    /// match decl.encoding() {
471
    ///     Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"utf-8"),
472
    ///     _ => assert!(false),
473
    /// }
474
    ///
475
    /// // <?xml encoding='something_WRONG' encoding='utf-8'?>
476
    /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='something_WRONG' encoding='utf-8'", 0));
477
    /// match decl.encoding() {
478
    ///     Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"something_WRONG"),
479
    ///     _ => assert!(false),
480
    /// }
481
    /// ```
482
    ///
483
    /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
484
0
    pub fn encoding(&self) -> Option<Result<Cow<[u8]>>> {
485
0
        self.content
486
0
            .try_get_attribute("encoding")
487
0
            .map(|a| a.map(|a| a.value))
488
0
            .transpose()
489
0
    }
490
491
    /// Gets xml standalone, excluding quotes (`'` or `"`).
492
    ///
493
    /// Although according to the [grammar] standalone flag must appear after `"version"`
494
    /// and `"encoding"`, this method does not check that. The first occurrence of the
495
    /// attribute will be returned even if there are several. Also, method does not
496
    /// restrict the symbols that can form the value, so the returned flag name may not
497
    /// correspond to the grammar.
498
    ///
499
    /// # Examples
500
    ///
501
    /// ```
502
    /// use std::borrow::Cow;
503
    /// use quick_xml::Error;
504
    /// use quick_xml::events::{BytesDecl, BytesStart};
505
    ///
506
    /// // <?xml version='1.1'?>
507
    /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
508
    /// assert!(decl.standalone().is_none());
509
    ///
510
    /// // <?xml standalone='yes'?>
511
    /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='yes'", 0));
512
    /// match decl.standalone() {
513
    ///     Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"yes"),
514
    ///     _ => assert!(false),
515
    /// }
516
    ///
517
    /// // <?xml standalone='something_WRONG' encoding='utf-8'?>
518
    /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='something_WRONG' encoding='utf-8'", 0));
519
    /// match decl.standalone() {
520
    ///     Some(Ok(Cow::Borrowed(flag))) => assert_eq!(flag, b"something_WRONG"),
521
    ///     _ => assert!(false),
522
    /// }
523
    /// ```
524
    ///
525
    /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
526
0
    pub fn standalone(&self) -> Option<Result<Cow<[u8]>>> {
527
0
        self.content
528
0
            .try_get_attribute("standalone")
529
0
            .map(|a| a.map(|a| a.value))
530
0
            .transpose()
531
0
    }
532
533
    /// Gets the actual encoding using [_get an encoding_](https://encoding.spec.whatwg.org/#concept-encoding-get)
534
    /// algorithm.
535
    ///
536
    /// If the encoding is not known, or the `encoding` key was not found, returns `None`.
537
    /// In case of duplicated `encoding` key, encoding, corresponding to the first
538
    /// one, is returned.
539
    #[cfg(feature = "encoding")]
540
    pub fn encoder(&self) -> Option<&'static Encoding> {
541
        self.encoding()
542
            .and_then(|e| e.ok())
543
            .and_then(|e| Encoding::for_label(&e))
544
    }
545
546
    /// Converts the event into an owned event.
547
0
    pub fn into_owned(self) -> BytesDecl<'static> {
548
0
        BytesDecl {
549
0
            content: self.content.into_owned(),
550
0
        }
551
0
    }
552
553
    /// Converts the event into a borrowed event.
554
    #[inline]
555
0
    pub fn borrow(&self) -> BytesDecl {
556
0
        BytesDecl {
557
0
            content: self.content.borrow(),
558
0
        }
559
0
    }
560
}
561
562
impl<'a> Deref for BytesDecl<'a> {
563
    type Target = [u8];
564
565
0
    fn deref(&self) -> &[u8] {
566
0
        &self.content
567
0
    }
568
}
569
570
#[cfg(feature = "arbitrary")]
571
impl<'a> arbitrary::Arbitrary<'a> for BytesDecl<'a> {
572
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
573
        Ok(Self::new(
574
            <&str>::arbitrary(u)?,
575
            Option::<&str>::arbitrary(u)?,
576
            Option::<&str>::arbitrary(u)?,
577
        ))
578
    }
579
580
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
581
        return <&str as arbitrary::Arbitrary>::size_hint(depth);
582
    }
583
}
584
585
////////////////////////////////////////////////////////////////////////////////////////////////////
586
587
/// A struct to manage `Event::End` events
588
#[derive(Clone, Eq, PartialEq)]
589
pub struct BytesEnd<'a> {
590
    name: Cow<'a, [u8]>,
591
}
592
593
impl<'a> BytesEnd<'a> {
594
    /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
595
    #[inline]
596
65.1M
    pub(crate) fn wrap(name: Cow<'a, [u8]>) -> Self {
597
65.1M
        BytesEnd { name }
598
65.1M
    }
599
600
    /// Creates a new `BytesEnd` borrowing a slice.
601
    ///
602
    /// # Warning
603
    ///
604
    /// `name` must be a valid name.
605
    #[inline]
606
0
    pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
607
0
        Self::wrap(str_cow_to_bytes(name))
608
0
    }
609
610
    /// Converts the event into an owned event.
611
0
    pub fn into_owned(self) -> BytesEnd<'static> {
612
0
        BytesEnd {
613
0
            name: Cow::Owned(self.name.into_owned()),
614
0
        }
615
0
    }
616
617
    /// Converts the event into a borrowed event.
618
    #[inline]
619
0
    pub fn borrow(&self) -> BytesEnd {
620
0
        BytesEnd {
621
0
            name: Cow::Borrowed(&self.name),
622
0
        }
623
0
    }
624
625
    /// Gets the undecoded raw tag name, as present in the input stream.
626
    #[inline]
627
65.1M
    pub fn name(&self) -> QName {
628
65.1M
        QName(&self.name)
629
65.1M
    }
<quick_xml::events::BytesEnd>::name
Line
Count
Source
627
36.2M
    pub fn name(&self) -> QName {
628
36.2M
        QName(&self.name)
629
36.2M
    }
<quick_xml::events::BytesEnd>::name
Line
Count
Source
627
28.8M
    pub fn name(&self) -> QName {
628
28.8M
        QName(&self.name)
629
28.8M
    }
630
631
    /// Gets the undecoded raw local tag name (excluding namespace) as present
632
    /// in the input stream.
633
    ///
634
    /// All content up to and including the first `:` character is removed from the tag name.
635
    #[inline]
636
0
    pub fn local_name(&self) -> LocalName {
637
0
        self.name().into()
638
0
    }
639
}
640
641
impl<'a> Debug for BytesEnd<'a> {
642
0
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
643
0
        write!(f, "BytesEnd {{ name: ")?;
644
0
        write_cow_string(f, &self.name)?;
645
0
        write!(f, " }}")
646
0
    }
647
}
648
649
impl<'a> Deref for BytesEnd<'a> {
650
    type Target = [u8];
651
652
0
    fn deref(&self) -> &[u8] {
653
0
        &self.name
654
0
    }
655
}
656
657
#[cfg(feature = "arbitrary")]
658
impl<'a> arbitrary::Arbitrary<'a> for BytesEnd<'a> {
659
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
660
        Ok(Self::new(<&str>::arbitrary(u)?))
661
    }
662
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
663
        return <&str as arbitrary::Arbitrary>::size_hint(depth);
664
    }
665
}
666
667
////////////////////////////////////////////////////////////////////////////////////////////////////
668
669
/// Data from various events (most notably, `Event::Text`) that is stored in XML
670
/// in escaped form. Internally data is stored in escaped form
671
#[derive(Clone, Eq, PartialEq)]
672
pub struct BytesText<'a> {
673
    /// Escaped then encoded content of the event. Content is encoded in the XML
674
    /// document encoding when event comes from the reader and should be in the
675
    /// document encoding when event passed to the writer
676
    content: Cow<'a, [u8]>,
677
    /// Encoding in which the `content` is stored inside the event
678
    decoder: Decoder,
679
}
680
681
impl<'a> BytesText<'a> {
682
    /// Creates a new `BytesText` from an escaped byte sequence in the specified encoding.
683
    #[inline]
684
74.7M
    pub(crate) fn wrap<C: Into<Cow<'a, [u8]>>>(content: C, decoder: Decoder) -> Self {
685
74.7M
        Self {
686
74.7M
            content: content.into(),
687
74.7M
            decoder,
688
74.7M
        }
689
74.7M
    }
Unexecuted instantiation: <quick_xml::events::BytesText>::wrap::<alloc::borrow::Cow<[u8]>>
<quick_xml::events::BytesText>::wrap::<&[u8]>
Line
Count
Source
684
74.7M
    pub(crate) fn wrap<C: Into<Cow<'a, [u8]>>>(content: C, decoder: Decoder) -> Self {
685
74.7M
        Self {
686
74.7M
            content: content.into(),
687
74.7M
            decoder,
688
74.7M
        }
689
74.7M
    }
690
691
    /// Creates a new `BytesText` from an escaped string.
692
    #[inline]
693
0
    pub fn from_escaped<C: Into<Cow<'a, str>>>(content: C) -> Self {
694
0
        Self::wrap(str_cow_to_bytes(content), Decoder::utf8())
695
0
    }
696
697
    /// Creates a new `BytesText` from a string. The string is expected not to
698
    /// be escaped.
699
    #[inline]
700
0
    pub fn new(content: &'a str) -> Self {
701
0
        Self::from_escaped(escape(content))
702
0
    }
703
704
    /// Ensures that all data is owned to extend the object's lifetime if
705
    /// necessary.
706
    #[inline]
707
0
    pub fn into_owned(self) -> BytesText<'static> {
708
0
        BytesText {
709
0
            content: self.content.into_owned().into(),
710
0
            decoder: self.decoder,
711
0
        }
712
0
    }
713
714
    /// Extracts the inner `Cow` from the `BytesText` event container.
715
    #[inline]
716
0
    pub fn into_inner(self) -> Cow<'a, [u8]> {
717
0
        self.content
718
0
    }
719
720
    /// Converts the event into a borrowed event.
721
    #[inline]
722
0
    pub fn borrow(&self) -> BytesText {
723
0
        BytesText {
724
0
            content: Cow::Borrowed(&self.content),
725
0
            decoder: self.decoder,
726
0
        }
727
0
    }
728
729
    /// Decodes then unescapes the content of the event.
730
    ///
731
    /// This will allocate if the value contains any escape sequences or in
732
    /// non-UTF-8 encoding.
733
0
    pub fn unescape(&self) -> Result<Cow<'a, str>> {
734
0
        self.unescape_with(|_| None)
735
0
    }
736
737
    /// Decodes then unescapes the content of the event with custom entities.
738
    ///
739
    /// This will allocate if the value contains any escape sequences or in
740
    /// non-UTF-8 encoding.
741
0
    pub fn unescape_with<'entity>(
742
0
        &self,
743
0
        resolve_entity: impl FnMut(&str) -> Option<&'entity str>,
744
0
    ) -> Result<Cow<'a, str>> {
745
0
        let decoded = match &self.content {
746
0
            Cow::Borrowed(bytes) => self.decoder.decode(bytes)?,
747
            // Convert to owned, because otherwise Cow will be bound with wrong lifetime
748
0
            Cow::Owned(bytes) => self.decoder.decode(bytes)?.into_owned().into(),
749
        };
750
751
0
        match unescape_with(&decoded, resolve_entity)? {
752
            // Because the result is borrowed, no replacements were done and we can use the original string
753
0
            Cow::Borrowed(_) => Ok(decoded),
754
0
            Cow::Owned(s) => Ok(s.into()),
755
        }
756
0
    }
757
758
    /// Removes leading XML whitespace bytes from text content.
759
    ///
760
    /// Returns `true` if content is empty after that
761
0
    pub fn inplace_trim_start(&mut self) -> bool {
762
0
        self.content = trim_cow(
763
0
            replace(&mut self.content, Cow::Borrowed(b"")),
764
0
            trim_xml_start,
765
0
        );
766
0
        self.content.is_empty()
767
0
    }
768
769
    /// Removes trailing XML whitespace bytes from text content.
770
    ///
771
    /// Returns `true` if content is empty after that
772
0
    pub fn inplace_trim_end(&mut self) -> bool {
773
0
        self.content = trim_cow(replace(&mut self.content, Cow::Borrowed(b"")), trim_xml_end);
774
0
        self.content.is_empty()
775
0
    }
776
}
777
778
impl<'a> Debug for BytesText<'a> {
779
0
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
780
0
        write!(f, "BytesText {{ content: ")?;
781
0
        write_cow_string(f, &self.content)?;
782
0
        write!(f, " }}")
783
0
    }
784
}
785
786
impl<'a> Deref for BytesText<'a> {
787
    type Target = [u8];
788
789
0
    fn deref(&self) -> &[u8] {
790
0
        &self.content
791
0
    }
792
}
793
794
#[cfg(feature = "arbitrary")]
795
impl<'a> arbitrary::Arbitrary<'a> for BytesText<'a> {
796
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
797
        let s = <&str>::arbitrary(u)?;
798
        if !s.chars().all(char::is_alphanumeric) {
799
            return Err(arbitrary::Error::IncorrectFormat);
800
        }
801
        Ok(Self::new(s))
802
    }
803
804
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
805
        return <&str as arbitrary::Arbitrary>::size_hint(depth);
806
    }
807
}
808
809
////////////////////////////////////////////////////////////////////////////////////////////////////
810
811
/// CDATA content contains unescaped data from the reader. If you want to write them as a text,
812
/// [convert](Self::escape) it to [`BytesText`]
813
#[derive(Clone, Eq, PartialEq)]
814
pub struct BytesCData<'a> {
815
    content: Cow<'a, [u8]>,
816
    /// Encoding in which the `content` is stored inside the event
817
    decoder: Decoder,
818
}
819
820
impl<'a> BytesCData<'a> {
821
    /// Creates a new `BytesCData` from a byte sequence in the specified encoding.
822
    #[inline]
823
0
    pub(crate) fn wrap<C: Into<Cow<'a, [u8]>>>(content: C, decoder: Decoder) -> Self {
824
0
        Self {
825
0
            content: content.into(),
826
0
            decoder,
827
0
        }
828
0
    }
829
830
    /// Creates a new `BytesCData` from a string.
831
    ///
832
    /// # Warning
833
    ///
834
    /// `content` must not contain the `]]>` sequence.
835
    #[inline]
836
0
    pub fn new<C: Into<Cow<'a, str>>>(content: C) -> Self {
837
0
        Self::wrap(str_cow_to_bytes(content), Decoder::utf8())
838
0
    }
839
840
    /// Ensures that all data is owned to extend the object's lifetime if
841
    /// necessary.
842
    #[inline]
843
0
    pub fn into_owned(self) -> BytesCData<'static> {
844
0
        BytesCData {
845
0
            content: self.content.into_owned().into(),
846
0
            decoder: self.decoder,
847
0
        }
848
0
    }
849
850
    /// Extracts the inner `Cow` from the `BytesCData` event container.
851
    #[inline]
852
0
    pub fn into_inner(self) -> Cow<'a, [u8]> {
853
0
        self.content
854
0
    }
855
856
    /// Converts the event into a borrowed event.
857
    #[inline]
858
0
    pub fn borrow(&self) -> BytesCData {
859
0
        BytesCData {
860
0
            content: Cow::Borrowed(&self.content),
861
0
            decoder: self.decoder,
862
0
        }
863
0
    }
864
865
    /// Converts this CDATA content to an escaped version, that can be written
866
    /// as an usual text in XML.
867
    ///
868
    /// This function performs following replacements:
869
    ///
870
    /// | Character | Replacement
871
    /// |-----------|------------
872
    /// | `<`       | `&lt;`
873
    /// | `>`       | `&gt;`
874
    /// | `&`       | `&amp;`
875
    /// | `'`       | `&apos;`
876
    /// | `"`       | `&quot;`
877
0
    pub fn escape(self) -> Result<BytesText<'a>> {
878
0
        let decoded = self.decode()?;
879
        Ok(BytesText::wrap(
880
0
            match escape(&decoded) {
881
                // Because result is borrowed, no replacements was done and we can use original content
882
0
                Cow::Borrowed(_) => self.content,
883
0
                Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
884
            },
885
0
            Decoder::utf8(),
886
        ))
887
0
    }
888
889
    /// Converts this CDATA content to an escaped version, that can be written
890
    /// as an usual text in XML.
891
    ///
892
    /// In XML text content, it is allowed (though not recommended) to leave
893
    /// the quote special characters `"` and `'` unescaped.
894
    ///
895
    /// This function performs following replacements:
896
    ///
897
    /// | Character | Replacement
898
    /// |-----------|------------
899
    /// | `<`       | `&lt;`
900
    /// | `>`       | `&gt;`
901
    /// | `&`       | `&amp;`
902
0
    pub fn partial_escape(self) -> Result<BytesText<'a>> {
903
0
        let decoded = self.decode()?;
904
        Ok(BytesText::wrap(
905
0
            match partial_escape(&decoded) {
906
                // Because result is borrowed, no replacements was done and we can use original content
907
0
                Cow::Borrowed(_) => self.content,
908
0
                Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
909
            },
910
0
            Decoder::utf8(),
911
        ))
912
0
    }
913
914
    /// Gets content of this text buffer in the specified encoding
915
0
    pub(crate) fn decode(&self) -> Result<Cow<'a, str>> {
916
0
        Ok(match &self.content {
917
0
            Cow::Borrowed(bytes) => self.decoder.decode(bytes)?,
918
            // Convert to owned, because otherwise Cow will be bound with wrong lifetime
919
0
            Cow::Owned(bytes) => self.decoder.decode(bytes)?.into_owned().into(),
920
        })
921
0
    }
922
}
923
924
impl<'a> Debug for BytesCData<'a> {
925
0
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
926
0
        write!(f, "BytesCData {{ content: ")?;
927
0
        write_cow_string(f, &self.content)?;
928
0
        write!(f, " }}")
929
0
    }
930
}
931
932
impl<'a> Deref for BytesCData<'a> {
933
    type Target = [u8];
934
935
0
    fn deref(&self) -> &[u8] {
936
0
        &self.content
937
0
    }
938
}
939
940
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesCData<'a> {
    /// Builds a `BytesCData` from an arbitrary string, without filtering it.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        Ok(Self::new(<&str>::arbitrary(u)?))
    }

    /// Forwards to the size hint of `&str`, the only input that is consumed.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
949
950
////////////////////////////////////////////////////////////////////////////////////////////////////
951
952
/// Event emitted by [`Reader::read_event_into`].
953
///
954
/// [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
955
#[derive(Clone, Debug, Eq, PartialEq)]
956
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
957
pub enum Event<'a> {
958
    /// Start tag (with attributes) `<tag attr="value">`.
959
    Start(BytesStart<'a>),
960
    /// End tag `</tag>`.
961
    End(BytesEnd<'a>),
962
    /// Empty element tag (with attributes) `<tag attr="value" />`.
963
    Empty(BytesStart<'a>),
964
    /// Escaped character data between tags.
965
    Text(BytesText<'a>),
966
    /// Unescaped character data stored in `<![CDATA[...]]>`.
967
    CData(BytesCData<'a>),
968
    /// Comment `<!-- ... -->`.
969
    Comment(BytesText<'a>),
970
    /// XML declaration `<?xml ...?>`.
971
    Decl(BytesDecl<'a>),
972
    /// Processing instruction `<?...?>`.
973
    PI(BytesText<'a>),
974
    /// Document type definition data (DTD) stored in `<!DOCTYPE ...>`.
975
    DocType(BytesText<'a>),
976
    /// End of XML document.
977
    Eof,
978
}
979
980
impl<'a> Event<'a> {
981
    /// Converts the event to an owned version, untied to the lifetime of
982
    /// buffer used when reading but incurring a new, separate allocation.
983
0
    pub fn into_owned(self) -> Event<'static> {
984
0
        match self {
985
0
            Event::Start(e) => Event::Start(e.into_owned()),
986
0
            Event::End(e) => Event::End(e.into_owned()),
987
0
            Event::Empty(e) => Event::Empty(e.into_owned()),
988
0
            Event::Text(e) => Event::Text(e.into_owned()),
989
0
            Event::Comment(e) => Event::Comment(e.into_owned()),
990
0
            Event::CData(e) => Event::CData(e.into_owned()),
991
0
            Event::Decl(e) => Event::Decl(e.into_owned()),
992
0
            Event::PI(e) => Event::PI(e.into_owned()),
993
0
            Event::DocType(e) => Event::DocType(e.into_owned()),
994
0
            Event::Eof => Event::Eof,
995
        }
996
0
    }
997
998
    /// Converts the event into a borrowed event.
999
    #[inline]
1000
0
    pub fn borrow(&self) -> Event {
1001
0
        match self {
1002
0
            Event::Start(e) => Event::Start(e.borrow()),
1003
0
            Event::End(e) => Event::End(e.borrow()),
1004
0
            Event::Empty(e) => Event::Empty(e.borrow()),
1005
0
            Event::Text(e) => Event::Text(e.borrow()),
1006
0
            Event::Comment(e) => Event::Comment(e.borrow()),
1007
0
            Event::CData(e) => Event::CData(e.borrow()),
1008
0
            Event::Decl(e) => Event::Decl(e.borrow()),
1009
0
            Event::PI(e) => Event::PI(e.borrow()),
1010
0
            Event::DocType(e) => Event::DocType(e.borrow()),
1011
0
            Event::Eof => Event::Eof,
1012
        }
1013
0
    }
1014
}
1015
1016
impl<'a> Deref for Event<'a> {
1017
    type Target = [u8];
1018
1019
0
    fn deref(&self) -> &[u8] {
1020
0
        match *self {
1021
0
            Event::Start(ref e) | Event::Empty(ref e) => e,
1022
0
            Event::End(ref e) => e,
1023
0
            Event::Text(ref e) => e,
1024
0
            Event::Decl(ref e) => e,
1025
0
            Event::PI(ref e) => e,
1026
0
            Event::CData(ref e) => e,
1027
0
            Event::Comment(ref e) => e,
1028
0
            Event::DocType(ref e) => e,
1029
0
            Event::Eof => &[],
1030
        }
1031
0
    }
1032
}
1033
1034
impl<'a> AsRef<Event<'a>> for Event<'a> {
    /// Returns a reference to this very event, so that both `Event` and
    /// `&Event` satisfy an `AsRef<Event>` bound.
    fn as_ref(&self) -> &Event<'a> {
        self
    }
}
1039
1040
////////////////////////////////////////////////////////////////////////////////////////////////////
1041
1042
/// Reinterprets a string `Cow` as a `Cow` over its UTF-8 bytes, keeping a
/// borrowed input borrowed and an owned input owned (no copy of the data).
#[inline]
fn str_cow_to_bytes<'a, C: Into<Cow<'a, str>>>(content: C) -> Cow<'a, [u8]> {
    let text: Cow<'a, str> = content.into();
    match text {
        Cow::Owned(owned) => Cow::Owned(owned.into_bytes()),
        Cow::Borrowed(borrowed) => Cow::Borrowed(borrowed.as_bytes()),
    }
}
1049
1050
/// Returns a byte slice with leading XML whitespace bytes removed.
1051
///
1052
/// 'Whitespace' refers to the definition used by [`is_whitespace`].
1053
0
const fn trim_xml_start(mut bytes: &[u8]) -> &[u8] {
1054
    // Note: A pattern matching based approach (instead of indexing) allows
1055
    // making the function const.
1056
0
    while let [first, rest @ ..] = bytes {
1057
0
        if is_whitespace(*first) {
1058
0
            bytes = rest;
1059
0
        } else {
1060
0
            break;
1061
        }
1062
    }
1063
0
    bytes
1064
0
}
1065
1066
/// Returns a byte slice with trailing XML whitespace bytes removed.
1067
///
1068
/// 'Whitespace' refers to the definition used by [`is_whitespace`].
1069
0
const fn trim_xml_end(mut bytes: &[u8]) -> &[u8] {
1070
    // Note: A pattern matching based approach (instead of indexing) allows
1071
    // making the function const.
1072
0
    while let [rest @ .., last] = bytes {
1073
0
        if is_whitespace(*last) {
1074
0
            bytes = rest;
1075
0
        } else {
1076
0
            break;
1077
        }
1078
    }
1079
0
    bytes
1080
0
}
1081
1082
0
fn trim_cow<'a, F>(value: Cow<'a, [u8]>, trim: F) -> Cow<'a, [u8]>
1083
0
where
1084
0
    F: FnOnce(&[u8]) -> &[u8],
1085
0
{
1086
0
    match value {
1087
0
        Cow::Borrowed(bytes) => Cow::Borrowed(trim(bytes)),
1088
0
        Cow::Owned(mut bytes) => {
1089
0
            let trimmed = trim(&bytes);
1090
0
            if trimmed.len() != bytes.len() {
1091
0
                bytes = trimmed.to_vec();
1092
0
            }
1093
0
            Cow::Owned(bytes)
1094
        }
1095
    }
1096
0
}
Unexecuted instantiation: quick_xml::events::trim_cow::<quick_xml::events::trim_xml_end>
Unexecuted instantiation: quick_xml::events::trim_cow::<quick_xml::events::trim_xml_start>
1097
1098
#[cfg(test)]
mod test {
    use super::*;
    use pretty_assertions::assert_eq;

    /// A freshly created start tag consists of its name only.
    #[test]
    fn bytestart_create() {
        let start = BytesStart::new("test");
        assert_eq!(start.len(), 4);
        assert_eq!(start.name(), QName(b"test"));
    }

    /// Renaming a tag keeps the attributes that were pushed before.
    #[test]
    fn bytestart_set_name() {
        let mut start = BytesStart::new("test");
        assert_eq!(start.len(), 4);
        assert_eq!(start.name(), QName(b"test"));
        assert_eq!(start.attributes_raw(), b"");

        // Adding ` x="a"` grows the raw content by six bytes.
        start.push_attribute(("x", "a"));
        assert_eq!(start.len(), 10);
        assert_eq!(start.attributes_raw(), b" x=\"a\"");

        // The new one-byte name replaces the four-byte one.
        start.set_name(b"g");
        assert_eq!(start.len(), 7);
        assert_eq!(start.name(), QName(b"g"));
    }

    /// Clearing attributes leaves only the tag name behind.
    #[test]
    fn bytestart_clear_attributes() {
        let mut start = BytesStart::new("test");
        start.push_attribute(("x", "y\"z"));
        start.push_attribute(("x", "y\"z"));
        start.clear_attributes();
        assert!(start.attributes().next().is_none());
        assert_eq!(start.len(), 4);
        assert_eq!(start.name(), QName(b"test"));
    }
}