Coverage Report

Created: 2026-06-14 06:21

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/quick-xml-0.40.1/src/utils.rs
Line
Count
Source
1
use std::borrow::{Borrow, Cow};
2
use std::fmt::{self, Debug, Formatter};
3
use std::io;
4
use std::iter::FusedIterator;
5
use std::ops::Deref;
6
7
#[cfg(feature = "async-tokio")]
8
use std::{
9
    pin::Pin,
10
    task::{Context, Poll},
11
};
12
13
#[cfg(feature = "serialize")]
14
use serde::de::{Deserialize, Deserializer, Error, Visitor};
15
#[cfg(feature = "serialize")]
16
use serde::ser::{Serialize, Serializer};
17
18
#[allow(clippy::ptr_arg)]
19
0
pub fn write_cow_string(f: &mut Formatter, cow_string: &Cow<[u8]>) -> fmt::Result {
20
0
    match cow_string {
21
0
        Cow::Owned(s) => {
22
0
            write!(f, "Owned(")?;
23
0
            write_byte_string(f, s)?;
24
        }
25
0
        Cow::Borrowed(s) => {
26
0
            write!(f, "Borrowed(")?;
27
0
            write_byte_string(f, s)?;
28
        }
29
    }
30
0
    write!(f, ")")
31
0
}
32
33
0
/// Writes `byte_string` to `f` as a double-quoted, human-readable string.
///
/// - printable ASCII bytes (`0x20..=0x7E`) are written as characters, except
///   that `"` is escaped as `\"`;
/// - every other byte is written in `0x`-prefixed upper-case hexadecimal
///   (for example, byte `10` is written as `0xA`).
pub fn write_byte_string(f: &mut Formatter, byte_string: &[u8]) -> fmt::Result {
    f.write_str("\"")?;
    for &byte in byte_string {
        if byte == b'"' {
            f.write_str("\\\"")?;
        } else if matches!(byte, b' '..=b'~') {
            write!(f, "{}", char::from(byte))?;
        } else {
            write!(f, "{:#02X}", byte)?;
        }
    }
    f.write_str("\"")
}
45
46
////////////////////////////////////////////////////////////////////////////////////////////////////
47
48
/// A [`Cow`]-like type that can borrow from two different buffers, one of
/// which is the deserializer input.
///
/// # Lifetimes
///
/// - `'i`: lifetime of the data that the deserializer borrows from the parsed input
/// - `'s`: lifetime of the data that is owned by a deserializer
pub enum CowRef<'i, 's, B>
where
    B: ToOwned + ?Sized,
{
    /// An input borrowed from the parsed data
    Input(&'i B),
    /// An input borrowed from the buffer owned by another deserializer
    Slice(&'s B),
    /// An input taken from an external deserializer, owned by that deserializer
    Owned(<B as ToOwned>::Owned),
}

impl<'i, 's, B> Deref for CowRef<'i, 's, B>
where
    B: ToOwned + ?Sized,
    B::Owned: Borrow<B>,
{
    type Target = B;

    /// Gives access to the underlying `B`, whichever variant holds it.
    fn deref(&self) -> &B {
        match self {
            Self::Owned(owned) => owned.borrow(),
            Self::Slice(slice) => *slice,
            Self::Input(input) => *input,
        }
    }
}

impl<'i, 's, B> Debug for CowRef<'i, 's, B>
where
    B: ToOwned + ?Sized + Debug,
    B::Owned: Debug,
{
    /// Formats the held value directly; the variant name is not printed.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        match self {
            Self::Owned(owned) => Debug::fmt(owned, f),
            Self::Slice(slice) => Debug::fmt(*slice, f),
            Self::Input(input) => Debug::fmt(*input, f),
        }
    }
}
95
96
impl<'i, 's> CowRef<'i, 's, str> {
97
    /// Supply to the visitor a borrowed string, a string slice, or an owned
98
    /// string depending on the kind of input. Unlike [`Self::deserialize_all`],
99
    /// only part of [`Self::Owned`] string will be passed to the visitor.
100
    ///
101
    /// Calls
102
    /// - `visitor.visit_borrowed_str` if data borrowed from the input
103
    /// - `visitor.visit_str` if data borrowed from another source
104
    /// - `visitor.visit_string` if data owned by this type
105
    #[cfg(feature = "serialize")]
106
0
    pub fn deserialize_str<V, E>(self, visitor: V) -> Result<V::Value, E>
107
0
    where
108
0
        V: Visitor<'i>,
109
0
        E: Error,
110
    {
111
0
        match self {
112
0
            Self::Input(s) => visitor.visit_borrowed_str(s),
113
0
            Self::Slice(s) => visitor.visit_str(s),
114
0
            Self::Owned(s) => visitor.visit_string(s),
115
        }
116
0
    }
117
118
    /// Calls [`Visitor::visit_bool`] with `true` or `false` if text contains
119
    /// [valid] boolean representation, otherwise calls [`Self::deserialize_str`].
120
    ///
121
    /// The valid boolean representations are only `"true"`, `"false"`, `"1"`, and `"0"`.
122
    ///
123
    /// [valid]: https://www.w3.org/TR/xmlschema11-2/#boolean
124
    #[cfg(feature = "serialize")]
125
0
    pub fn deserialize_bool<V, E>(self, visitor: V) -> Result<V::Value, E>
126
0
    where
127
0
        V: Visitor<'i>,
128
0
        E: Error,
129
    {
130
0
        match self.as_ref() {
131
0
            "1" | "true" => visitor.visit_bool(true),
132
0
            "0" | "false" => visitor.visit_bool(false),
133
0
            _ => self.deserialize_str(visitor),
134
        }
135
0
    }
136
}
137
138
////////////////////////////////////////////////////////////////////////////////////////////////////
139
140
/// Wrapper around `Vec<u8>` that has a human-readable debug representation:
141
/// printable ASCII symbols output as is, all other output in HEX notation.
142
///
143
/// Also, when [`serialize`] feature is on, this type deserialized using
144
/// [`deserialize_byte_buf`](serde::Deserializer::deserialize_byte_buf) instead
145
/// of vector's generic [`deserialize_seq`](serde::Deserializer::deserialize_seq)
146
///
147
/// [`serialize`]: ../index.html#serialize
148
#[derive(PartialEq, Eq)]
149
pub struct ByteBuf(pub Vec<u8>);
150
151
impl Debug for ByteBuf {
152
0
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
153
0
        write_byte_string(f, &self.0)
154
0
    }
155
}
156
157
#[cfg(feature = "serialize")]
impl<'de> Deserialize<'de> for ByteBuf {
    /// Requests an owned byte buffer from the deserializer and wraps whatever
    /// byte data it supplies (borrowed or owned) into a [`ByteBuf`].
    fn deserialize<D>(d: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        /// Visitor that accepts byte data either as a slice or as a vector.
        struct OwnedBytesVisitor;

        impl<'de> Visitor<'de> for OwnedBytesVisitor {
            type Value = ByteBuf;

            fn expecting(&self, f: &mut Formatter) -> fmt::Result {
                f.write_str("byte data")
            }

            fn visit_byte_buf<E: Error>(self, v: Vec<u8>) -> Result<Self::Value, E> {
                Ok(ByteBuf(v))
            }

            fn visit_bytes<E: Error>(self, v: &[u8]) -> Result<Self::Value, E> {
                // A borrowed slice has to be copied to become owned.
                self.visit_byte_buf(v.to_vec())
            }
        }

        d.deserialize_byte_buf(OwnedBytesVisitor)
    }
}
184
185
#[cfg(feature = "serialize")]
impl Serialize for ByteBuf {
    /// Serializes the wrapped vector as a single bytes value.
    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        let Self(bytes) = self;
        serializer.serialize_bytes(bytes)
    }
}
194
195
////////////////////////////////////////////////////////////////////////////////////////////////////
196
197
/// Wrapper around `&[u8]` that has a human-readable debug representation:
198
/// printable ASCII symbols output as is, all other output in HEX notation.
199
///
200
/// Also, when [`serialize`] feature is on, this type deserialized using
201
/// [`deserialize_bytes`](serde::Deserializer::deserialize_bytes) instead
202
/// of vector's generic [`deserialize_seq`](serde::Deserializer::deserialize_seq)
203
///
204
/// [`serialize`]: ../index.html#serialize
205
#[derive(PartialEq, Eq)]
206
pub struct Bytes<'de>(pub &'de [u8]);
207
208
impl<'de> Debug for Bytes<'de> {
209
0
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
210
0
        write_byte_string(f, self.0)
211
0
    }
212
}
213
214
#[cfg(feature = "serialize")]
impl<'de> Deserialize<'de> for Bytes<'de> {
    /// Requests bytes from the deserializer and wraps them into [`Bytes`].
    ///
    /// The visitor only implements `visit_borrowed_bytes`, i.e. it only
    /// handles data that is borrowed for the `'de` lifetime.
    fn deserialize<D>(d: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        /// Visitor that accepts only bytes borrowed for `'de`.
        struct BorrowedBytesVisitor;

        impl<'de> Visitor<'de> for BorrowedBytesVisitor {
            type Value = Bytes<'de>;

            fn expecting(&self, f: &mut Formatter) -> fmt::Result {
                f.write_str("borrowed bytes")
            }

            fn visit_borrowed_bytes<E: Error>(self, v: &'de [u8]) -> Result<Self::Value, E> {
                Ok(Bytes(v))
            }
        }

        d.deserialize_bytes(BorrowedBytesVisitor)
    }
}
237
238
#[cfg(feature = "serialize")]
impl<'de> Serialize for Bytes<'de> {
    /// Serializes the wrapped slice as a single bytes value.
    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        let Self(bytes) = self;
        serializer.serialize_bytes(bytes)
    }
}
247
248
////////////////////////////////////////////////////////////////////////////////////////////////////
249
250
/// A simple producer of infinite stream of bytes, useful in tests.
///
/// Will repeat `chunk` field indefinitely.
pub struct Fountain<'a> {
    /// That piece of data repeated infinitely...
    pub chunk: &'a [u8],
    /// Part of `chunk` that was already consumed by the `Read` / `BufRead` impls
    pub consumed: usize,
    /// The overall count of read bytes
    pub overall_read: u64,
}

impl<'a> io::Read for Fountain<'a> {
    /// Copies as many bytes as fit into `buf` from the not-yet-consumed part
    /// of the chunk and marks them as consumed.
    ///
    /// Returns the number of bytes copied, which is at most the length of the
    /// unconsumed part of the chunk, so a single call never wraps around.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        let available = &self.chunk[self.consumed..];
        let len = buf.len().min(available.len());

        // Only the first `len` bytes of `buf` are filled: `copy_from_slice`
        // panics unless both slices have exactly the same length.
        buf[..len].copy_from_slice(&available[..len]);

        // Advance the position so that successive reads walk through the
        // chunk (and wrap around) instead of returning the same bytes again.
        io::BufRead::consume(self, len);
        Ok(len)
    }
}

impl<'a> io::BufRead for Fountain<'a> {
    /// Returns the part of the chunk that was not consumed yet.
    #[inline]
    fn fill_buf(&mut self) -> io::Result<&[u8]> {
        Ok(&self.chunk[self.consumed..])
    }

    /// Marks `amt` bytes as consumed, starting over from the beginning of the
    /// chunk once its end is reached, and adds `amt` to `overall_read`.
    fn consume(&mut self, amt: usize) {
        self.consumed += amt;
        if self.consumed == self.chunk.len() {
            self.consumed = 0;
        }
        self.overall_read += amt as u64;
    }
}
287
288
#[cfg(feature = "async-tokio")]
impl<'a> tokio::io::AsyncRead for Fountain<'a> {
    /// Puts as many bytes as fit into `buf` from the not-yet-consumed part of
    /// the chunk and marks them as consumed. Always completes immediately.
    fn poll_read(
        self: Pin<&mut Self>,
        _cx: &mut Context<'_>,
        buf: &mut tokio::io::ReadBuf<'_>,
    ) -> Poll<io::Result<()>> {
        let this = self.get_mut();
        let available = &this.chunk[this.consumed..];
        let len = buf.remaining().min(available.len());

        buf.put_slice(&available[..len]);

        // Advance the position so that successive reads walk through the
        // chunk (and wrap around) instead of returning the same bytes again.
        io::BufRead::consume(this, len);
        Poll::Ready(Ok(()))
    }
}
303
304
#[cfg(feature = "async-tokio")]
impl<'a> tokio::io::AsyncBufRead for Fountain<'a> {
    /// Immediately returns the result of the synchronous
    /// [`fill_buf`](io::BufRead::fill_buf).
    #[inline]
    fn poll_fill_buf(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<io::Result<&[u8]>> {
        let this = self.get_mut();
        Poll::Ready(io::BufRead::fill_buf(this))
    }

    /// Forwards to the synchronous [`consume`](io::BufRead::consume).
    #[inline]
    fn consume(self: Pin<&mut Self>, amt: usize) {
        let this = self.get_mut();
        io::BufRead::consume(this, amt);
    }
}
316
317
////////////////////////////////////////////////////////////////////////////////////////////////////
318
319
/// Checks whether the byte is an XML whitespace: a blank (`' '`), a tab
/// (`'\t'`), a new line (`'\n'`) or a carriage return (`'\r'`).
#[inline]
pub const fn is_whitespace(b: u8) -> bool {
    b == b' ' || b == b'\t' || b == b'\n' || b == b'\r'
}
324
325
/// Calculates name from an element-like content. Name is the first word in `content`,
326
/// where word boundaries is XML whitespace characters.
327
///
328
/// 'Whitespace' refers to the definition used by [`is_whitespace`].
329
#[inline]
330
0
pub const fn name_len(mut bytes: &[u8]) -> usize {
331
    // Note: A pattern matching based approach (instead of indexing) allows
332
    // making the function const.
333
0
    let mut len = 0;
334
0
    while let [first, rest @ ..] = bytes {
335
0
        if is_whitespace(*first) {
336
0
            break;
337
0
        }
338
0
        len += 1;
339
0
        bytes = rest;
340
    }
341
0
    len
342
0
}
343
344
/// Returns a byte slice with leading XML whitespace bytes removed.
345
///
346
/// 'Whitespace' refers to the definition used by [`is_whitespace`].
347
#[inline]
348
0
pub const fn trim_xml_start(mut bytes: &[u8]) -> &[u8] {
349
    // Note: A pattern matching based approach (instead of indexing) allows
350
    // making the function const.
351
0
    while let [first, rest @ ..] = bytes {
352
0
        if is_whitespace(*first) {
353
0
            bytes = rest;
354
0
        } else {
355
0
            break;
356
        }
357
    }
358
0
    bytes
359
0
}
360
361
/// Returns a byte slice with trailing XML whitespace bytes removed.
362
///
363
/// 'Whitespace' refers to the definition used by [`is_whitespace`].
364
#[inline]
365
0
pub const fn trim_xml_end(mut bytes: &[u8]) -> &[u8] {
366
    // Note: A pattern matching based approach (instead of indexing) allows
367
    // making the function const.
368
0
    while let [rest @ .., last] = bytes {
369
0
        if is_whitespace(*last) {
370
0
            bytes = rest;
371
0
        } else {
372
0
            break;
373
        }
374
    }
375
0
    bytes
376
0
}
377
378
/// Returns a string slice with XML whitespace characters removed from both sides.
379
///
380
/// 'Whitespace' refers to the definition used by [`is_whitespace`].
381
#[inline]
382
0
pub fn trim_xml_spaces(text: &str) -> &str {
383
0
    let bytes = trim_xml_end(trim_xml_start(text.as_bytes()));
384
0
    match core::str::from_utf8(bytes) {
385
0
        Ok(s) => s,
386
        // SAFETY: Removing XML space characters (subset of ASCII) from a `&str` does not invalidate UTF-8.
387
0
        _ => unreachable!(),
388
    }
389
0
}
390
391
////////////////////////////////////////////////////////////////////////////////////////////////////
392
393
/// Splits string into pieces which can be part of a single `CDATA` section.
394
///
395
/// Because CDATA cannot contain the `]]>` sequence, split the string between
396
/// `]]` and `>`.
397
#[derive(Debug, Clone)]
398
pub(crate) struct CDataIterator<'a> {
399
    /// The unprocessed data which should be emitted as `BytesCData` events.
400
    /// At each iteration, the processed data is cut from this slice.
401
    unprocessed: &'a str,
402
    finished: bool,
403
}
404
405
impl<'a> CDataIterator<'a> {
406
0
    pub const fn new(value: &'a str) -> Self {
407
0
        Self {
408
0
            unprocessed: value,
409
0
            finished: false,
410
0
        }
411
0
    }
412
}
413
414
impl<'a> Iterator for CDataIterator<'a> {
415
    type Item = &'a str;
416
417
0
    fn next(&mut self) -> Option<&'a str> {
418
0
        if self.finished {
419
0
            return None;
420
0
        }
421
422
0
        for gt in memchr::memchr_iter(b'>', self.unprocessed.as_bytes()) {
423
0
            let (slice, rest) = self.unprocessed.split_at(gt);
424
0
            if slice.ends_with("]]") {
425
0
                self.unprocessed = rest;
426
0
                return Some(slice);
427
0
            }
428
        }
429
430
0
        self.finished = true;
431
0
        Some(self.unprocessed)
432
0
    }
433
}
434
435
impl FusedIterator for CDataIterator<'_> {}
436
437
////////////////////////////////////////////////////////////////////////////////////////////////////
438
439
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// A non-printable byte is written in HEX, blanks are written as is.
    #[test]
    fn write_byte_string0() {
        let mut raw = vec![32u8; 9];
        raw[0] = 10;
        let bytes = ByteBuf(raw);
        assert_eq!(format!("{:?}", bytes), "\"0xA        \"");
    }

    /// Printable ASCII is written as is.
    #[test]
    fn write_byte_string1() {
        let bytes = ByteBuf(b"http://www.w3.org/2002/07/owl#".to_vec());
        assert_eq!(
            format!("{:?}", bytes),
            r##""http://www.w3.org/2002/07/owl#""##
        );
    }

    /// Double quotes inside the data are escaped.
    #[test]
    fn write_byte_string3() {
        let bytes = ByteBuf(br##"Class IRI="#B""##.to_vec());
        assert_eq!(format!("{:?}", bytes), r##""Class IRI=\"#B\"""##);
    }

    #[test]
    fn name_len() {
        fn check(input: &[u8], expected: usize) {
            assert_eq!(super::name_len(input), expected);
        }

        check(b"", 0);
        check(b" abc", 0);
        check(b" \t\r\n", 0);

        check(b"abc", 3);
        check(b"abc ", 3);

        check(b"a bc", 1);
        check(b"ab\tc", 2);
        check(b"ab\rc", 2);
        check(b"ab\nc", 2);
    }

    #[test]
    fn trim_xml_start() {
        fn check(input: &[u8], expected: &[u8]) {
            assert_eq!(Bytes(super::trim_xml_start(input)), Bytes(expected));
        }

        check(b"", b"");
        check(b"abc", b"abc");
        check(b"\r\n\t ab \t\r\nc \t\r\n", b"ab \t\r\nc \t\r\n");
    }

    #[test]
    fn trim_xml_end() {
        fn check(input: &[u8], expected: &[u8]) {
            assert_eq!(Bytes(super::trim_xml_end(input)), Bytes(expected));
        }

        check(b"", b"");
        check(b"abc", b"abc");
        check(b"\r\n\t ab \t\r\nc \t\r\n", b"\r\n\t ab \t\r\nc");
    }
}