Coverage Report

Created: 2025-06-16 06:50

/rust/registry/src/index.crates.io-6f17d22bba15001f/quick-xml-0.29.0/src/reader/mod.rs
Line
Count
Source (jump to first uncovered line)
1
//! Contains high-level interface for a pull-based XML parser.
2
3
#[cfg(feature = "encoding")]
4
use encoding_rs::Encoding;
5
use std::ops::Range;
6
7
use crate::encoding::Decoder;
8
use crate::errors::{Error, Result};
9
use crate::events::Event;
10
use crate::reader::parser::Parser;
11
12
use memchr;
13
14
macro_rules! configure_methods {
15
    ($($holder:ident)?) => {
16
        /// Changes whether empty elements should be split into an `Open` and a `Close` event.
17
        ///
18
        /// When set to `true`, all [`Empty`] events produced by a self-closing tag like `<tag/>` are
19
        /// expanded into a [`Start`] event followed by an [`End`] event. When set to `false` (the
20
        /// default), those tags are represented by an [`Empty`] event instead.
21
        ///
22
        /// Note that setting this to `true` will lead to additional allocations that are
23
        /// needed to store tag name for an [`End`] event. However if [`check_end_names`]
24
        /// is also set, only one additional allocation will be performed that supports
25
        /// both these options.
26
        ///
27
        /// (`false` by default)
28
        ///
29
        /// [`Empty`]: Event::Empty
30
        /// [`Start`]: Event::Start
31
        /// [`End`]: Event::End
32
        /// [`check_end_names`]: Self::check_end_names
33
0
        pub fn expand_empty_elements(&mut self, val: bool) -> &mut Self {
34
0
            self $(.$holder)? .parser.expand_empty_elements = val;
35
0
            self
36
0
        }
Unexecuted instantiation: <quick_xml::reader::ns_reader::NsReader<_>>::expand_empty_elements
Unexecuted instantiation: <quick_xml::reader::Reader<_>>::expand_empty_elements
37
38
        /// Changes whether whitespace before and after character data should be removed.
39
        ///
40
        /// When set to `true`, all [`Text`] events are trimmed.
41
        /// If after that the event is empty it will not be pushed.
42
        ///
43
        /// Changing this option automatically changes the [`trim_text_end`] option.
44
        ///
45
        /// (`false` by default).
46
        ///
47
        /// <div style="background:rgba(80, 240, 100, 0.20);padding:0.75em;">
48
        ///
49
        /// WARNING: With this option every text event will be trimmed, which is
50
        /// incorrect behavior when text events are delimited by comments, processing
51
        /// instructions or CDATA sections. To correctly trim data manually apply
52
        /// [`BytesText::inplace_trim_start`] and [`BytesText::inplace_trim_end`]
53
        /// only to necessary events.
54
        /// </div>
55
        ///
56
        /// [`Text`]: Event::Text
57
        /// [`trim_text_end`]: Self::trim_text_end
58
        /// [`BytesText::inplace_trim_start`]: crate::events::BytesText::inplace_trim_start
59
        /// [`BytesText::inplace_trim_end`]: crate::events::BytesText::inplace_trim_end
60
0
        pub fn trim_text(&mut self, val: bool) -> &mut Self {
61
0
            self $(.$holder)? .parser.trim_text_start = val;
62
0
            self $(.$holder)? .parser.trim_text_end = val;
63
0
            self
64
0
        }
Unexecuted instantiation: <quick_xml::reader::ns_reader::NsReader<_>>::trim_text
Unexecuted instantiation: <quick_xml::reader::Reader<_>>::trim_text
65
66
        /// Changes whether whitespace after character data should be removed.
67
        ///
68
        /// When set to `true`, trailing whitespace is trimmed in [`Text`] events.
69
        /// If after that the event is empty it will not be pushed.
70
        ///
71
        /// (`false` by default).
72
        ///
73
        /// <div style="background:rgba(80, 240, 100, 0.20);padding:0.75em;">
74
        ///
75
        /// WARNING: With this option every text event will be trimmed, which is
76
        /// incorrect behavior when text events are delimited by comments, processing
77
        /// instructions or CDATA sections. To correctly trim data manually apply
78
        /// [`BytesText::inplace_trim_start`] and [`BytesText::inplace_trim_end`]
79
        /// only to necessary events.
80
        /// </div>
81
        ///
82
        /// [`Text`]: Event::Text
83
        /// [`BytesText::inplace_trim_start`]: crate::events::BytesText::inplace_trim_start
84
        /// [`BytesText::inplace_trim_end`]: crate::events::BytesText::inplace_trim_end
85
0
        pub fn trim_text_end(&mut self, val: bool) -> &mut Self {
86
0
            self $(.$holder)? .parser.trim_text_end = val;
87
0
            self
88
0
        }
Unexecuted instantiation: <quick_xml::reader::ns_reader::NsReader<_>>::trim_text_end
Unexecuted instantiation: <quick_xml::reader::Reader<_>>::trim_text_end
89
90
        /// Changes whether trailing whitespaces after the markup name are trimmed in closing tags
91
        /// `</a >`.
92
        ///
93
        /// If true the emitted [`End`] event is stripped of trailing whitespace after the markup name.
94
        ///
95
        /// Note that if set to `false` and `check_end_names` is true the comparison of markup names is
96
        /// going to fail erroneously if a closing tag contains trailing whitespaces.
97
        ///
98
        /// (`true` by default)
99
        ///
100
        /// [`End`]: Event::End
101
0
        pub fn trim_markup_names_in_closing_tags(&mut self, val: bool) -> &mut Self {
102
0
            self $(.$holder)? .parser.trim_markup_names_in_closing_tags = val;
103
0
            self
104
0
        }
Unexecuted instantiation: <quick_xml::reader::ns_reader::NsReader<_>>::trim_markup_names_in_closing_tags
Unexecuted instantiation: <quick_xml::reader::Reader<_>>::trim_markup_names_in_closing_tags
105
106
        /// Changes whether mismatched closing tag names should be detected.
107
        ///
108
        /// Note, that start and end tags [should match literally][spec], they cannot
109
        /// have different prefixes even if both prefixes resolve to the same namespace.
110
        /// The XML
111
        ///
112
        /// ```xml
113
        /// <outer xmlns="namespace" xmlns:p="namespace">
114
        /// </p:outer>
115
        /// ```
116
        ///
117
        /// is not valid, even though semantically the start tag is the same as the
118
        /// end tag. The reason is that namespaces are an extension of the original
119
        /// XML specification (without namespaces) and it should be backward-compatible.
120
        ///
121
        /// When set to `false`, it won't check if a closing tag matches the corresponding opening tag.
122
        /// For example, `<mytag></different_tag>` will be permitted.
123
        ///
124
        /// If the XML is known to be sane (already processed, etc.) this saves extra time.
125
        ///
126
        /// Note that the emitted [`End`] event will not be modified if this is disabled, ie. it will
127
        /// contain the data of the mismatched end tag.
128
        ///
129
        /// Note that setting this to `true` will lead to additional allocations that are
130
        /// needed to store tag name for an [`End`] event. However if [`expand_empty_elements`]
131
        /// is also set, only one additional allocation will be performed that supports
132
        /// both these options.
133
        ///
134
        /// (`true` by default)
135
        ///
136
        /// [spec]: https://www.w3.org/TR/xml11/#dt-etag
137
        /// [`End`]: Event::End
138
        /// [`expand_empty_elements`]: Self::expand_empty_elements
139
0
        pub fn check_end_names(&mut self, val: bool) -> &mut Self {
140
0
            self $(.$holder)? .parser.check_end_names = val;
141
0
            self
142
0
        }
Unexecuted instantiation: <quick_xml::reader::ns_reader::NsReader<_>>::check_end_names
Unexecuted instantiation: <quick_xml::reader::Reader<_>>::check_end_names
143
144
        /// Changes whether comments should be validated.
145
        ///
146
        /// When set to `true`, every [`Comment`] event will be checked for not containing `--`, which
147
        /// is not allowed in XML comments. Most of the time we don't want comments at all so we don't
148
        /// really care about comment correctness, thus the default value is `false` to improve
149
        /// performance.
150
        ///
151
        /// (`false` by default)
152
        ///
153
        /// [`Comment`]: Event::Comment
154
0
        pub fn check_comments(&mut self, val: bool) -> &mut Self {
155
0
            self $(.$holder)? .parser.check_comments = val;
156
0
            self
157
0
        }
Unexecuted instantiation: <quick_xml::reader::ns_reader::NsReader<_>>::check_comments
Unexecuted instantiation: <quick_xml::reader::Reader<_>>::check_comments
158
    };
159
}
160
161
macro_rules! read_event_impl {
162
    (
163
        $self:ident, $buf:ident,
164
        $reader:expr,
165
        $read_until_open:ident,
166
        $read_until_close:ident
167
        $(, $await:ident)?
168
    ) => {{
169
        let event = loop {
170
            match $self.parser.state {
171
                ParseState::Init => { // Go to OpenedTag state
172
                    // If the encoding is set explicitly, we do not need to detect it. For example,
173
                    // explicit UTF-8 is set automatically if the Reader was created using `from_str`.
174
                    // But we still need to remove BOM for consistency with no encoding
175
                    // feature enabled path
176
                    #[cfg(feature = "encoding")]
177
                    if let Some(encoding) = $reader.detect_encoding() $(.$await)? ? {
178
                        if $self.parser.encoding.can_be_refined() {
179
                            $self.parser.encoding = crate::reader::EncodingRef::BomDetected(encoding);
180
                        }
181
                    }
182
183
                    // Removes UTF-8 BOM if it is present
184
                    #[cfg(not(feature = "encoding"))]
185
                    $reader.remove_utf8_bom() $(.$await)? ?;
186
187
                    // Go to OpenedTag state
188
                    match $self.$read_until_open($buf) $(.$await)? {
189
                        Ok(Ok(ev)) => break Ok(ev),
190
                        Ok(Err(b)) => $buf = b,
191
                        Err(err)   => break Err(err),
192
                    }
193
                },
194
                ParseState::ClosedTag => { // Go to OpenedTag state
195
                    match $self.$read_until_open($buf) $(.$await)? {
196
                        Ok(Ok(ev)) => break Ok(ev),
197
                        Ok(Err(b)) => $buf = b,
198
                        Err(err)   => break Err(err),
199
                    }
200
                },
201
                // Go to ClosedTag state in next two arms
202
                ParseState::OpenedTag => break $self.$read_until_close($buf) $(.$await)?,
203
                ParseState::Empty => break $self.parser.close_expanded_empty(),
204
                ParseState::Exit => break Ok(Event::Eof),
205
            };
206
        };
207
        match event {
208
            Err(_) | Ok(Event::Eof) => $self.parser.state = ParseState::Exit,
209
            _ => {}
210
        }
211
        event
212
    }};
213
}
214
215
/// Read bytes up to `<` and skip it. If current byte (after skipping all space
216
/// characters if [`Parser::trim_text_start`] is `true`) is already `<`, then
217
/// returns the next event, otherwise stay at position just after the `<` symbol.
218
///
219
/// Moves parser to the `OpenedTag` state.
220
///
221
/// This code is executed in two cases:
222
/// - after start of parsing just after skipping BOM if it is present
223
/// - after parsing `</tag>` or `<tag>`
224
macro_rules! read_until_open {
225
    (
226
        $self:ident, $buf:ident,
227
        $reader:expr,
228
        $read_event:ident
229
        $(, $await:ident)?
230
    ) => {{
231
        $self.parser.state = ParseState::OpenedTag;
232
233
        if $self.parser.trim_text_start {
234
            $reader.skip_whitespace(&mut $self.parser.offset) $(.$await)? ?;
235
        }
236
237
        // If we already at the `<` symbol, do not try to return an empty Text event
238
        if $reader.skip_one(b'<', &mut $self.parser.offset) $(.$await)? ? {
239
            // Pass $buf to the next iteration of the parsing loop
240
            return Ok(Err($buf));
241
        }
242
243
        match $reader
244
            .read_bytes_until(b'<', $buf, &mut $self.parser.offset)
245
            $(.$await)?
246
        {
247
            // Return Text event with `bytes` content
248
            Ok(Some(bytes)) => $self.parser.emit_text(bytes).map(Ok),
249
            Ok(None) => Ok(Ok(Event::Eof)),
250
            Err(e) => Err(e),
251
        }
252
    }};
253
}
254
255
/// Read bytes up to the `>` and skip it. This method is expected to be called
256
/// after seeing the `<` symbol and skipping it. Inspects the next (current)
257
/// symbol and returns an appropriate [`Event`]:
258
///
259
/// |Symbol |Event
260
/// |-------|-------------------------------------
261
/// |`!`    |[`Comment`], [`CData`] or [`DocType`]
262
/// |`/`    |[`End`]
263
/// |`?`    |[`PI`]
264
/// |_other_|[`Start`] or [`Empty`]
265
///
266
/// Moves parser to the `ClosedTag` state.
267
///
268
/// [`Comment`]: Event::Comment
269
/// [`CData`]: Event::CData
270
/// [`DocType`]: Event::DocType
271
/// [`End`]: Event::End
272
/// [`PI`]: Event::PI
273
/// [`Start`]: Event::Start
274
/// [`Empty`]: Event::Empty
275
macro_rules! read_until_close {
276
    (
277
        $self:ident, $buf:ident,
278
        $reader:expr
279
        $(, $await:ident)?
280
    ) => {{
281
        $self.parser.state = ParseState::ClosedTag;
282
283
        match $reader.peek_one() $(.$await)? {
284
            // `<!` - comment, CDATA or DOCTYPE declaration
285
            Ok(Some(b'!')) => match $reader
286
                .read_bang_element($buf, &mut $self.parser.offset)
287
                $(.$await)?
288
            {
289
                Ok(None) => Ok(Event::Eof),
290
                Ok(Some((bang_type, bytes))) => $self.parser.emit_bang(bang_type, bytes),
291
                Err(e) => Err(e),
292
            },
293
            // `</` - closing tag
294
            Ok(Some(b'/')) => match $reader
295
                .read_bytes_until(b'>', $buf, &mut $self.parser.offset)
296
                $(.$await)?
297
            {
298
                Ok(None) => Ok(Event::Eof),
299
                Ok(Some(bytes)) => $self.parser.emit_end(bytes),
300
                Err(e) => Err(e),
301
            },
302
            // `<?` - processing instruction
303
            Ok(Some(b'?')) => match $reader
304
                .read_bytes_until(b'>', $buf, &mut $self.parser.offset)
305
                $(.$await)?
306
            {
307
                Ok(None) => Ok(Event::Eof),
308
                Ok(Some(bytes)) => $self.parser.emit_question_mark(bytes),
309
                Err(e) => Err(e),
310
            },
311
            // `<...` - opening or self-closed tag
312
            Ok(Some(_)) => match $reader
313
                .read_element($buf, &mut $self.parser.offset)
314
                $(.$await)?
315
            {
316
                Ok(None) => Ok(Event::Eof),
317
                Ok(Some(bytes)) => $self.parser.emit_start(bytes),
318
                Err(e) => Err(e),
319
            },
320
            Ok(None) => Ok(Event::Eof),
321
            Err(e) => Err(e),
322
        }
323
    }};
324
}
325
326
/// Generalization of `read_to_end` method for buffered and borrowed readers
327
macro_rules! read_to_end {
328
    (
329
        $self:expr, $end:expr, $buf:expr,
330
        $read_event:ident,
331
        // Code block that performs clearing of internal buffer after read of each event
332
        $clear:block
333
        $(, $await:ident)?
334
    ) => {{
335
        let start = $self.buffer_position();
336
        let mut depth = 0;
337
        loop {
338
            $clear
339
            let end = $self.buffer_position();
340
            match $self.$read_event($buf) $(.$await)? {
341
                Err(e) => return Err(e),
342
343
                Ok(Event::Start(e)) if e.name() == $end => depth += 1,
344
                Ok(Event::End(e)) if e.name() == $end => {
345
                    if depth == 0 {
346
                        break start..end;
347
                    }
348
                    depth -= 1;
349
                }
350
                Ok(Event::Eof) => {
351
                    let name = $self.decoder().decode($end.as_ref());
352
                    return Err(Error::UnexpectedEof(format!("</{:?}>", name)));
353
                }
354
                _ => (),
355
            }
356
        }
357
    }};
358
}
359
360
#[cfg(feature = "async-tokio")]
361
mod async_tokio;
362
mod buffered_reader;
363
mod ns_reader;
364
mod parser;
365
mod slice_reader;
366
367
pub use ns_reader::NsReader;
368
369
/// Range of input in bytes, that corresponds to some piece of XML
370
pub type Span = Range<usize>;
371
372
////////////////////////////////////////////////////////////////////////////////////////////////////
373
374
/// Possible reader states. The state transition diagram (`true` and `false` show the
375
/// value of [`Reader::expand_empty_elements()`] option):
376
///
377
/// ```mermaid
378
/// flowchart LR
379
///   subgraph _
380
///     direction LR
381
///
382
///     Init      -- "(no event)"\n                                       --> OpenedTag
383
///     OpenedTag -- Decl, DocType, PI\nComment, CData\nStart, Empty, End --> ClosedTag
384
///     ClosedTag -- "#lt;false#gt;\n(no event)"\nText                    --> OpenedTag
385
///   end
386
///   ClosedTag -- "#lt;true#gt;"\nStart --> Empty
387
///   Empty     -- End                   --> ClosedTag
388
///   _ -. Eof .-> Exit
389
/// ```
390
#[derive(Clone)]
391
enum ParseState {
392
    /// Initial state in which the reader stays after creation. Transition from that
393
    /// state could produce a `Text`, `Decl`, `Comment` or `Start` event. The next
394
    /// state is always `OpenedTag`. The reader will never return to this state. The
395
    /// event emitted during transition to `OpenedTag` is a `Text` event if the
396
    /// first symbol is not `<`, otherwise no event is emitted.
397
    Init,
398
    /// State after seeing the `<` symbol. Depending on the next symbol all other
399
    /// events could be generated.
400
    ///
401
    /// After generating one event the reader moves to the `ClosedTag` state.
402
    OpenedTag,
403
    /// State in which reader searches the `<` symbol of a markup. All bytes before
404
    /// that symbol will be returned in the [`Event::Text`] event. After that
405
    /// the reader moves to the `OpenedTag` state.
406
    ClosedTag,
407
    /// This state is used only if option [`expand_empty_elements`] is set to `true`.
408
    /// Reader enters this state when it is in a `ClosedTag` state and emits an
409
    /// [`Event::Start`] event. The next event emitted will be an [`Event::End`],
410
    /// after which the reader returns to the `ClosedTag` state.
411
    ///
412
    /// [`expand_empty_elements`]: Parser::expand_empty_elements
413
    Empty,
414
    /// Reader enters this state when `Eof` event generated or an error occurred.
415
    /// This is the last state, the reader stays in it forever.
416
    Exit,
417
}
418
419
/// A reference to an encoding together with information about how it was retrieved.
420
///
421
/// The state transition diagram:
422
///
423
/// ```mermaid
424
/// flowchart LR
425
///   Implicit    -- from_str       --> Explicit
426
///   Implicit    -- BOM            --> BomDetected
427
///   Implicit    -- "encoding=..." --> XmlDetected
428
///   BomDetected -- "encoding=..." --> XmlDetected
429
/// ```
430
#[cfg(feature = "encoding")]
431
#[derive(Clone, Copy)]
432
enum EncodingRef {
433
    /// Encoding was implicitly assumed to have a specified value. It can be refined
434
    /// using BOM or by the XML declaration event (`<?xml encoding=... ?>`)
435
    Implicit(&'static Encoding),
436
    /// Encoding was explicitly set to the desired value. It cannot be changed
437
    /// either by BOM or by parsing the XML declaration (`<?xml encoding=... ?>`)
438
    Explicit(&'static Encoding),
439
    /// Encoding was detected from a byte order mark (BOM) or by the first bytes
440
    /// of the content. It can be refined by the XML declaration event (`<?xml encoding=... ?>`)
441
    BomDetected(&'static Encoding),
442
    /// Encoding was detected using XML declaration event (`<?xml encoding=... ?>`).
443
    /// It can no longer change
444
    XmlDetected(&'static Encoding),
445
}
446
#[cfg(feature = "encoding")]
447
impl EncodingRef {
448
    #[inline]
449
    fn encoding(&self) -> &'static Encoding {
450
        match self {
451
            Self::Implicit(e) => e,
452
            Self::Explicit(e) => e,
453
            Self::BomDetected(e) => e,
454
            Self::XmlDetected(e) => e,
455
        }
456
    }
457
    #[inline]
458
    fn can_be_refined(&self) -> bool {
459
        match self {
460
            Self::Implicit(_) | Self::BomDetected(_) => true,
461
            Self::Explicit(_) | Self::XmlDetected(_) => false,
462
        }
463
    }
464
}
465
466
////////////////////////////////////////////////////////////////////////////////////////////////////
467
468
/// A low level encoding-agnostic XML event reader.
469
///
470
/// Consumes bytes and streams XML [`Event`]s.
471
///
472
/// This reader does not manage namespace declarations and is not able to resolve
473
/// prefixes. If you want these features, use the [`NsReader`].
474
///
475
/// # Examples
476
///
477
/// ```
478
/// use quick_xml::events::Event;
479
/// use quick_xml::reader::Reader;
480
///
481
/// let xml = r#"<tag1 att1 = "test">
482
///                 <tag2><!--Test comment-->Test</tag2>
483
///                 <tag2>Test 2</tag2>
484
///              </tag1>"#;
485
/// let mut reader = Reader::from_str(xml);
486
/// reader.trim_text(true);
487
///
488
/// let mut count = 0;
489
/// let mut txt = Vec::new();
490
/// let mut buf = Vec::new();
491
///
492
/// // The `Reader` does not implement `Iterator` because it outputs borrowed data (`Cow`s)
493
/// loop {
494
///     // NOTE: this is the generic case when we don't know about the input BufRead.
495
///     // when the input is a &str or a &[u8], we don't actually need to use another
496
///     // buffer, we could directly call `reader.read_event()`
497
///     match reader.read_event_into(&mut buf) {
498
///         Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
499
///         // exits the loop when reaching end of file
500
///         Ok(Event::Eof) => break,
501
///
502
///         Ok(Event::Start(e)) => {
503
///             match e.name().as_ref() {
504
///                 b"tag1" => println!("attributes values: {:?}",
505
///                                     e.attributes().map(|a| a.unwrap().value)
506
///                                     .collect::<Vec<_>>()),
507
///                 b"tag2" => count += 1,
508
///                 _ => (),
509
///             }
510
///         }
511
///         Ok(Event::Text(e)) => txt.push(e.unescape().unwrap().into_owned()),
512
///
513
///         // There are several other `Event`s we do not consider here
514
///         _ => (),
515
///     }
516
///     // if we don't keep a borrow elsewhere, we can clear the buffer to keep memory usage low
517
///     buf.clear();
518
/// }
519
/// ```
520
///
521
/// [`NsReader`]: crate::reader::NsReader
522
#[derive(Clone)]
523
pub struct Reader<R> {
524
    /// Source of data for parse
525
    reader: R,
526
    /// Configuration and current parse state
527
    parser: Parser,
528
}
529
530
/// Builder methods
531
impl<R> Reader<R> {
532
    /// Creates a `Reader` that reads from a given reader.
533
13.1k
    pub fn from_reader(reader: R) -> Self {
534
13.1k
        Self {
535
13.1k
            reader,
536
13.1k
            parser: Parser::default(),
537
13.1k
        }
538
13.1k
    }
539
540
    configure_methods!();
541
}
542
543
/// Getters
544
impl<R> Reader<R> {
545
    /// Consumes `Reader` returning the underlying reader
546
    ///
547
    /// Can be used to compute line and column of a parsing error position
548
    ///
549
    /// # Examples
550
    ///
551
    /// ```
552
    /// # use pretty_assertions::assert_eq;
553
    /// use std::{str, io::Cursor};
554
    /// use quick_xml::events::Event;
555
    /// use quick_xml::reader::Reader;
556
    ///
557
    /// let xml = r#"<tag1 att1 = "test">
558
    ///                 <tag2><!--Test comment-->Test</tag2>
559
    ///                 <tag3>Test 2</tag3>
560
    ///              </tag1>"#;
561
    /// let mut reader = Reader::from_reader(Cursor::new(xml.as_bytes()));
562
    /// let mut buf = Vec::new();
563
    ///
564
    /// fn into_line_and_column(reader: Reader<Cursor<&[u8]>>) -> (usize, usize) {
565
    ///     let end_pos = reader.buffer_position();
566
    ///     let mut cursor = reader.into_inner();
567
    ///     let s = String::from_utf8(cursor.into_inner()[0..end_pos].to_owned())
568
    ///         .expect("can't make a string");
569
    ///     let mut line = 1;
570
    ///     let mut column = 0;
571
    ///     for c in s.chars() {
572
    ///         if c == '\n' {
573
    ///             line += 1;
574
    ///             column = 0;
575
    ///         } else {
576
    ///             column += 1;
577
    ///         }
578
    ///     }
579
    ///     (line, column)
580
    /// }
581
    ///
582
    /// loop {
583
    ///     match reader.read_event_into(&mut buf) {
584
    ///         Ok(Event::Start(ref e)) => match e.name().as_ref() {
585
    ///             b"tag1" | b"tag2" => (),
586
    ///             tag => {
587
    ///                 assert_eq!(b"tag3", tag);
588
    ///                 assert_eq!((3, 22), into_line_and_column(reader));
589
    ///                 break;
590
    ///             }
591
    ///         },
592
    ///         Ok(Event::Eof) => unreachable!(),
593
    ///         _ => (),
594
    ///     }
595
    ///     buf.clear();
596
    /// }
597
    /// ```
598
0
    pub fn into_inner(self) -> R {
599
0
        self.reader
600
0
    }
601
602
    /// Gets a reference to the underlying reader.
603
0
    pub fn get_ref(&self) -> &R {
604
0
        &self.reader
605
0
    }
606
607
    /// Gets a mutable reference to the underlying reader.
608
0
    pub fn get_mut(&mut self) -> &mut R {
609
0
        &mut self.reader
610
0
    }
611
612
    /// Gets the current byte position in the input data.
613
    ///
614
    /// Useful when debugging errors.
615
86.5M
    pub fn buffer_position(&self) -> usize {
616
86.5M
        // when internal state is OpenedTag, we have actually read until '<',
617
86.5M
        // which we don't want to show
618
86.5M
        if let ParseState::OpenedTag = self.parser.state {
619
28.7M
            self.parser.offset - 1
620
        } else {
621
57.7M
            self.parser.offset
622
        }
623
86.5M
    }
624
625
    /// Get the decoder, used to decode bytes, read by this reader, to the strings.
626
    ///
627
    /// If `encoding` feature is enabled, the used encoding may change after
628
    /// parsing the XML declaration, otherwise encoding is fixed to UTF-8.
629
    ///
630
    /// If `encoding` feature is enabled and no encoding is specified in declaration,
631
    /// defaults to UTF-8.
632
    #[inline]
633
28.8M
    pub fn decoder(&self) -> Decoder {
634
28.8M
        self.parser.decoder()
635
28.8M
    }
636
}
637
638
/// Private sync reading methods
639
impl<R> Reader<R> {
640
    /// Read text into the given buffer, and return an event that borrows from
641
    /// either that buffer or from the input itself, based on the type of the
642
    /// reader.
643
149M
    fn read_event_impl<'i, B>(&mut self, mut buf: B) -> Result<Event<'i>>
644
149M
    where
645
149M
        R: XmlSource<'i, B>,
646
149M
    {
647
149M
        read_event_impl!(self, buf, self.reader, read_until_open, read_until_close)
648
149M
    }
649
650
    /// Read until '<' is found, moves reader to an `OpenedTag` state and returns a `Text` event.
651
    ///
652
    /// Returns inner `Ok` if the loop should be broken and an event returned.
653
    /// Returns inner `Err` with the same `buf` because Rust borrowck stumbles upon this case in particular.
654
74.9M
    fn read_until_open<'i, B>(&mut self, buf: B) -> Result<std::result::Result<Event<'i>, B>>
655
74.9M
    where
656
74.9M
        R: XmlSource<'i, B>,
657
74.9M
    {
658
74.9M
        read_until_open!(self, buf, self.reader, read_event_impl)
659
74.9M
    }
660
661
    /// Private function to read until `>` is found. This function expects that
662
    /// it was called just after encountering a `<` symbol.
663
74.9M
    fn read_until_close<'i, B>(&mut self, buf: B) -> Result<Event<'i>>
664
74.9M
    where
665
74.9M
        R: XmlSource<'i, B>,
666
74.9M
    {
667
74.9M
        read_until_close!(self, buf, self.reader)
668
74.9M
    }
669
}
670
671
////////////////////////////////////////////////////////////////////////////////////////////////////
672
673
/// Represents an input for a reader that can return borrowed data.
674
///
675
/// There are two implementors of this trait: a generic one that reads data from
676
/// `Self`, copies some part of it into a provided buffer of type `B` and then
677
/// returns data that borrow from that buffer.
678
///
679
/// The other implementor is for `&[u8]` and instead of copying data returns
680
/// borrowed data from `Self`. This implementation allows zero-copy
681
/// deserialization.
682
///
683
/// # Parameters
684
/// - `'r`: lifetime of a buffer from which events will borrow
685
/// - `B`: a type of a buffer that can be used to store data read from `Self` and
686
///   from which events can borrow
687
trait XmlSource<'r, B> {
688
    /// Removes UTF-8 BOM if it is present
689
    #[cfg(not(feature = "encoding"))]
690
    fn remove_utf8_bom(&mut self) -> Result<()>;
691
692
    /// Determines encoding from the start of input and removes BOM if it is present
693
    #[cfg(feature = "encoding")]
694
    fn detect_encoding(&mut self) -> Result<Option<&'static Encoding>>;
695
696
    /// Read input until `byte` is found or end of input is reached.
697
    ///
698
    /// Returns a slice of data read up to `byte`, which does not include into result.
699
    /// If input (`Self`) is exhausted, returns `None`.
700
    ///
701
    /// # Example
702
    ///
703
    /// ```ignore
704
    /// let mut position = 0;
705
    /// let mut input = b"abc*def".as_ref();
706
    /// //                    ^= 4
707
    ///
708
    /// assert_eq!(
709
    ///     input.read_bytes_until(b'*', (), &mut position).unwrap(),
710
    ///     Some(b"abc".as_ref())
711
    /// );
712
    /// assert_eq!(position, 4); // position after the symbol matched
713
    /// ```
714
    ///
715
    /// # Parameters
716
    /// - `byte`: Byte for search
717
    /// - `buf`: Buffer that could be filled from an input (`Self`) and
718
    ///   from which [events] could borrow their data
719
    /// - `position`: Will be increased by amount of bytes consumed
720
    ///
721
    /// [events]: crate::events::Event
722
    fn read_bytes_until(
723
        &mut self,
724
        byte: u8,
725
        buf: B,
726
        position: &mut usize,
727
    ) -> Result<Option<&'r [u8]>>;
728
729
    /// Read input until comment, CDATA or processing instruction is finished.
730
    ///
731
    /// This method expect that `<` already was read.
732
    ///
733
    /// Returns a slice of data read up to end of comment, CDATA or processing
734
    /// instruction (`>`), which does not include into result.
735
    ///
736
    /// If input (`Self`) is exhausted and nothing was read, returns `None`.
737
    ///
738
    /// # Parameters
739
    /// - `buf`: Buffer that could be filled from an input (`Self`) and
740
    ///   from which [events] could borrow their data
741
    /// - `position`: Will be increased by amount of bytes consumed
742
    ///
743
    /// [events]: crate::events::Event
744
    fn read_bang_element(
745
        &mut self,
746
        buf: B,
747
        position: &mut usize,
748
    ) -> Result<Option<(BangType, &'r [u8])>>;
749
750
    /// Read input until XML element is closed by approaching a `>` symbol.
751
    /// Returns `Some(buffer)` that contains a data between `<` and `>` or
752
    /// `None` if end-of-input was reached and nothing was read.
753
    ///
754
    /// Derived from `read_until`, but modified to handle XML attributes
755
    /// using a minimal state machine.
756
    ///
757
    /// Attribute values are [defined] as follows:
758
    /// ```plain
759
    /// AttValue := '"' (([^<&"]) | Reference)* '"'
760
    ///           | "'" (([^<&']) | Reference)* "'"
761
    /// ```
762
    /// (`Reference` is something like `&quot;`, but we don't care about
763
    /// escaped characters at this level)
764
    ///
765
    /// # Parameters
766
    /// - `buf`: Buffer that could be filled from an input (`Self`) and
767
    ///   from which [events] could borrow their data
768
    /// - `position`: Will be increased by amount of bytes consumed
769
    ///
770
    /// [defined]: https://www.w3.org/TR/xml11/#NT-AttValue
771
    /// [events]: crate::events::Event
772
    fn read_element(&mut self, buf: B, position: &mut usize) -> Result<Option<&'r [u8]>>;
773
774
    /// Consume and discard all the whitespace until the next non-whitespace
775
    /// character or EOF.
776
    ///
777
    /// # Parameters
778
    /// - `position`: Will be increased by amount of bytes consumed
779
    fn skip_whitespace(&mut self, position: &mut usize) -> Result<()>;
780
781
    /// Consume and discard one character if it matches the given byte. Return
782
    /// `true` if it matched.
783
    ///
784
    /// # Parameters
785
    /// - `position`: Will be increased by 1 if byte is matched
786
    fn skip_one(&mut self, byte: u8, position: &mut usize) -> Result<bool>;
787
788
    /// Return one character without consuming it, so that future `read_*` calls
789
    /// will still include it. On EOF, return `None`.
790
    fn peek_one(&mut self) -> Result<Option<u8>>;
791
}
792
793
/// Possible elements started with `<!`
794
#[derive(Debug, PartialEq)]
795
enum BangType {
796
    /// <![CDATA[...]]>
797
    CData,
798
    /// <!--...-->
799
    Comment,
800
    /// <!DOCTYPE...>
801
    DocType,
802
}
803
impl BangType {
804
    #[inline(always)]
805
13.1k
    fn new(byte: Option<u8>) -> Result<Self> {
806
13.1k
        Ok(match byte {
807
0
            Some(b'[') => Self::CData,
808
0
            Some(b'-') => Self::Comment,
809
13.1k
            Some(b'D') | Some(b'd') => Self::DocType,
810
0
            Some(b) => return Err(Error::UnexpectedBang(b)),
811
0
            None => return Err(Error::UnexpectedEof("Bang".to_string())),
812
        })
813
13.1k
    }
814
815
    /// If element is finished, returns its content up to `>` symbol and
816
    /// an index of this symbol, otherwise returns `None`
817
    ///
818
    /// # Parameters
819
    /// - `buf`: buffer with data consumed on previous iterations
820
    /// - `chunk`: data read on current iteration and not yet consumed from reader
821
    #[inline(always)]
822
13.1k
    fn parse<'b>(&self, buf: &[u8], chunk: &'b [u8]) -> Option<(&'b [u8], usize)> {
823
13.1k
        for i in memchr::memchr_iter(b'>', chunk) {
824
0
            match self {
825
0
                // Need to read at least 6 symbols (`!---->`) for properly finished comment
826
0
                // <!----> - XML comment
827
0
                //  012345 - i
828
0
                Self::Comment if buf.len() + i > 4 => {
829
0
                    if chunk[..i].ends_with(b"--") {
830
                        // We cannot strip last `--` from the buffer because we need it in case of
831
                        // check_comments enabled option. XML standard requires that comment
832
                        // will not end with `--->` sequence because this is a special case of
833
                        // `--` in the comment (https://www.w3.org/TR/xml11/#sec-comments)
834
0
                        return Some((&chunk[..i], i + 1)); // +1 for `>`
835
0
                    }
836
0
                    // End sequence `-|->` was splitted at |
837
0
                    //        buf --/   \-- chunk
838
0
                    if i == 1 && buf.ends_with(b"-") && chunk[0] == b'-' {
839
0
                        return Some((&chunk[..i], i + 1)); // +1 for `>`
840
0
                    }
841
0
                    // End sequence `--|>` was splitted at |
842
0
                    //         buf --/   \-- chunk
843
0
                    if i == 0 && buf.ends_with(b"--") {
844
0
                        return Some((&[], i + 1)); // +1 for `>`
845
0
                    }
846
                }
847
0
                Self::Comment => {}
848
                Self::CData => {
849
0
                    if chunk[..i].ends_with(b"]]") {
850
0
                        return Some((&chunk[..i], i + 1)); // +1 for `>`
851
0
                    }
852
0
                    // End sequence `]|]>` was splitted at |
853
0
                    //        buf --/   \-- chunk
854
0
                    if i == 1 && buf.ends_with(b"]") && chunk[0] == b']' {
855
0
                        return Some((&chunk[..i], i + 1)); // +1 for `>`
856
0
                    }
857
0
                    // End sequence `]]|>` was splitted at |
858
0
                    //         buf --/   \-- chunk
859
0
                    if i == 0 && buf.ends_with(b"]]") {
860
0
                        return Some((&[], i + 1)); // +1 for `>`
861
0
                    }
862
                }
863
                Self::DocType => {
864
13.1k
                    let content = &chunk[..i];
865
13.1k
                    let balance = memchr::memchr2_iter(b'<', b'>', content)
866
13.1k
                        .map(|p| if content[p] == b'<' { 1i32 } else { -1 })
867
13.1k
                        .sum::<i32>();
868
13.1k
                    if balance == 0 {
869
13.1k
                        return Some((content, i + 1)); // +1 for `>`
870
0
                    }
871
                }
872
            }
873
        }
874
0
        None
875
13.1k
    }
876
    #[inline]
877
0
    fn to_err(&self) -> Error {
878
0
        let bang_str = match self {
879
0
            Self::CData => "CData",
880
0
            Self::Comment => "Comment",
881
0
            Self::DocType => "DOCTYPE",
882
        };
883
0
        Error::UnexpectedEof(bang_str.to_string())
884
0
    }
885
}
886
887
/// State machine for the [`XmlSource::read_element`]
888
#[derive(Clone, Copy)]
889
enum ReadElementState {
890
    /// The initial state (inside element, but outside of attribute value)
891
    Elem,
892
    /// Inside a single-quoted attribute value
893
    SingleQ,
894
    /// Inside a double-quoted attribute value
895
    DoubleQ,
896
}
897
impl ReadElementState {
898
    /// Changes state by analyzing part of input.
899
    /// Returns a tuple with part of chunk up to element closing symbol `>`
900
    /// and a position after that symbol or `None` if such symbol was not found
901
    #[inline(always)]
902
38.5M
    fn change<'b>(&mut self, chunk: &'b [u8]) -> Option<(&'b [u8], usize)> {
903
42.7M
        for i in memchr::memchr3_iter(b'>', b'\'', b'"', chunk) {
904
42.7M
            *self = match (*self, chunk[i]) {
905
                // only allowed to match `>` while we are in state `Elem`
906
38.5M
                (Self::Elem, b'>') => return Some((&chunk[..i], i + 1)),
907
0
                (Self::Elem, b'\'') => Self::SingleQ,
908
2.06M
                (Self::Elem, b'\"') => Self::DoubleQ,
909
910
                // the only end_byte that gets us out if the same character
911
2.06M
                (Self::SingleQ, b'\'') | (Self::DoubleQ, b'"') => Self::Elem,
912
913
                // all other bytes: no state change
914
36.3k
                _ => *self,
915
            };
916
        }
917
0
        None
918
38.5M
    }
919
}
920
921
/// Tells whether the byte is one of the four whitespace bytes recognized by
/// the reader: blank (`0x20`), tab, new line or carriage return.
#[inline]
pub(crate) const fn is_whitespace(b: u8) -> bool {
    match b {
        b'\t' | b'\n' | b'\r' | b' ' => true,
        _ => false,
    }
}
926
927
////////////////////////////////////////////////////////////////////////////////////////////////////
928
929
#[cfg(test)]
930
mod test {
931
    /// Checks the internal implementation of the various reader methods
932
    macro_rules! check {
933
        (
934
            #[$test:meta]
935
            $read_event:ident,
936
            $read_until_close:ident,
937
            // constructor of the XML source on which internal functions will be called
938
            $source:path,
939
            // constructor of the buffer to which read data will stored
940
            $buf:expr
941
            $(, $async:ident, $await:ident)?
942
        ) => {
943
            mod read_bytes_until {
944
                use super::*;
945
                // Use Bytes for printing bytes as strings for ASCII range
946
                use crate::utils::Bytes;
947
                use pretty_assertions::assert_eq;
948
949
                /// Checks that search in the empty buffer returns `None`
950
                #[$test]
951
                $($async)? fn empty() {
952
                    let buf = $buf;
953
                    let mut position = 0;
954
                    let mut input = b"".as_ref();
955
                    //                ^= 0
956
957
                    assert_eq!(
958
                        $source(&mut input)
959
                            .read_bytes_until(b'*', buf, &mut position)
960
                            $(.$await)?
961
                            .unwrap()
962
                            .map(Bytes),
963
                        None
964
                    );
965
                    assert_eq!(position, 0);
966
                }
967
968
                /// Checks that search in the buffer non-existent value returns entire buffer
969
                /// as a result and set `position` to `len()`
970
                #[$test]
971
                $($async)? fn non_existent() {
972
                    let buf = $buf;
973
                    let mut position = 0;
974
                    let mut input = b"abcdef".as_ref();
975
                    //                      ^= 6
976
977
                    assert_eq!(
978
                        $source(&mut input)
979
                            .read_bytes_until(b'*', buf, &mut position)
980
                            $(.$await)?
981
                            .unwrap()
982
                            .map(Bytes),
983
                        Some(Bytes(b"abcdef"))
984
                    );
985
                    assert_eq!(position, 6);
986
                }
987
988
                /// Checks that search in the buffer an element that is located in the front of
989
                /// buffer returns empty slice as a result and set `position` to one symbol
990
                /// after match (`1`)
991
                #[$test]
992
                $($async)? fn at_the_start() {
993
                    let buf = $buf;
994
                    let mut position = 0;
995
                    let mut input = b"*abcdef".as_ref();
996
                    //                 ^= 1
997
998
                    assert_eq!(
999
                        $source(&mut input)
1000
                            .read_bytes_until(b'*', buf, &mut position)
1001
                            $(.$await)?
1002
                            .unwrap()
1003
                            .map(Bytes),
1004
                        Some(Bytes(b""))
1005
                    );
1006
                    assert_eq!(position, 1); // position after the symbol matched
1007
                }
1008
1009
                /// Checks that search in the buffer an element that is located in the middle of
1010
                /// buffer returns slice before that symbol as a result and set `position` to one
1011
                /// symbol after match
1012
                #[$test]
1013
                $($async)? fn inside() {
1014
                    let buf = $buf;
1015
                    let mut position = 0;
1016
                    let mut input = b"abc*def".as_ref();
1017
                    //                    ^= 4
1018
1019
                    assert_eq!(
1020
                        $source(&mut input)
1021
                            .read_bytes_until(b'*', buf, &mut position)
1022
                            $(.$await)?
1023
                            .unwrap()
1024
                            .map(Bytes),
1025
                        Some(Bytes(b"abc"))
1026
                    );
1027
                    assert_eq!(position, 4); // position after the symbol matched
1028
                }
1029
1030
                /// Checks that search in the buffer an element that is located in the end of
1031
                /// buffer returns slice before that symbol as a result and set `position` to one
1032
                /// symbol after match (`len()`)
1033
                #[$test]
1034
                $($async)? fn in_the_end() {
1035
                    let buf = $buf;
1036
                    let mut position = 0;
1037
                    let mut input = b"abcdef*".as_ref();
1038
                    //                       ^= 7
1039
1040
                    assert_eq!(
1041
                        $source(&mut input)
1042
                            .read_bytes_until(b'*', buf, &mut position)
1043
                            $(.$await)?
1044
                            .unwrap()
1045
                            .map(Bytes),
1046
                        Some(Bytes(b"abcdef"))
1047
                    );
1048
                    assert_eq!(position, 7); // position after the symbol matched
1049
                }
1050
            }
1051
1052
            mod read_bang_element {
1053
                use super::*;
1054
1055
                /// Checks that reading CDATA content works correctly
1056
                mod cdata {
1057
                    use super::*;
1058
                    use crate::errors::Error;
1059
                    use crate::reader::BangType;
1060
                    use crate::utils::Bytes;
1061
                    use pretty_assertions::assert_eq;
1062
1063
                    /// Checks that if input begins like CDATA element, but CDATA start sequence
1064
                    /// is not finished, parsing ends with an error
1065
                    #[$test]
1066
                    #[ignore = "start CDATA sequence fully checked outside of `read_bang_element`"]
1067
                    $($async)? fn not_properly_start() {
1068
                        let buf = $buf;
1069
                        let mut position = 0;
1070
                        let mut input = b"![]]>other content".as_ref();
1071
                        //                ^= 0
1072
1073
                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1074
                            Err(Error::UnexpectedEof(s)) if s == "CData" => {}
1075
                            x => assert!(
1076
                                false,
1077
                                r#"Expected `UnexpectedEof("CData")`, but result is: {:?}"#,
1078
                                x
1079
                            ),
1080
                        }
1081
                        assert_eq!(position, 0);
1082
                    }
1083
1084
                    /// Checks that if CDATA startup sequence was matched, but an end sequence
1085
                    /// is not found, parsing ends with an error
1086
                    #[$test]
1087
                    $($async)? fn not_closed() {
1088
                        let buf = $buf;
1089
                        let mut position = 0;
1090
                        let mut input = b"![CDATA[other content".as_ref();
1091
                        //                ^= 0
1092
1093
                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1094
                            Err(Error::UnexpectedEof(s)) if s == "CData" => {}
1095
                            x => assert!(
1096
                                false,
1097
                                r#"Expected `UnexpectedEof("CData")`, but result is: {:?}"#,
1098
                                x
1099
                            ),
1100
                        }
1101
                        assert_eq!(position, 0);
1102
                    }
1103
1104
                    /// Checks that CDATA element without content inside parsed successfully
1105
                    #[$test]
1106
                    $($async)? fn empty() {
1107
                        let buf = $buf;
1108
                        let mut position = 0;
1109
                        let mut input = b"![CDATA[]]>other content".as_ref();
1110
                        //                           ^= 11
1111
1112
                        assert_eq!(
1113
                            $source(&mut input)
1114
                                .read_bang_element(buf, &mut position)
1115
                                $(.$await)?
1116
                                .unwrap()
1117
                                .map(|(ty, data)| (ty, Bytes(data))),
1118
                            Some((BangType::CData, Bytes(b"![CDATA[]]")))
1119
                        );
1120
                        assert_eq!(position, 11);
1121
                    }
1122
1123
                    /// Checks that CDATA element with content parsed successfully.
1124
                    /// Additionally checks that sequences inside CDATA that may look like
1125
                    /// a CDATA end sequence do not interrupt CDATA parsing
1126
                    #[$test]
1127
                    $($async)? fn with_content() {
1128
                        let buf = $buf;
1129
                        let mut position = 0;
1130
                        let mut input = b"![CDATA[cdata]] ]>content]]>other content]]>".as_ref();
1131
                        //                                            ^= 28
1132
1133
                        assert_eq!(
1134
                            $source(&mut input)
1135
                                .read_bang_element(buf, &mut position)
1136
                                $(.$await)?
1137
                                .unwrap()
1138
                                .map(|(ty, data)| (ty, Bytes(data))),
1139
                            Some((BangType::CData, Bytes(b"![CDATA[cdata]] ]>content]]")))
1140
                        );
1141
                        assert_eq!(position, 28);
1142
                    }
1143
                }
1144
1145
                /// Checks that reading XML comments works correctly. According to the [specification],
1146
                /// comment data can contain any sequence except `--`:
1147
                ///
1148
                /// ```peg
1149
                /// comment = '<--' (!'--' char)* '-->';
1150
                /// char = [#x1-#x2C]
1151
                ///      / [#x2E-#xD7FF]
1152
                ///      / [#xE000-#xFFFD]
1153
                ///      / [#x10000-#x10FFFF]
1154
                /// ```
1155
                ///
1156
                /// The presence of this limitation, however, is simply a poorly designed specification
1157
                /// (maybe for purpose of building of LL(1) XML parser) and quick-xml does not check for
1158
                /// presence of these sequences by default. This tests allow such content.
1159
                ///
1160
                /// [specification]: https://www.w3.org/TR/xml11/#dt-comment
1161
                mod comment {
1162
                    use super::*;
1163
                    use crate::errors::Error;
1164
                    use crate::reader::BangType;
1165
                    use crate::utils::Bytes;
1166
                    use pretty_assertions::assert_eq;
1167
1168
                    #[$test]
1169
                    #[ignore = "start comment sequence fully checked outside of `read_bang_element`"]
1170
                    $($async)? fn not_properly_start() {
1171
                        let buf = $buf;
1172
                        let mut position = 0;
1173
                        let mut input = b"!- -->other content".as_ref();
1174
                        //                ^= 0
1175
1176
                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1177
                            Err(Error::UnexpectedEof(s)) if s == "Comment" => {}
1178
                            x => assert!(
1179
                                false,
1180
                                r#"Expected `UnexpectedEof("Comment")`, but result is: {:?}"#,
1181
                                x
1182
                            ),
1183
                        }
1184
                        assert_eq!(position, 0);
1185
                    }
1186
1187
                    #[$test]
1188
                    $($async)? fn not_properly_end() {
1189
                        let buf = $buf;
1190
                        let mut position = 0;
1191
                        let mut input = b"!->other content".as_ref();
1192
                        //                ^= 0
1193
1194
                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1195
                            Err(Error::UnexpectedEof(s)) if s == "Comment" => {}
1196
                            x => assert!(
1197
                                false,
1198
                                r#"Expected `UnexpectedEof("Comment")`, but result is: {:?}"#,
1199
                                x
1200
                            ),
1201
                        }
1202
                        assert_eq!(position, 0);
1203
                    }
1204
1205
                    #[$test]
1206
                    $($async)? fn not_closed1() {
1207
                        let buf = $buf;
1208
                        let mut position = 0;
1209
                        let mut input = b"!--other content".as_ref();
1210
                        //                ^= 0
1211
1212
                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1213
                            Err(Error::UnexpectedEof(s)) if s == "Comment" => {}
1214
                            x => assert!(
1215
                                false,
1216
                                r#"Expected `UnexpectedEof("Comment")`, but result is: {:?}"#,
1217
                                x
1218
                            ),
1219
                        }
1220
                        assert_eq!(position, 0);
1221
                    }
1222
1223
                    #[$test]
1224
                    $($async)? fn not_closed2() {
1225
                        let buf = $buf;
1226
                        let mut position = 0;
1227
                        let mut input = b"!-->other content".as_ref();
1228
                        //                ^= 0
1229
1230
                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1231
                            Err(Error::UnexpectedEof(s)) if s == "Comment" => {}
1232
                            x => assert!(
1233
                                false,
1234
                                r#"Expected `UnexpectedEof("Comment")`, but result is: {:?}"#,
1235
                                x
1236
                            ),
1237
                        }
1238
                        assert_eq!(position, 0);
1239
                    }
1240
1241
                    #[$test]
1242
                    $($async)? fn not_closed3() {
1243
                        let buf = $buf;
1244
                        let mut position = 0;
1245
                        let mut input = b"!--->other content".as_ref();
1246
                        //                ^= 0
1247
1248
                        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1249
                            Err(Error::UnexpectedEof(s)) if s == "Comment" => {}
1250
                            x => assert!(
1251
                                false,
1252
                                r#"Expected `UnexpectedEof("Comment")`, but result is: {:?}"#,
1253
                                x
1254
                            ),
1255
                        }
1256
                        assert_eq!(position, 0);
1257
                    }
1258
1259
                    #[$test]
1260
                    $($async)? fn empty() {
1261
                        let buf = $buf;
1262
                        let mut position = 0;
1263
                        let mut input = b"!---->other content".as_ref();
1264
                        //                      ^= 6
1265
1266
                        assert_eq!(
1267
                            $source(&mut input)
1268
                                .read_bang_element(buf, &mut position)
1269
                                $(.$await)?
1270
                                .unwrap()
1271
                                .map(|(ty, data)| (ty, Bytes(data))),
1272
                            Some((BangType::Comment, Bytes(b"!----")))
1273
                        );
1274
                        assert_eq!(position, 6);
1275
                    }
1276
1277
                    #[$test]
1278
                    $($async)? fn with_content() {
1279
                        let buf = $buf;
1280
                        let mut position = 0;
1281
                        let mut input = b"!--->comment<--->other content".as_ref();
1282
                        //                                 ^= 17
1283
1284
                        assert_eq!(
1285
                            $source(&mut input)
1286
                                .read_bang_element(buf, &mut position)
1287
                                $(.$await)?
1288
                                .unwrap()
1289
                                .map(|(ty, data)| (ty, Bytes(data))),
1290
                            Some((BangType::Comment, Bytes(b"!--->comment<---")))
1291
                        );
1292
                        assert_eq!(position, 17);
1293
                    }
1294
                }
1295
1296
                /// Checks that reading DOCTYPE definition works correctly
1297
                mod doctype {
1298
                    use super::*;
1299
1300
                    mod uppercase {
1301
                        use super::*;
1302
                        use crate::errors::Error;
1303
                        use crate::reader::BangType;
1304
                        use crate::utils::Bytes;
1305
                        use pretty_assertions::assert_eq;
1306
1307
                        #[$test]
1308
                        $($async)? fn not_properly_start() {
1309
                            let buf = $buf;
1310
                            let mut position = 0;
1311
                            let mut input = b"!D other content".as_ref();
1312
                            //                ^= 0
1313
1314
                            match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1315
                                Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {}
1316
                                x => assert!(
1317
                                    false,
1318
                                    r#"Expected `UnexpectedEof("DOCTYPE")`, but result is: {:?}"#,
1319
                                    x
1320
                                ),
1321
                            }
1322
                            assert_eq!(position, 0);
1323
                        }
1324
1325
                        #[$test]
1326
                        $($async)? fn without_space() {
1327
                            let buf = $buf;
1328
                            let mut position = 0;
1329
                            let mut input = b"!DOCTYPEother content".as_ref();
1330
                            //                ^= 0
1331
1332
                            match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1333
                                Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {}
1334
                                x => assert!(
1335
                                    false,
1336
                                    r#"Expected `UnexpectedEof("DOCTYPE")`, but result is: {:?}"#,
1337
                                    x
1338
                                ),
1339
                            }
1340
                            assert_eq!(position, 0);
1341
                        }
1342
1343
                        /// `DOCTYPE` closed right after the keyword: the content is returned
                        /// without the closing `>` and the position moves past it.
                        #[$test]
                        $($async)? fn empty() {
                            let buf = $buf;
                            let mut position = 0;
                            let mut input = b"!DOCTYPE>other content".as_ref();
                            //                         ^= 9

                            let expected = Some((BangType::DocType, Bytes(b"!DOCTYPE")));
                            assert_eq!(
                                $source(&mut input)
                                    .read_bang_element(buf, &mut position)
                                    $(.$await)?
                                    .unwrap()
                                    .map(|(kind, content)| (kind, Bytes(content))),
                                expected
                            );
                            assert_eq!(position, 9);
                        }
1360
1361
                        /// `DOCTYPE` followed by content but without a closing `>` must be
                        /// reported as an unexpected end of input.
                        #[$test]
                        $($async)? fn not_closed() {
                            let buf = $buf;
                            let mut position = 0;
                            let mut input = b"!DOCTYPE other content".as_ref();
                            //                ^= 0

                            match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
                                Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {}
                                // Fail explicitly instead of asserting on a constant `false`
                                x => panic!(
                                    r#"Expected `UnexpectedEof("DOCTYPE")`, but result is: {:?}"#,
                                    x
                                ),
                            }
                            // The test expects the position to be left untouched on error
                            assert_eq!(position, 0);
                        }
1378
                    }
1379
1380
                    mod lowercase {
1381
                        use super::*;
1382
                        use crate::errors::Error;
1383
                        use crate::reader::BangType;
1384
                        use crate::utils::Bytes;
1385
                        use pretty_assertions::assert_eq;
1386
1387
                        #[$test]
1388
                        $($async)? fn not_properly_start() {
1389
                            let buf = $buf;
1390
                            let mut position = 0;
1391
                            let mut input = b"!d other content".as_ref();
1392
                            //                ^= 0
1393
1394
                            match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1395
                                Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {}
1396
                                x => assert!(
1397
                                    false,
1398
                                    r#"Expected `UnexpectedEof("DOCTYPE")`, but result is: {:?}"#,
1399
                                    x
1400
                                ),
1401
                            }
1402
                            assert_eq!(position, 0);
1403
                        }
1404
1405
                        #[$test]
1406
                        $($async)? fn without_space() {
1407
                            let buf = $buf;
1408
                            let mut position = 0;
1409
                            let mut input = b"!doctypeother content".as_ref();
1410
                            //                ^= 0
1411
1412
                            match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1413
                                Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {}
1414
                                x => assert!(
1415
                                    false,
1416
                                    r#"Expected `UnexpectedEof("DOCTYPE")`, but result is: {:?}"#,
1417
                                    x
1418
                                ),
1419
                            }
1420
                            assert_eq!(position, 0);
1421
                        }
1422
1423
                        #[$test]
1424
                        $($async)? fn empty() {
1425
                            let buf = $buf;
1426
                            let mut position = 0;
1427
                            let mut input = b"!doctype>other content".as_ref();
1428
                            //                         ^= 9
1429
1430
                            assert_eq!(
1431
                                $source(&mut input)
1432
                                    .read_bang_element(buf, &mut position)
1433
                                    $(.$await)?
1434
                                    .unwrap()
1435
                                    .map(|(ty, data)| (ty, Bytes(data))),
1436
                                Some((BangType::DocType, Bytes(b"!doctype")))
1437
                            );
1438
                            assert_eq!(position, 9);
1439
                        }
1440
1441
                        #[$test]
1442
                        $($async)? fn not_closed() {
1443
                            let buf = $buf;
1444
                            let mut position = 0;
1445
                            let mut input = b"!doctype other content".as_ref();
1446
                            //                ^= 0
1447
1448
                            match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
1449
                                Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {}
1450
                                x => assert!(
1451
                                    false,
1452
                                    r#"Expected `UnexpectedEof("DOCTYPE")`, but result is: {:?}"#,
1453
                                    x
1454
                                ),
1455
                            }
1456
                            assert_eq!(position, 0);
1457
                        }
1458
                    }
1459
                }
1460
            }
1461
1462
            mod read_element {
1463
                use super::*;
1464
                use crate::utils::Bytes;
1465
                use pretty_assertions::assert_eq;
1466
1467
                /// Checks that nothing was read from empty buffer
1468
                #[$test]
1469
                $($async)? fn empty() {
1470
                    let buf = $buf;
1471
                    let mut position = 0;
1472
                    let mut input = b"".as_ref();
1473
                    //                ^= 0
1474
1475
                    assert_eq!(
1476
                        $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes),
1477
                        None
1478
                    );
1479
                    assert_eq!(position, 0);
1480
                }
1481
1482
                mod open {
1483
                    use super::*;
1484
                    use crate::utils::Bytes;
1485
                    use pretty_assertions::assert_eq;
1486
1487
                    #[$test]
1488
                    $($async)? fn empty_tag() {
1489
                        let buf = $buf;
1490
                        let mut position = 0;
1491
                        let mut input = b">".as_ref();
1492
                        //                 ^= 1
1493
1494
                        assert_eq!(
1495
                            $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes),
1496
                            Some(Bytes(b""))
1497
                        );
1498
                        assert_eq!(position, 1);
1499
                    }
1500
1501
                    #[$test]
1502
                    $($async)? fn normal() {
1503
                        let buf = $buf;
1504
                        let mut position = 0;
1505
                        let mut input = b"tag>".as_ref();
1506
                        //                    ^= 4
1507
1508
                        assert_eq!(
1509
                            $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes),
1510
                            Some(Bytes(b"tag"))
1511
                        );
1512
                        assert_eq!(position, 4);
1513
                    }
1514
1515
                    #[$test]
1516
                    $($async)? fn empty_ns_empty_tag() {
1517
                        let buf = $buf;
1518
                        let mut position = 0;
1519
                        let mut input = b":>".as_ref();
1520
                        //                  ^= 2
1521
1522
                        assert_eq!(
1523
                            $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes),
1524
                            Some(Bytes(b":"))
1525
                        );
1526
                        assert_eq!(position, 2);
1527
                    }
1528
1529
                    #[$test]
1530
                    $($async)? fn empty_ns() {
1531
                        let buf = $buf;
1532
                        let mut position = 0;
1533
                        let mut input = b":tag>".as_ref();
1534
                        //                     ^= 5
1535
1536
                        assert_eq!(
1537
                            $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes),
1538
                            Some(Bytes(b":tag"))
1539
                        );
1540
                        assert_eq!(position, 5);
1541
                    }
1542
1543
                    #[$test]
1544
                    $($async)? fn with_attributes() {
1545
                        let buf = $buf;
1546
                        let mut position = 0;
1547
                        let mut input = br#"tag  attr-1=">"  attr2  =  '>'  3attr>"#.as_ref();
1548
                        //                                                        ^= 38
1549
1550
                        assert_eq!(
1551
                            $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes),
1552
                            Some(Bytes(br#"tag  attr-1=">"  attr2  =  '>'  3attr"#))
1553
                        );
1554
                        assert_eq!(position, 38);
1555
                    }
1556
                }
1557
1558
                mod self_closed {
1559
                    use super::*;
1560
                    use crate::utils::Bytes;
1561
                    use pretty_assertions::assert_eq;
1562
1563
                    #[$test]
1564
                    $($async)? fn empty_tag() {
1565
                        let buf = $buf;
1566
                        let mut position = 0;
1567
                        let mut input = b"/>".as_ref();
1568
                        //                  ^= 2
1569
1570
                        assert_eq!(
1571
                            $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes),
1572
                            Some(Bytes(b"/"))
1573
                        );
1574
                        assert_eq!(position, 2);
1575
                    }
1576
1577
                    #[$test]
1578
                    $($async)? fn normal() {
1579
                        let buf = $buf;
1580
                        let mut position = 0;
1581
                        let mut input = b"tag/>".as_ref();
1582
                        //                     ^= 5
1583
1584
                        assert_eq!(
1585
                            $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes),
1586
                            Some(Bytes(b"tag/"))
1587
                        );
1588
                        assert_eq!(position, 5);
1589
                    }
1590
1591
                    #[$test]
1592
                    $($async)? fn empty_ns_empty_tag() {
1593
                        let buf = $buf;
1594
                        let mut position = 0;
1595
                        let mut input = b":/>".as_ref();
1596
                        //                   ^= 3
1597
1598
                        assert_eq!(
1599
                            $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes),
1600
                            Some(Bytes(b":/"))
1601
                        );
1602
                        assert_eq!(position, 3);
1603
                    }
1604
1605
                    #[$test]
1606
                    $($async)? fn empty_ns() {
1607
                        let buf = $buf;
1608
                        let mut position = 0;
1609
                        let mut input = b":tag/>".as_ref();
1610
                        //                      ^= 6
1611
1612
                        assert_eq!(
1613
                            $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes),
1614
                            Some(Bytes(b":tag/"))
1615
                        );
1616
                        assert_eq!(position, 6);
1617
                    }
1618
1619
                    #[$test]
1620
                    $($async)? fn with_attributes() {
1621
                        let buf = $buf;
1622
                        let mut position = 0;
1623
                        let mut input = br#"tag  attr-1="/>"  attr2  =  '/>'  3attr/>"#.as_ref();
1624
                        //                                                           ^= 41
1625
1626
                        assert_eq!(
1627
                            $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes),
1628
                            Some(Bytes(br#"tag  attr-1="/>"  attr2  =  '/>'  3attr/"#))
1629
                        );
1630
                        assert_eq!(position, 41);
1631
                    }
1632
                }
1633
            }
1634
1635
            mod issue_344 {
1636
                use crate::errors::Error;
1637
                use crate::reader::Reader;
1638
1639
                #[$test]
1640
                $($async)? fn cdata() {
1641
                    let mut reader = Reader::from_str("![]]>");
1642
1643
                    match reader.$read_until_close($buf) $(.$await)? {
1644
                        Err(Error::UnexpectedEof(s)) if s == "CData" => {}
1645
                        x => assert!(
1646
                            false,
1647
                            r#"Expected `UnexpectedEof("CData")`, but result is: {:?}"#,
1648
                            x
1649
                        ),
1650
                    }
1651
                }
1652
1653
                #[$test]
1654
                $($async)? fn comment() {
1655
                    let mut reader = Reader::from_str("!- -->");
1656
1657
                    match reader.$read_until_close($buf) $(.$await)? {
1658
                        Err(Error::UnexpectedEof(s)) if s == "Comment" => {}
1659
                        x => assert!(
1660
                            false,
1661
                            r#"Expected `UnexpectedEof("Comment")`, but result is: {:?}"#,
1662
                            x
1663
                        ),
1664
                    }
1665
                }
1666
1667
                #[$test]
1668
                $($async)? fn doctype_uppercase() {
1669
                    let mut reader = Reader::from_str("!D>");
1670
1671
                    match reader.$read_until_close($buf) $(.$await)? {
1672
                        Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {}
1673
                        x => assert!(
1674
                            false,
1675
                            r#"Expected `UnexpectedEof("DOCTYPE")`, but result is: {:?}"#,
1676
                            x
1677
                        ),
1678
                    }
1679
                }
1680
1681
                #[$test]
1682
                $($async)? fn doctype_lowercase() {
1683
                    let mut reader = Reader::from_str("!d>");
1684
1685
                    match reader.$read_until_close($buf) $(.$await)? {
1686
                        Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {}
1687
                        x => assert!(
1688
                            false,
1689
                            r#"Expected `UnexpectedEof("DOCTYPE")`, but result is: {:?}"#,
1690
                            x
1691
                        ),
1692
                    }
1693
                }
1694
            }
1695
1696
            /// Ensures, that no empty `Text` events are generated
1697
            mod $read_event {
1698
                use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesStart, BytesText, Event};
1699
                use crate::reader::Reader;
1700
                use pretty_assertions::assert_eq;
1701
1702
                /// When `encoding` feature is enabled, encoding should be detected
1703
                /// from BOM (UTF-8) and BOM should be stripped.
1704
                ///
1705
                /// When `encoding` feature is disabled, UTF-8 is assumed and BOM
1706
                /// character should be stripped for consistency
1707
                #[$test]
1708
                $($async)? fn bom_from_reader() {
1709
                    let mut reader = Reader::from_reader("\u{feff}\u{feff}".as_bytes());
1710
1711
                    assert_eq!(
1712
                        reader.$read_event($buf) $(.$await)? .unwrap(),
1713
                        Event::Text(BytesText::from_escaped("\u{feff}"))
1714
                    );
1715
1716
                    assert_eq!(
1717
                        reader.$read_event($buf) $(.$await)? .unwrap(),
1718
                        Event::Eof
1719
                    );
1720
                }
1721
1722
                /// When parsing from &str, encoding is fixed (UTF-8), so
1723
                /// - when `encoding` feature is disabled, the behavior the
1724
                ///   same as in `bom_from_reader` text
1725
                /// - when `encoding` feature is enabled, the behavior should
1726
                ///   stay consistent, so the first BOM character is stripped
1727
                #[$test]
1728
                $($async)? fn bom_from_str() {
1729
                    let mut reader = Reader::from_str("\u{feff}\u{feff}");
1730
1731
                    assert_eq!(
1732
                        reader.$read_event($buf) $(.$await)? .unwrap(),
1733
                        Event::Text(BytesText::from_escaped("\u{feff}"))
1734
                    );
1735
1736
                    assert_eq!(
1737
                        reader.$read_event($buf) $(.$await)? .unwrap(),
1738
                        Event::Eof
1739
                    );
1740
                }
1741
1742
                #[$test]
1743
                $($async)? fn declaration() {
1744
                    let mut reader = Reader::from_str("<?xml ?>");
1745
1746
                    assert_eq!(
1747
                        reader.$read_event($buf) $(.$await)? .unwrap(),
1748
                        Event::Decl(BytesDecl::from_start(BytesStart::from_content("xml ", 3)))
1749
                    );
1750
                }
1751
1752
                #[$test]
1753
                $($async)? fn doctype() {
1754
                    let mut reader = Reader::from_str("<!DOCTYPE x>");
1755
1756
                    assert_eq!(
1757
                        reader.$read_event($buf) $(.$await)? .unwrap(),
1758
                        Event::DocType(BytesText::from_escaped("x"))
1759
                    );
1760
                }
1761
1762
                #[$test]
1763
                $($async)? fn processing_instruction() {
1764
                    let mut reader = Reader::from_str("<?xml-stylesheet?>");
1765
1766
                    assert_eq!(
1767
                        reader.$read_event($buf) $(.$await)? .unwrap(),
1768
                        Event::PI(BytesText::from_escaped("xml-stylesheet"))
1769
                    );
1770
                }
1771
1772
                #[$test]
1773
                $($async)? fn start() {
1774
                    let mut reader = Reader::from_str("<tag>");
1775
1776
                    assert_eq!(
1777
                        reader.$read_event($buf) $(.$await)? .unwrap(),
1778
                        Event::Start(BytesStart::new("tag"))
1779
                    );
1780
                }
1781
1782
                #[$test]
1783
                $($async)? fn end() {
1784
                    let mut reader = Reader::from_str("</tag>");
1785
                    // Because we expect invalid XML, do not check that
1786
                    // the end name paired with the start name
1787
                    reader.check_end_names(false);
1788
1789
                    assert_eq!(
1790
                        reader.$read_event($buf) $(.$await)? .unwrap(),
1791
                        Event::End(BytesEnd::new("tag"))
1792
                    );
1793
                }
1794
1795
                #[$test]
1796
                $($async)? fn empty() {
1797
                    let mut reader = Reader::from_str("<tag/>");
1798
1799
                    assert_eq!(
1800
                        reader.$read_event($buf) $(.$await)? .unwrap(),
1801
                        Event::Empty(BytesStart::new("tag"))
1802
                    );
1803
                }
1804
1805
                #[$test]
1806
                $($async)? fn text() {
1807
                    let mut reader = Reader::from_str("text");
1808
1809
                    assert_eq!(
1810
                        reader.$read_event($buf) $(.$await)? .unwrap(),
1811
                        Event::Text(BytesText::from_escaped("text"))
1812
                    );
1813
                }
1814
1815
                #[$test]
1816
                $($async)? fn cdata() {
1817
                    let mut reader = Reader::from_str("<![CDATA[]]>");
1818
1819
                    assert_eq!(
1820
                        reader.$read_event($buf) $(.$await)? .unwrap(),
1821
                        Event::CData(BytesCData::new(""))
1822
                    );
1823
                }
1824
1825
                #[$test]
1826
                $($async)? fn comment() {
1827
                    let mut reader = Reader::from_str("<!---->");
1828
1829
                    assert_eq!(
1830
                        reader.$read_event($buf) $(.$await)? .unwrap(),
1831
                        Event::Comment(BytesText::from_escaped(""))
1832
                    );
1833
                }
1834
1835
                #[$test]
1836
                $($async)? fn eof() {
1837
                    let mut reader = Reader::from_str("");
1838
1839
                    assert_eq!(
1840
                        reader.$read_event($buf) $(.$await)? .unwrap(),
1841
                        Event::Eof
1842
                    );
1843
                }
1844
            }
1845
        };
1846
    }
1847
1848
    /// Tests for https://github.com/tafia/quick-xml/issues/469
    ///
    /// Each test sizes the buffered reader so that its capacity ends inside
    /// the closing delimiter of the markup, then checks that the event is
    /// still read completely and is followed by `Eof`.
    macro_rules! small_buffers {
        (
            #[$test:meta]
            $read_event:ident: $BufReader:ty
            $(, $async:ident, $await:ident)?
        ) => {
            mod small_buffers {
                use crate::events::{BytesCData, BytesDecl, BytesStart, BytesText, Event};
                use crate::reader::Reader;
                use pretty_assertions::assert_eq;

                /// The buffer ends after the `?` of the closing `?>`
                #[$test]
                $($async)? fn decl() {
                    let xml = "<?xml ?>";
                    //         ^^^^^^^ data that fits into buffer
                    let capacity = xml.find("?>").unwrap() + 1;
                    let source = <$BufReader>::with_capacity(capacity, xml.as_bytes());
                    let mut reader = Reader::from_reader(source);
                    let mut buf = Vec::new();

                    let expected = Event::Decl(BytesDecl::from_start(BytesStart::from_content("xml ", 3)));
                    assert_eq!(reader.$read_event(&mut buf) $(.$await)? .unwrap(), expected);
                    assert_eq!(reader.$read_event(&mut buf) $(.$await)? .unwrap(), Event::Eof);
                }

                /// The buffer ends after the `?` of the closing `?>`
                #[$test]
                $($async)? fn pi() {
                    let xml = "<?pi?>";
                    //         ^^^^^ data that fits into buffer
                    let capacity = xml.find("?>").unwrap() + 1;
                    let source = <$BufReader>::with_capacity(capacity, xml.as_bytes());
                    let mut reader = Reader::from_reader(source);
                    let mut buf = Vec::new();

                    let expected = Event::PI(BytesText::new("pi"));
                    assert_eq!(reader.$read_event(&mut buf) $(.$await)? .unwrap(), expected);
                    assert_eq!(reader.$read_event(&mut buf) $(.$await)? .unwrap(), Event::Eof);
                }

                /// The buffer ends after the `/` of the closing `/>`
                #[$test]
                $($async)? fn empty() {
                    let xml = "<empty/>";
                    //         ^^^^^^^ data that fits into buffer
                    let capacity = xml.find("/>").unwrap() + 1;
                    let source = <$BufReader>::with_capacity(capacity, xml.as_bytes());
                    let mut reader = Reader::from_reader(source);
                    let mut buf = Vec::new();

                    let expected = Event::Empty(BytesStart::new("empty"));
                    assert_eq!(reader.$read_event(&mut buf) $(.$await)? .unwrap(), expected);
                    assert_eq!(reader.$read_event(&mut buf) $(.$await)? .unwrap(), Event::Eof);
                }

                /// The buffer ends after the first `]` of the closing `]]>`
                #[$test]
                $($async)? fn cdata1() {
                    let xml = "<![CDATA[cdata]]>";
                    //         ^^^^^^^^^^^^^^^ data that fits into buffer
                    let capacity = xml.find("]]>").unwrap() + 1;
                    let source = <$BufReader>::with_capacity(capacity, xml.as_bytes());
                    let mut reader = Reader::from_reader(source);
                    let mut buf = Vec::new();

                    let expected = Event::CData(BytesCData::new("cdata"));
                    assert_eq!(reader.$read_event(&mut buf) $(.$await)? .unwrap(), expected);
                    assert_eq!(reader.$read_event(&mut buf) $(.$await)? .unwrap(), Event::Eof);
                }

                /// The buffer ends after the second `]` of the closing `]]>`
                #[$test]
                $($async)? fn cdata2() {
                    let xml = "<![CDATA[cdata]]>";
                    //         ^^^^^^^^^^^^^^^^ data that fits into buffer
                    let capacity = xml.find("]]>").unwrap() + 2;
                    let source = <$BufReader>::with_capacity(capacity, xml.as_bytes());
                    let mut reader = Reader::from_reader(source);
                    let mut buf = Vec::new();

                    let expected = Event::CData(BytesCData::new("cdata"));
                    assert_eq!(reader.$read_event(&mut buf) $(.$await)? .unwrap(), expected);
                    assert_eq!(reader.$read_event(&mut buf) $(.$await)? .unwrap(), Event::Eof);
                }

                /// The buffer ends after the first `-` of the closing `-->`
                #[$test]
                $($async)? fn comment1() {
                    let xml = "<!--comment-->";
                    //         ^^^^^^^^^^^^ data that fits into buffer
                    let capacity = xml.find("-->").unwrap() + 1;
                    let source = <$BufReader>::with_capacity(capacity, xml.as_bytes());
                    let mut reader = Reader::from_reader(source);
                    let mut buf = Vec::new();

                    let expected = Event::Comment(BytesText::new("comment"));
                    assert_eq!(reader.$read_event(&mut buf) $(.$await)? .unwrap(), expected);
                    assert_eq!(reader.$read_event(&mut buf) $(.$await)? .unwrap(), Event::Eof);
                }

                /// The buffer ends after the second `-` of the closing `-->`
                #[$test]
                $($async)? fn comment2() {
                    let xml = "<!--comment-->";
                    //         ^^^^^^^^^^^^^ data that fits into buffer
                    let capacity = xml.find("-->").unwrap() + 2;
                    let source = <$BufReader>::with_capacity(capacity, xml.as_bytes());
                    let mut reader = Reader::from_reader(source);
                    let mut buf = Vec::new();

                    let expected = Event::Comment(BytesText::new("comment"));
                    assert_eq!(reader.$read_event(&mut buf) $(.$await)? .unwrap(), expected);
                    assert_eq!(reader.$read_event(&mut buf) $(.$await)? .unwrap(), Event::Eof);
                }
            }
        };
    }
1995
1996
    // Export macros for the child modules:
1997
    // - buffered_reader
1998
    // - slice_reader
1999
    pub(super) use check;
2000
    pub(super) use small_buffers;
2001
}